Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions ukbot/contest.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ def sum_stats_by(values, key=None, user=None):

class FilterTemplate(object):

def __init__(self, template, translations, sites):
def __init__(self, template, translations, sites, start, end, page):
self.start = start
self.end = end
self.page = page
self.template = template
self.sites = sites
self.named_params_raw_values = {
Expand Down Expand Up @@ -214,7 +217,7 @@ def extract_rules(self, txt, catignore_page=''):
filter_template_config = config['templates']['filters']
if filter_template_config['name'] in dp.templates:
for template in dp.templates[filter_template_config['name']]:
filter_tpl = FilterTemplate(template, filter_template_config, self.sites)
filter_tpl = FilterTemplate(template, filter_template_config, self.sites, self.start, self.end, self.page)

if filter_tpl.type in ['new', 'existing', 'namespace']:
op = 'AND'
Expand Down
95 changes: 85 additions & 10 deletions ukbot/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,39 +721,103 @@ def make(cls, tpl, cfg, **kwargs):
params = {
'query': tpl.get_raw_param('query'),
'sites': tpl.sites,
'tpl': tpl
}
return cls(**params)

def __init__(self, sites, query):
def get_competition_wikidata_ids(cls, tpl, target_lang):
    """
    Fetch Wikidata IDs for a competition from the fiwiki-tools Toolforge API.

    Args:
        tpl: Template object carrying the contest page (``tpl.page``) and
            the contest period (``tpl.start``, ``tpl.end``).
        target_lang: Target site host (e.g. ``fi.wikipedia.org``); only the
            part before the first '.' is sent to the API.

    Returns:
        list: Wikidata item IDs related to the competition. An empty list
        is returned both when the competition has no items and when the
        request or JSON decoding fails (failures are logged).
    """
    # Extract wiki identifiers: the API wants only the subdomain part.
    site = tpl.page.site
    src_wiki = site.host.split('.')[0]
    target_wiki = target_lang.split('.')[0]
    namespace_id = tpl.page.namespace

    # Strip the namespace prefix from the page title. Pages in the main
    # namespace (id 0) may legitimately contain ':' in the title itself,
    # so only split outside the main namespace.
    full_title = tpl.page.name
    if ':' in full_title and namespace_id != 0:
        article_name = full_title.split(':', 1)[1]
    else:
        article_name = full_title

    # Build request parameters (timestamps in MediaWiki format).
    params = {
        "src": src_wiki,
        "namespace": namespace_id,
        "page": article_name,
        "start": tpl.start.strftime("%Y%m%d%H%M%S"),
        "end": tpl.end.strftime("%Y%m%d%H%M%S"),
        "target": target_wiki,
    }

    base_url = "https://fiwiki-tools.toolforge.org/get_wikidata_items_by_ukbot_competition.php"
    url = f"{base_url}?{urllib.parse.urlencode(params)}"
    # Use the module logger (not print) for consistency with the rest
    # of the file's diagnostics.
    logger.info('Requesting: %s', url)

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # non-2xx -> HTTPError (a RequestException)
        data = response.json()       # malformed JSON -> ValueError
    except requests.exceptions.RequestException as e:
        logger.error('Error fetching competition Wikidata items: %s', e)
        return []
    except ValueError as e:
        logger.error('Error parsing JSON response: %s', e)
        return []

    wikidata_items = data.get('wikidata_items', [])
    logger.info('Retrieved %d Wikidata items', len(wikidata_items))
    return wikidata_items

def __init__(self, sites, query, tpl):
    """
    Args:
        sites (SiteManager): References to the sites part of this contest
        query (str): The SPARQL query
        tpl (FilterTemplate): The filter template this filter was built from;
            carries the contest page and period used to scope the query.
    """
    Filter.__init__(self, sites)
    self.query = query
    self.tpl = tpl
    # Populate the filter's page set immediately on construction.
    self.fetch()

def do_query(self, querystring):
logger.info('Running SPARQL query: %s', querystring)
try:
response = requests_retry_session().get(
response = requests_retry_session().post(
'https://query.wikidata.org/sparql',
params={
data={
'query': querystring,
},
headers={
'accept': 'application/sparql-results+json',
'accept-encoding': 'gzip, deflate, br',
'Content-Type': 'application/x-www-form-urlencoded',
'user-agent': 'UKBot/1.0, run by User:Danmichaelo',
}
)
except Exception as ex:
logger.error('SPARQL query failed')
raise ex

if not response.ok:
raise IOError('SPARQL query returned status %s', response.status_code)
response.raise_for_status() # raises HTTPError for 4xx/5xx responses
except requests.exceptions.RequestException as ex:
error_content = ''
if ex.response is not None:
error_content = ex.response.text
logger.error('SPARQL query (POST) failed with response: %s', error_content)
else:
logger.error('SPARQL query (POST) failed without response: %s', str(ex))
raise IOError(f'SPARQL query (POST) error: {str(ex)} | Response content: {error_content}') from ex

expected_length = response.headers.get('Content-Length')
if expected_length is not None and 'tell' in dir(response.raw):
Expand All @@ -778,7 +842,6 @@ def do_query(self, querystring):

def fetch(self):
logger.debug('SparqlFilter: %s', self.query)

item_var = 'item'

# Implementation notes:
Expand All @@ -803,16 +866,28 @@ def fetch(self):
logger.info('SparqlFilter: Initialized with %d articles', len(self.page_keys))

def add_linked_articles(self, site, item_var):
wikidata_items = self.get_competition_wikidata_ids(self.tpl, site)

# If wikidata_items is empty, there is no need to run the SPARQL query
if not wikidata_items:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But what if it returned no Wikidata items because of an error, or because fiwiki-tools is down for some unrelated reason? In that case, I think it should fall back to the old VALUES-less SPARQL query, instead of not running the query at all, like now.

Essentially: I'd like the script to distinguish between "get_competition_wikidata_ids() worked as it should and returned no items" and "get_competition_wikidata_ids() had an exception and therefore returned no items". Does that make sense?

logger.info('Skipping SPARQL query for site %s as get_competition_wikidata_ids() returned no items', site)
return

formatted_items = ' '.join(f"wd:{item}" for item in wikidata_items)

article_var = 'article19472065' # "random string" to avoid matching anything in the subquery
query = """
SELECT ?%(article)s
WHERE {
VALUES ?item { %(itemlist)s }

{ %(query)s }
?%(article)s schema:about ?%(item)s .
?%(article)s schema:isPartOf <https://%(site)s/> .
}
""" % {
'item': item_var,
'itemlist': formatted_items,
'article': article_var,
'query': self.query,
'site': site,
Expand Down