Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions ukbot/contest.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ def sum_stats_by(values, key=None, user=None):

class FilterTemplate(object):

def __init__(self, template, translations, sites):
def __init__(self, template, translations, sites, start, end, page):
self.start = start
self.end = end
self.page = page
self.template = template
self.sites = sites
self.named_params_raw_values = {
Expand Down Expand Up @@ -214,7 +217,7 @@ def extract_rules(self, txt, catignore_page=''):
filter_template_config = config['templates']['filters']
if filter_template_config['name'] in dp.templates:
for template in dp.templates[filter_template_config['name']]:
filter_tpl = FilterTemplate(template, filter_template_config, self.sites)
filter_tpl = FilterTemplate(template, filter_template_config, self.sites, self.start, self.end, self.page)

if filter_tpl.type in ['new', 'existing', 'namespace']:
op = 'AND'
Expand Down
95 changes: 85 additions & 10 deletions ukbot/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,39 +721,103 @@ def make(cls, tpl, cfg, **kwargs):
params = {
'query': tpl.get_raw_param('query'),
'sites': tpl.sites,
'tpl': tpl
}
return cls(**params)

def __init__(self, sites, query):
def get_competition_wikidata_ids(cls, tpl, target_lang):
    """
    Fetch Wikidata IDs for a competition from the fiwiki-tools Toolforge API.

    Args:
        tpl: Template object carrying the contest page (``tpl.page``) and
            the contest period (``tpl.start``, ``tpl.end``).
        target_lang: Target site host (e.g. ``fi.wikipedia.org``); only the
            part before the first '.' is sent to the API.

    Returns:
        list: Wikidata item IDs related to the competition. An empty list
        is returned both when the competition has no items and when the
        request or JSON decoding fails (failures are logged).
    """
    # Extract wiki identifiers: the API wants only the subdomain part.
    site = tpl.page.site
    src_wiki = site.host.split('.')[0]
    target_wiki = target_lang.split('.')[0]
    namespace_id = tpl.page.namespace

    # Strip the namespace prefix from the page title. Pages in the main
    # namespace (id 0) may legitimately contain ':' in the title itself,
    # so only split outside the main namespace.
    full_title = tpl.page.name
    if ':' in full_title and namespace_id != 0:
        article_name = full_title.split(':', 1)[1]
    else:
        article_name = full_title

    # Build request parameters (timestamps in MediaWiki format).
    params = {
        "src": src_wiki,
        "namespace": namespace_id,
        "page": article_name,
        "start": tpl.start.strftime("%Y%m%d%H%M%S"),
        "end": tpl.end.strftime("%Y%m%d%H%M%S"),
        "target": target_wiki,
    }

    base_url = "https://fiwiki-tools.toolforge.org/get_wikidata_items_by_ukbot_competition.php"
    url = f"{base_url}?{urllib.parse.urlencode(params)}"
    # Use the module logger (not print) for consistency with the rest
    # of the file's diagnostics.
    logger.info('Requesting: %s', url)

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # non-2xx -> HTTPError (a RequestException)
        data = response.json()       # malformed JSON -> ValueError
    except requests.exceptions.RequestException as e:
        logger.error('Error fetching competition Wikidata items: %s', e)
        return []
    except ValueError as e:
        logger.error('Error parsing JSON response: %s', e)
        return []

    wikidata_items = data.get('wikidata_items', [])
    logger.info('Retrieved %d Wikidata items', len(wikidata_items))
    return wikidata_items

def __init__(self, sites, query, tpl):
    """
    Args:
        sites (SiteManager): References to the sites part of this contest
        query (str): The SPARQL query
        tpl (FilterTemplate): The filter template this filter was built from;
            carries the contest page and period used to scope the query.
    """
    Filter.__init__(self, sites)
    self.query = query
    self.tpl = tpl
    # Populate the filter's page set immediately on construction.
    self.fetch()

def do_query(self, querystring):
logger.info('Running SPARQL query: %s', querystring)
try:
response = requests_retry_session().get(
response = requests_retry_session().post(
'https://query.wikidata.org/sparql',
params={
data={
'query': querystring,
},
headers={
'accept': 'application/sparql-results+json',
'accept-encoding': 'gzip, deflate, br',
'Content-Type': 'application/x-www-form-urlencoded',
'user-agent': 'UKBot/1.0, run by User:Danmichaelo',
}
)
except Exception as ex:
logger.error('SPARQL query failed')
raise ex

if not response.ok:
raise IOError('SPARQL query returned status %s', response.status_code)
response.raise_for_status() # raises HTTPError for 4xx/5xx responses
except requests.exceptions.RequestException as ex:
error_content = ''
if ex.response is not None:
error_content = ex.response.text
logger.error('SPARQL query (POST) failed with response: %s', error_content)
else:
logger.error('SPARQL query (POST) failed without response: %s', str(ex))
raise IOError(f'SPARQL query (POST) error: {str(ex)} | Response content: {error_content}') from ex

expected_length = response.headers.get('Content-Length')
if expected_length is not None and 'tell' in dir(response.raw):
Expand All @@ -778,7 +842,6 @@ def do_query(self, querystring):

def fetch(self):
logger.debug('SparqlFilter: %s', self.query)

item_var = 'item'

# Implementation notes:
Expand All @@ -803,16 +866,28 @@ def fetch(self):
logger.info('SparqlFilter: Initialized with %d articles', len(self.page_keys))

def add_linked_articles(self, site, item_var):
wikidata_items = self.get_competition_wikidata_ids(self.tpl, site)

# If wikidata_items is empty, there is no need to run the SPARQL query
if not wikidata_items:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But what if it returned no Wikidata items because of an error, or because fiwiki-tools is down for some unrelated reason? In that case, I think it should fall back to the old VALUES-less SPARQL query, instead of not running the query at all, like now.

Essentially: I'd like the script to distinguish between "get_competition_wikidata_ids() worked as it should and returned no items" and "get_competition_wikidata_ids() had an exception and therefore returned no items". Does that make sense?

logger.info('Skipping SPARQL query for site %s as get_competition_wikidata_ids() returned no items', site)
return

formatted_items = ' '.join(f"wd:{item}" for item in wikidata_items)

article_var = 'article19472065' # "random string" to avoid matching anything in the subquery
query = """
SELECT ?%(article)s
WHERE {
VALUES ?item { %(itemlist)s }

{ %(query)s }
?%(article)s schema:about ?%(item)s .
?%(article)s schema:isPartOf <https://%(site)s/> .
}
""" % {
'item': item_var,
'itemlist': formatted_items,
'article': article_var,
'query': self.query,
'site': site,
Expand Down