From 759fe6d7d99b9b0c825ddac2b7207350bd396025 Mon Sep 17 00:00:00 2001 From: Tacsipacsi Date: Sat, 13 Dec 2025 15:08:49 +0100 Subject: [PATCH] fix(WikidataCommand): Always process P138 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The latest update for Budapest [0] complained about `data/wikidata/Q44377.json` missing. This is probably the result of the following: 1. It discovered https://www.openstreetmap.org/relation/19901264. 2. Based on its `name:etymology:wikidata` tag, it downloaded Q850862. Since we’re not interested in what the namesakes are named after, it didn’t bother about P138. 3. Later it also discovered https://www.openstreetmap.org/way/156974362. 4. It saw that Q850862 has already been downloaded, so it skipped downloading it – and it also skipped downloading its namesake, Q44377. 5. Subsequently GeoJSONCommand couldn’t find `Q44377.json`. To fix this, simply don’t skip reading back the entity and trying to extract P138 from it. This also allows moving the `file_exists()` check to `save()`, reducing code duplication. This might result in a slight performance degradation (more local disk I/O if a street with its own Wikidata item consists of multiple ways not bundled in a relation), but no increase in HTTP requests (except for now doing the one or two HTTP requests per city that used to be erronously skipped), and correctness is worth that slight performance degradation. I confirmed that running on Budapest locally, the warning is printed without the fix, and not printed with the fix. I also confirmed that with the fix, `gender.csv` correctly includes `Q44377` for Budavári alagút. Please note that while the current example doesn’t involve people, so it may seem borderline out of scope, it’s very possible for similar cases to involve people: e.g. https://www.openstreetmap.org/way/335395340 (Flórián tér overpass) could have `name:etymology:wikidata=Q65215958` (Flórián tér) – I’m not 100% sure if this would be right, but arguably yes –, and that would potentially (depending on the processing order) break the yellow color of https://www.openstreetmap.org/way/908080555. [0] https://github.com/EqualStreetNames/equalstreetnames-budapest/actions/runs/19877172828/job/56967142726 --- Command/WikidataCommand.php | 87 +++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/Command/WikidataCommand.php b/Command/WikidataCommand.php index 82ac5e6..dd4e317 100644 --- a/Command/WikidataCommand.php +++ b/Command/WikidataCommand.php @@ -116,9 +116,7 @@ function ($element): bool { // Download Wikidata item $path = sprintf('%s/%s.json', $outputDir, $identifier); - if (!file_exists($path)) { - self::save($identifier, $element, $path, $warnings); - } + self::save($identifier, $element, $path, $warnings); } } @@ -132,27 +130,22 @@ function ($element): bool { // Download Wikidata item $path = sprintf('%s/%s.json', $outputDir, $wikidataTag); - if (!file_exists($path)) { - self::save($wikidataTag, $element, $path, $warnings); - - $wikiPath = sprintf('%s/%s.json', $outputDir, $wikidataTag); - $entity = Wikidata::read($wikiPath); - - $identifiers = Wikidata::extractNamedAfter($entity); - if (!is_null($identifiers)) { - foreach ($identifiers as $identifier) { - // Check that the value of the tag is a valid Wikidata item identifier - if (preg_match('/^Q[0-9]+$/', $identifier) !== 1) { - $warnings[] = sprintf('Format of `P138` (NamedAfter) property is invalid (%s) for in item "%s".', $identifier, $wikidataTag); - continue; - } - - // Download Wikidata item - $path = sprintf('%s/%s.json', $outputDir, $identifier); - if (!file_exists($path)) { - self::save($identifier, $element, $path, $warnings); - } + self::save($wikidataTag, $element, $path, $warnings); + + $entity = Wikidata::read($path); + + $identifiers = Wikidata::extractNamedAfter($entity); + if (!is_null($identifiers)) { + foreach ($identifiers as $identifier) { + // Check that the value of the tag is a valid Wikidata item identifier + if (preg_match('/^Q[0-9]+$/', $identifier) !== 1) { + $warnings[] = sprintf('Format of `P138` (NamedAfter) property is invalid (%s) for in item "%s".', $identifier, $wikidataTag); + continue; } + + // Download Wikidata item + $path = sprintf('%s/%s.json', $outputDir, $identifier); + self::save($identifier, $element, $path, $warnings); } } } @@ -187,31 +180,33 @@ function ($element): bool { */ private static function save(string $identifier, $element, string $path, array &$warnings = []): void { - $url = sprintf('%s%s.json', self::URL, $identifier); - - try { - $client = new \GuzzleHttp\Client(); - $client->request('GET', $url, [ - 'headers' => [ - 'User-Agent' => 'EqualStreetNames (https://equalstreetnames.org)', - 'Accept' => 'application/json', - ], - 'sink' => $path, - ]); - } catch (BadResponseException $exception) { - if (file_exists($path)) { - unlink($path); - } + if (!file_exists($path)) { + $url = sprintf('%s%s.json', self::URL, $identifier); + + try { + $client = new \GuzzleHttp\Client(); + $client->request('GET', $url, [ + 'headers' => [ + 'User-Agent' => 'EqualStreetNames (https://equalstreetnames.org)', + 'Accept' => 'application/json', + ], + 'sink' => $path, + ]); + } catch (BadResponseException $exception) { + if (file_exists($path)) { + unlink($path); + } - switch ($exception->getResponse()->getStatusCode()) { - case 404: - $warnings[] = sprintf('Wikidata item %s for %s(%d) does not exist.', $identifier, $element->type, $element->id); - break; - default: - $warnings[] = sprintf('Error while fetching Wikidata item %s for %s(%d): %s.', $identifier, $element->type, $element->id, $exception->getMessage()); - break; + switch ($exception->getResponse()->getStatusCode()) { + case 404: + $warnings[] = sprintf('Wikidata item %s for %s(%d) does not exist.', $identifier, $element->type, $element->id); + break; + default: + $warnings[] = sprintf('Error while fetching Wikidata item %s for %s(%d): %s.', $identifier, $element->type, $element->id, $exception->getMessage()); + break; + } + print_r($warnings); } - print_r($warnings); } } }