diff --git a/.github/workflows/update-ror-mappings.yml b/.github/workflows/update-ror-mappings.yml index 9c313c7d8..6176c463c 100644 --- a/.github/workflows/update-ror-mappings.yml +++ b/.github/workflows/update-ror-mappings.yml @@ -22,6 +22,7 @@ jobs: BUCKET: ${{ secrets.ROR_ANALYSIS_S3_BUCKET }} S3_FUNDER_KEY: ror_funder_mapping/funder_to_ror.json S3_HIERARCHY_KEY: ror_funder_mapping/ror_hierarchy.json + S3_COUNTRIES_KEY: ror_funder_mapping/ror_to_countries.json LOCAL_DIR: app/resources steps: @@ -40,6 +41,7 @@ jobs: set -euo pipefail aws s3 cp "s3://${BUCKET}/${S3_FUNDER_KEY}" funder_to_ror.json.new aws s3 cp "s3://${BUCKET}/${S3_HIERARCHY_KEY}" ror_hierarchy.json.new + aws s3 cp "s3://${BUCKET}/${S3_COUNTRIES_KEY}" ror_to_countries.json.new - name: Compare and update tracked files (semantic JSON) id: update @@ -73,6 +75,7 @@ jobs: normalize_and_update funder_to_ror.json.new "${LOCAL_DIR}/funder_to_ror.json" normalize_and_update ror_hierarchy.json.new "${LOCAL_DIR}/ror_hierarchy.json" + normalize_and_update ror_to_countries.json.new "${LOCAL_DIR}/ror_to_countries.json" echo "changed=${changed}" >> "$GITHUB_OUTPUT" diff --git a/app/controllers/datacite_dois_controller.rb b/app/controllers/datacite_dois_controller.rb index 13efac3bc..4fa775222 100644 --- a/app/controllers/datacite_dois_controller.rb +++ b/app/controllers/datacite_dois_controller.rb @@ -142,6 +142,7 @@ def index client_type: params[:client_type], funded_by: params[:funded_by], include_funder_child_organizations: params[:include_funder_child_organizations], + affiliation_country: params[:affiliation_country], ) end @@ -338,6 +339,7 @@ def index publisher: params[:publisher], funded_by: params[:funded_by], include_funder_child_organizations: params[:include_funder_child_organizations], + "affiliation-country" => params[:affiliation_country], # The cursor link should be an array of values, but we want to encode it into a single string for the URL "page[cursor]" => page[:cursor] ? 
# Looks up the country codes recorded for a ROR identifier in ROR_TO_COUNTRIES.
#
# The identifier is first normalized with ror_from_url; the mapping is keyed by
# that normalized form (e.g. "ror.org/00k4n6c32").
#
# @param ror_id [String, nil] ROR identifier in any form ror_from_url accepts
# @return [Array<String>] unique, stripped, upper-cased country codes; an empty
#   array when the identifier cannot be normalized or has no mapping entry
def get_countries_from_ror(ror_id)
  normalized_ror = ror_from_url(ror_id)
  return [] if normalized_ror.blank?

  # The mapping comes from an externally generated JSON file, so tolerate
  # null / non-string / padded / empty entries instead of raising while a DOI
  # is being indexed. Normalization mirrors what the search filter applies to
  # the user-supplied codes (strip + upcase + drop blanks).
  Array.wrap(ROR_TO_COUNTRIES[normalized_ror])
    .grep(String)
    .map { |code| code.strip.upcase }
    .reject(&:empty?)
    .uniq
end
+ end + must_not << { terms: { agency: ["crossref", "kisti", "medra", "jalc", "istic", "airiti", "cnki", "op"] } } if options[:exclude_registration_agencies] # ES query can be optionally defined in different ways @@ -2025,6 +2035,33 @@ def funder_parent_rors end end + def affiliation_countries + countries = [] + countries.concat(extract_countries_from_people(creators)) + countries.concat(extract_countries_from_people(contributors)) + countries.uniq + end + + private + + def extract_countries_from_people(people) + Array.wrap(people).flat_map do |person| + next [] unless person.is_a?(Hash) + + Array.wrap(person.fetch("affiliation", [])).flat_map do |affiliation| + next [] unless affiliation.is_a?(Hash) + next [] unless affiliation.fetch("affiliationIdentifierScheme", nil) == "ROR" + + affiliation_identifier = affiliation.fetch("affiliationIdentifier", nil) + next [] if affiliation_identifier.blank? + + get_countries_from_ror(affiliation_identifier) + end + end + end + + public + def prefix doi.split("/", 2).first if doi.present? end diff --git a/app/resources/ror_to_countries.json b/app/resources/ror_to_countries.json new file mode 100644 index 000000000..58ba24503 --- /dev/null +++ b/app/resources/ror_to_countries.json @@ -0,0 +1,6 @@ +{ + "_comment": "Placeholder test data. 
# Lookup table from normalized ROR ID ("ror.org/<id>") to an array of country
# codes, read once at boot from app/resources/ror_to_countries.json.
# Only the top-level hash is frozen; nested arrays remain mutable.
ROR_TO_COUNTRIES = JSON.parse(
  File.read(Rails.root.join("app/resources/ror_to_countries.json")),
).freeze
# Exercises Rorable#get_countries_from_ror against the bundled
# ROR_TO_COUNTRIES fixture and, where the fixture cannot demonstrate the
# behavior, against a stubbed mapping.
describe "ROR to country mapping" do
  let(:doi) { create(:doi) }

  it "loads ROR to countries mapping" do
    expect(ROR_TO_COUNTRIES).to be_a(Hash)
    expect(ROR_TO_COUNTRIES).not_to be_empty
  end

  it "maps ROR URL to country codes" do
    ror_id = "https://ror.org/00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end

  it "maps incomplete ROR URL to country codes" do
    ror_id = "ror.org/00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end

  it "maps ROR suffix to country codes" do
    ror_id = "00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end

  it "returns empty array for invalid ROR" do
    ror_id = "doi.org/00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq([])
  end

  it "returns empty array for ROR not in mapping" do
    ror_id = "https://ror.org/nonexistent"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq([])
  end

  it "normalizes country codes to uppercase" do
    # The bundled fixture only holds upper-case codes, so the example would
    # pass even without normalization. Stub lower-case data to actually
    # exercise the upcase step.
    stub_const("ROR_TO_COUNTRIES", { "ror.org/00a0jsq62" => ["us"] })

    countries = doi.get_countries_from_ror("https://ror.org/00a0jsq62")
    expect(countries).to eq(["US"])
  end

  it "deduplicates country codes that differ only by case" do
    stub_const("ROR_TO_COUNTRIES", { "ror.org/00a0jsq62" => ["us", "US"] })

    countries = doi.get_countries_from_ror("https://ror.org/00a0jsq62")
    expect(countries).to eq(["US"])
  end
end
"name": "DataCite", + "affiliationIdentifier": "https://ror.org/00k4n6c32", + "affiliationIdentifierScheme": "ROR" + }, + { + "name": "University of Cambridge", + "affiliationIdentifier": "https://ror.org/04wxnsj81", + "affiliationIdentifierScheme": "ROR" + } + ] + } + ], + contributors: [ + { + "name": "Smith, John", + "givenName": "John", + "familyName": "Smith", + "contributorType": "Editor", + "affiliation": [ + { + "name": "DataCite", + "affiliationIdentifier": "https://ror.org/00k4n6c32", + "affiliationIdentifierScheme": "ROR" + } + ] + } + ] + ) } + + it "has countries from ROR affiliations in affiliation_countries" do + expect(doi.affiliation_countries).to match_array(["US", "GB"]) + expect(doi.as_indexed_json["affiliation_countries"]).to match_array(["US", "GB"]) + end + + it "deduplicates country codes from multiple affiliations" do + # Verify that the duplicate US from creator and contributor is deduplicated + expect(doi.affiliation_countries.count("US")).to eq(1) + end + end end