Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/update-ror-mappings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ jobs:
BUCKET: ${{ secrets.ROR_ANALYSIS_S3_BUCKET }}
S3_FUNDER_KEY: ror_funder_mapping/funder_to_ror.json
S3_HIERARCHY_KEY: ror_funder_mapping/ror_hierarchy.json
S3_COUNTRIES_KEY: ror_funder_mapping/ror_to_countries.json
LOCAL_DIR: app/resources

steps:
Expand All @@ -40,6 +41,7 @@ jobs:
set -euo pipefail
aws s3 cp "s3://${BUCKET}/${S3_FUNDER_KEY}" funder_to_ror.json.new
aws s3 cp "s3://${BUCKET}/${S3_HIERARCHY_KEY}" ror_hierarchy.json.new
aws s3 cp "s3://${BUCKET}/${S3_COUNTRIES_KEY}" ror_to_countries.json.new

- name: Compare and update tracked files (semantic JSON)
id: update
Expand Down Expand Up @@ -73,6 +75,7 @@ jobs:

normalize_and_update funder_to_ror.json.new "${LOCAL_DIR}/funder_to_ror.json"
normalize_and_update ror_hierarchy.json.new "${LOCAL_DIR}/ror_hierarchy.json"
normalize_and_update ror_to_countries.json.new "${LOCAL_DIR}/ror_to_countries.json"

echo "changed=${changed}" >> "$GITHUB_OUTPUT"

Expand Down
2 changes: 2 additions & 0 deletions app/controllers/datacite_dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def index
client_type: params[:client_type],
funded_by: params[:funded_by],
include_funder_child_organizations: params[:include_funder_child_organizations],
affiliation_country: params[:affiliation_country],
)
end

Expand Down Expand Up @@ -338,6 +339,7 @@ def index
publisher: params[:publisher],
funded_by: params[:funded_by],
include_funder_child_organizations: params[:include_funder_child_organizations],
"affiliation-country" => params[:affiliation_country],
# The cursor link should be an array of values, but we want to encode it into a single string for the URL
"page[cursor]" =>
page[:cursor] ? make_cursor(results) : nil,
Expand Down
8 changes: 8 additions & 0 deletions app/models/concerns/rorable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,12 @@ def get_ror_parents(ror_id)
normalized_ror = "https://#{ror_from_url(ror_id)}"
ROR_HIERARCHY[normalized_ror]&.fetch("ancestors", []) || []
end

# Looks up the country codes recorded in ROR_TO_COUNTRIES for a ROR identifier.
#
# @param ror_id [String, nil] identifier, passed through `ror_from_url` before the lookup
# @return [Array<String>] unique, upper-cased country codes; [] when
#   `ror_from_url` yields a blank value or the mapping has no entry for it
def get_countries_from_ror(ror_id)
  normalized_ror = ror_from_url(ror_id)
  return [] if normalized_ror.blank?

  # Keep only String entries and drop empty ones, so a malformed mapping value
  # (nil, number, nested object) cannot raise NoMethodError in this lookup.
  Array.wrap(ROR_TO_COUNTRIES[normalized_ror])
    .grep(String)
    .map { |code| code.strip.upcase }
    .reject(&:empty?)
    .uniq
end
end
37 changes: 37 additions & 0 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ def validate_publisher_obj?(doi)
indexes :related_dmp_organization_id, type: :keyword
indexes :funder_rors, type: :keyword
indexes :funder_parent_rors, type: :keyword
indexes :affiliation_countries, type: :keyword
indexes :client_id_and_name, type: :keyword
indexes :provider_id_and_name, type: :keyword
indexes :resource_type_id_and_name, type: :keyword
Expand Down Expand Up @@ -644,6 +645,7 @@ def as_indexed_json(_options = {})
"related_dmp_organization_id" => related_dmp_organization_and_affiliation_id,
"funder_rors" => funder_rors,
"funder_parent_rors" => funder_parent_rors,
"affiliation_countries" => affiliation_countries,
"affiliation_id_and_name" => affiliation_id_and_name,
"fair_affiliation_id_and_name" => fair_affiliation_id_and_name,
"media_ids" => media_ids,
Expand Down Expand Up @@ -1258,6 +1260,14 @@ def self.query(query, options = {})
minimum_should_match = 1
end

if options[:affiliation_country].present?
country_codes = options[:affiliation_country]
.split(",")
.map { |c| c.strip.upcase }
.reject(&:blank?)
filter << { terms: { "affiliation_countries" => country_codes } } if country_codes.any?
end

must_not << { terms: { agency: ["crossref", "kisti", "medra", "jalc", "istic", "airiti", "cnki", "op"] } } if options[:exclude_registration_agencies]

# ES query can be optionally defined in different ways
Expand Down Expand Up @@ -2025,6 +2035,33 @@ def funder_parent_rors
end
end

# Country codes derived from the affiliations of creators and contributors.
#
# @return [Array] creators' countries followed by contributors', with duplicates removed
def affiliation_countries
  # Array#| concatenates both lists in order and drops repeated entries.
  extract_countries_from_people(creators) | extract_countries_from_people(contributors)
end

private

# Gathers country codes for every ROR-identified affiliation of the given people.
#
# @param people [Array, Hash, nil] creator/contributor entries; entries that are not Hashes are ignored
# @return [Array] results of `get_countries_from_ror`, concatenated in input order (duplicates kept)
def extract_countries_from_people(people)
  affiliations = Array.wrap(people).grep(Hash).flat_map do |entry|
    Array.wrap(entry.fetch("affiliation", []))
  end

  # Only affiliations explicitly tagged with the "ROR" scheme are considered.
  ror_affiliations = affiliations.grep(Hash).select do |item|
    item.fetch("affiliationIdentifierScheme", nil) == "ROR"
  end

  identifiers = ror_affiliations.map { |item| item.fetch("affiliationIdentifier", nil) }
  identifiers.reject(&:blank?).flat_map { |identifier| get_countries_from_ror(identifier) }
end

public

# Returns the part of `doi` that precedes its first "/".
#
# @return [String, nil] nil when `doi` is not present
def prefix
  return unless doi.present?

  doi.split("/", 2).first
end
Expand Down
6 changes: 6 additions & 0 deletions app/resources/ror_to_countries.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"_comment": "Placeholder test data. This file will be populated automatically by the update-ror-mappings workflow from S3.",
"ror.org/00k4n6c32": ["US"],
"ror.org/00a0jsq62": ["US"],
"ror.org/04wxnsj81": ["GB"]
}
1 change: 1 addition & 0 deletions config/initializers/load_ror_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

# Each constant holds the parsed contents of one JSON file under app/resources.
# Note that `freeze` is shallow: it freezes the top-level object returned by
# JSON.parse, while nested arrays/hashes/strings inside it stay mutable.
FUNDER_TO_ROR = JSON.parse(File.read(Rails.root.join("app/resources/funder_to_ror.json"))).freeze
ROR_HIERARCHY = JSON.parse(File.read(Rails.root.join("app/resources/ror_hierarchy.json"))).freeze
# NOTE(review): presumably keyed by "ror.org/<id>" with an array of country codes per key — confirm against the data file.
ROR_TO_COUNTRIES = JSON.parse(File.read(Rails.root.join("app/resources/ror_to_countries.json"))).freeze
6 changes: 6 additions & 0 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,12 @@ paths:
description: Search creators.affiliation.affiliationIdentifier and contributors.affiliation.affiliationIdentifier for a ROR ID.
schema:
type: string
- in: query
name: affiliation-country
description: Filter DOIs by associated country inferred from ROR IDs in creators/contributors affiliations. Use comma-separated ISO 3166-1 alpha-2 country codes.
schema:
type: string
example: US,GB
- in: query
name: funded-by
description: Search fundingReferences.funderIdentifier for a ROR ID. Results also include DOIs containing a Crossref Funder ID in fundingReferences.funderIdentifier corresponding to the ROR ID.
Expand Down
45 changes: 45 additions & 0 deletions spec/concerns/rorable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,49 @@
expect(ancestors).to eq([])
end
end

# Exercises get_countries_from_ror against the ROR_TO_COUNTRIES mapping.
describe "ROR to country mapping" do
  let(:doi) { create(:doi) }

  it "loads ROR to countries mapping" do
    expect(ROR_TO_COUNTRIES).to be_a(Hash)
    expect(ROR_TO_COUNTRIES).not_to be_empty
  end

  it "maps ROR URL to country codes" do
    ror_id = "https://ror.org/00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end

  it "maps incomplete ROR URL to country codes" do
    ror_id = "ror.org/00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end

  it "maps ROR suffix to country codes" do
    ror_id = "00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end

  it "returns empty array for invalid ROR" do
    ror_id = "doi.org/00k4n6c32"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq([])
  end

  it "returns empty array for ROR not in mapping" do
    ror_id = "https://ror.org/nonexistent"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq([])
  end

  it "normalizes country codes to uppercase" do
    # Use a lower-case entry for this example: if the mapping value were
    # already upper-case, the expectation would pass without any normalization.
    stub_const("ROR_TO_COUNTRIES", { "ror.org/00a0jsq62" => ["us"] })

    ror_id = "https://ror.org/00a0jsq62"
    countries = doi.get_countries_from_ror(ror_id)
    expect(countries).to eq(["US"])
  end
end
end
50 changes: 50 additions & 0 deletions spec/models/doi_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2363,4 +2363,54 @@
expect(doi.as_indexed_json["funder_parent_rors"]).to eq(["https://ror.org/019w4f821", "https://ror.org/04cw6st05"])
end
end

# Checks that countries are derived from ROR-identified affiliations of
# creators and contributors, and that repeated countries appear only once.
describe "with affiliation ROR IDs" do
  # Builds a fresh ROR-scheme affiliation hash on every call.
  def ror_affiliation(name, ror_url)
    {
      name: name,
      affiliationIdentifier: ror_url,
      affiliationIdentifierScheme: "ROR",
    }
  end

  let(:creator_metadata) do
    [
      {
        name: "Garza, Kristian",
        givenName: "Kristian",
        familyName: "Garza",
        nameType: "Personal",
        affiliation: [
          ror_affiliation("DataCite", "https://ror.org/00k4n6c32"),
          ror_affiliation("University of Cambridge", "https://ror.org/04wxnsj81"),
        ],
      },
    ]
  end

  let(:contributor_metadata) do
    [
      {
        name: "Smith, John",
        givenName: "John",
        familyName: "Smith",
        contributorType: "Editor",
        affiliation: [
          ror_affiliation("DataCite", "https://ror.org/00k4n6c32"),
        ],
      },
    ]
  end

  let(:doi) { create(:doi, creators: creator_metadata, contributors: contributor_metadata) }

  it "has countries from ROR affiliations in affiliation_countries" do
    expected_countries = ["US", "GB"]

    expect(doi.affiliation_countries).to match_array(expected_countries)
    expect(doi.as_indexed_json["affiliation_countries"]).to match_array(expected_countries)
  end

  it "deduplicates country codes from multiple affiliations" do
    # DataCite (US) is listed for both the creator and the contributor.
    us_occurrences = doi.affiliation_countries.count("US")
    expect(us_occurrences).to eq(1)
  end
end
end
Loading