From 4781d9130b7b640907e89a4e410cff31fe9dc735 Mon Sep 17 00:00:00 2001 From: Joseph Rhoads Date: Mon, 16 Feb 2026 17:13:19 +0100 Subject: [PATCH 1/2] Remove v1 API support and transition to v2 exclusively (#521) * refactor: remove v1 API deprecation middleware and v1 configuration settings * refactor: restrict API version routing to v2 only in urls.py * refactor: remove V1 support and hardcode V2 in OrganizationViewSet and related views * refactor: remove rorapi v1 models, serializers, and index template * refactor: remove v1 support and consolidate matching and queries to v2 * refactor: remove version parameter from check_ror_id and its calls * test: delete v1 tests and refactor v2 unit tests to remove versioning * docs: update README to default to v2 schema indexing and endpoints * refactor: improve file handling in BulkUpdate and clean up unused vars --- README.md | 16 +- rorapi/common/create_update.py | 2 +- rorapi/common/es_utils.py | 7 +- rorapi/common/matching.py | 78 +-- rorapi/common/matching_single_search.py | 18 +- rorapi/common/queries.py | 203 ++---- rorapi/common/urls.py | 14 +- rorapi/common/views.py | 123 ++-- rorapi/management/commands/generaterorid.py | 6 +- rorapi/middleware/deprecation.py | 33 - rorapi/settings.py | 8 +- .../tests/tests_functional/tests_search_v1.py | 104 --- .../tests_integration/tests_matching_v1.py | 43 -- .../tests_integration/tests_search_v1.py | 135 ---- rorapi/tests/tests_integration/tests_v1.py | 302 --------- .../tests_deprecation_middleware.py | 131 ---- rorapi/tests/tests_unit/tests_es_utils_v1.py | 229 ------- rorapi/tests/tests_unit/tests_es_utils_v2.py | 23 +- .../tests_unit/tests_generaterorid_v1.py | 35 - .../tests_unit/tests_generaterorid_v2.py | 4 +- rorapi/tests/tests_unit/tests_matching_v1.py | 514 --------------- rorapi/tests/tests_unit/tests_matching_v2.py | 48 +- rorapi/tests/tests_unit/tests_models_v1.py | 264 -------- rorapi/tests/tests_unit/tests_queries_v1.py | 622 ------------------ rorapi/tests/tests_unit/tests_queries_v2.py | 103 ++- rorapi/tests/tests_unit/tests_views_v1.py | 280 -------- rorapi/v1/__init__.py | 0 rorapi/v1/index_template_es7.json | 307 --------- rorapi/v1/models.py | 151 ----- rorapi/v1/serializers.py | 123 ---- 30 files changed, 231 insertions(+), 3695 deletions(-) delete mode 100644 rorapi/middleware/deprecation.py delete mode 100644 rorapi/tests/tests_functional/tests_search_v1.py delete mode 100644 rorapi/tests/tests_integration/tests_matching_v1.py delete mode 100644 rorapi/tests/tests_integration/tests_search_v1.py delete mode 100644 rorapi/tests/tests_integration/tests_v1.py delete mode 100644 rorapi/tests/tests_unit/tests_deprecation_middleware.py delete mode 100644 rorapi/tests/tests_unit/tests_es_utils_v1.py delete mode 100644 rorapi/tests/tests_unit/tests_generaterorid_v1.py delete mode 100644 rorapi/tests/tests_unit/tests_matching_v1.py delete mode 100644 rorapi/tests/tests_unit/tests_models_v1.py delete mode 100644 rorapi/tests/tests_unit/tests_queries_v1.py delete mode 100644 rorapi/tests/tests_unit/tests_views_v1.py delete mode 100644 rorapi/v1/__init__.py delete mode 100644 rorapi/v1/index_template_es7.json delete mode 100644 rorapi/v1/models.py delete mode 100644 rorapi/v1/serializers.py diff --git a/README.md b/README.md index dd589353..b517d280 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ ROR staff should replace values in [] with valid credential values. External use 3. 
Index the latest ROR dataset from https://github.com/ror-community/ror-data - docker-compose exec web python manage.py setup v1.0-2022-03-17-ror-data -s 1 + docker-compose exec web python manage.py setup v1.0-2022-03-17-ror-data -s 2 *Note: You must specify a dataset that exists in [ror-data](https://github.com/ror-community/ror-data)* -4. . +4. . 5. Optionally, start other services, such as [ror-app](https://github.com/ror-community/ror-app) (the search UI) or [generate-id](https://github.com/ror-community/generate-id) (middleware microservice) @@ -64,9 +64,9 @@ Used in the data deployment process managed in [ror-records](https://github.com/ docker-compose up -d -3. Index the latest v1 ROR dataset from https://github.com/ror-community/ror-data . To index a v2 dataset, see [Indexing v2 data below](#indexing-v2-data) +3. Index the latest ROR dataset from https://github.com/ror-community/ror-data (see [Indexing v2 data](#indexing-v2-data) below): - docker-compose exec web python manage.py setup v1.0-2022-03-17-ror-data -s 1 + docker-compose exec web python manage.py setup v1.0-2022-03-17-ror-data -s 2 *Note: You must specify a dataset that exists in [ror-data](https://github.com/ror-community/ror-data)* @@ -92,19 +92,17 @@ To delete the existing index, create a new index and index a data dump: **LOCALHOST:** Run - docker-compose exec web python manage.py setup v1.0-2022-03-17-ror-data -s 1 + docker-compose exec web python manage.py setup v1.0-2022-03-17-ror-data -s 2 **DEV/STAGING/PROD:** Access the running ror-api container and run: - python manage.py setup v1.0-2022-03-17-ror-data -s 1 + python manage.py setup v1.0-2022-03-17-ror-data -s 2 *Note: You must specify a dataset that exists in [ror-data](https://github.com/ror-community/ror-data)* #### Indexing v2 data -The `-s` argument specifies which schema version to index. To index a v2 data dump, use `-s 2`. To index both v1 and v2 at the same time, omit the `-s` option. - -Note that a v2 formatted JSON file must exist in the zip file for the specified data dump version. Currently, v2 files only exist in [ror-community/ror-data-test](https://github.com/ror-community/ror-data-test). To index a data dump from ror-data-test rather than ror-data, add the `-t` option to the setup command, ex: +The API uses the v2 schema only. Use `-s 2` when indexing a data dump. A v2 formatted JSON file must exist in the zip file for the specified data dump version. Currently, v2 files only exist in [ror-community/ror-data-test](https://github.com/ror-community/ror-data-test). 
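Once a dump has been indexed (see below for indexing from ror-data-test), you can sanity-check the v2 index and endpoints. This is an illustrative check only; it assumes the API is reachable at `http://localhost`, the default base URL used by the integration tests:

    curl http://localhost/v2/heartbeat
    curl "http://localhost/v2/organizations?query=university"

The heartbeat endpoint returns `OK` once the `organizations-v2` index exists.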
To index a data dump from ror-data-test rather than ror-data, add the `-t` option to the setup command, ex: python manage.py setup v1.32-2023-09-14-ror-data -s 2 -t diff --git a/rorapi/common/create_update.py b/rorapi/common/create_update.py index 9c84c24f..8642e0bb 100644 --- a/rorapi/common/create_update.py +++ b/rorapi/common/create_update.py @@ -76,7 +76,7 @@ def new_record_from_json(json_input, version): if not error: new_record['locations'] = updated_locations new_record = add_created_last_mod(new_record) - new_ror_id = check_ror_id(version) + new_ror_id = check_ror_id() print("new ror id: " + new_ror_id) new_record['id'] = new_ror_id error, valid_data = validate_record(sort_list_fields(new_record), V2_SCHEMA) diff --git a/rorapi/common/es_utils.py b/rorapi/common/es_utils.py index af19e529..e1ff9cdd 100644 --- a/rorapi/common/es_utils.py +++ b/rorapi/common/es_utils.py @@ -6,11 +6,8 @@ class ESQueryBuilder: """Elasticsearch query builder class""" - def __init__(self, version): - if version == "v2": - self.search = Search(using=ES7, index=ES_VARS["INDEX_V2"]) - else: - self.search = Search(using=ES7, index=ES_VARS["INDEX_V1"]) + def __init__(self): + self.search = Search(using=ES7, index=ES_VARS["INDEX_V2"]) self.search = self.search.extra(track_total_hits=True) self.search = self.search.params(search_type="dfs_query_then_fetch") diff --git a/rorapi/common/matching.py b/rorapi/common/matching.py index f6614738..c3c8ab17 100644 --- a/rorapi/common/matching.py +++ b/rorapi/common/matching.py @@ -6,7 +6,6 @@ from rorapi.common.models import Errors from rorapi.common.es_utils import ESQueryBuilder -from rorapi.v1.models import MatchingResult as MatchingResultV1 from rorapi.v2.models import MatchingResult as MatchingResultV2 from collections import namedtuple @@ -200,25 +199,16 @@ def get_similarity(aff_sub, cand_name): return comparfun(aff_sub, cand_name) / 100 -def get_score(candidate, aff_sub, countries, version): +def get_score(candidate, aff_sub, countries): """Calculate the similarity between the affiliation substring and the candidate, using all name versions.""" - if version == "v2": - country_code = candidate.locations[0].geonames_details.country_code - all_names = [ - name["value"] for name in candidate.names if "acronym" not in name["types"] - ] - acronyms = [ - name["value"] for name in candidate.names if "acronym" in name["types"] - ] - else: - country_code = candidate.country.country_code - all_names = ( - [candidate.name] - + [l.label for l in candidate.labels] - + list(candidate.aliases) - ) - acronyms = candidate.acronyms + country_code = candidate.locations[0].geonames_details.country_code + all_names = [ + name["value"] for name in candidate.names if "acronym" not in name["types"] + ] + acronyms = [ + name["value"] for name in candidate.names if "acronym" in name["types"] + ] if countries and to_region(country_code) not in countries: return 0 @@ -239,11 +229,11 @@ def get_score(candidate, aff_sub, countries, version): MatchedOrganization.__new__.__defaults__ = (False, None, None, 0, None) -def match_by_query(text, matching_type, query, countries, version): +def match_by_query(text, matching_type, query, countries): """Match affiliation text using specific ES query.""" candidates = query.execute() scores = [ - (candidate, get_score(candidate, text, countries, version)) + (candidate, get_score(candidate, text, countries)) for candidate in candidates ] if not candidates: @@ -262,11 +252,10 @@ def match_by_query(text, matching_type, query, countries, version): return 
chosen, all_matched -def match_by_type(text, matching_type, countries, version): +def match_by_type(text, matching_type, countries): """Match affiliation text using specific matching mode/type.""" - fields_v1 = ["name.norm", "aliases.norm", "labels.label.norm"] - fields_v2 = ["names.value.norm"] + fields = ["names.value.norm"] substrings = [] if matching_type == MATCHING_TYPE_HEURISTICS: h1 = re.search(r"University of ([^\s]+)", text) @@ -289,12 +278,7 @@ def match_by_type(text, matching_type, countries, version): else: substrings.append(text) - queries = [ESQueryBuilder(version) for _ in substrings] - - if version == "v2": - fields = fields_v2 - else: - fields = fields_v1 + queries = [ESQueryBuilder() for _ in substrings] for s, q in zip(substrings, queries): if matching_type == MATCHING_TYPE_PHRASE: @@ -309,7 +293,7 @@ def match_by_type(text, matching_type, countries, version): q.add_common_query(fields, normalize(text)) queries = [q.get_query() for q in queries] matched = [ - match_by_query(t, matching_type, q, countries, version) + match_by_query(t, matching_type, q, countries) for t, q in zip(substrings, queries) ] if not matched: @@ -327,16 +311,15 @@ class MatchingNode: """Matching node class. Represents a substring of the original affiliation that potentially could be matched to an organization.""" - def __init__(self, text, version): + def __init__(self, text): self.text = text - self.version = version self.matched = None self.all_matched = [] def match(self, countries, min_score): for matching_type in NODE_MATCHING_TYPES: chosen, all_matched = match_by_type( - self.text, matching_type, countries, self.version + self.text, matching_type, countries ) self.all_matched.extend(all_matched) if self.matched is None: @@ -388,20 +371,19 @@ class MatchingGraph: This prevents matching an organization to a substring and another organization to the substring's substring.""" - def __init__(self, affiliation, version): + def __init__(self, affiliation): self.nodes = [] - self.version = version self.affiliation = affiliation affiliation = re.sub("&", "&", affiliation) affiliation_cleaned = clean_search_string(affiliation) - n = MatchingNode(affiliation_cleaned, self.version) + n = MatchingNode(affiliation_cleaned) self.nodes.append(n) for part in [s.strip() for s in re.split("[,;:]", affiliation)]: part_cleaned = clean_search_string(part) do_not_match = check_do_not_match(part_cleaned) # do not perform search if substring exactly matches a country name or ISO code if do_not_match == False: - n = MatchingNode(part_cleaned, self.version) + n = MatchingNode(part_cleaned) self.nodes.append(n) def remove_low_scores(self, min_score): @@ -422,7 +404,7 @@ def match(self, countries, min_score): ]: chosen.append(node.matched) acr_chosen, acr_all_matched = match_by_type( - self.affiliation, MATCHING_TYPE_ACRONYM, countries, self.version + self.affiliation, MATCHING_TYPE_ACRONYM, countries ) all_matched.extend(acr_all_matched) return chosen, all_matched @@ -492,33 +474,31 @@ def get_output(chosen, all_matched, active_only): return sorted(output, key=lambda x: x.score, reverse=True)[:100] -def check_exact_match(affiliation, countries, version): - qb = ESQueryBuilder(version) +def check_exact_match(affiliation, countries): + qb = ESQueryBuilder() qb.add_string_query('"' + affiliation + '"') return match_by_query( - affiliation, MATCHING_TYPE_EXACT, qb.get_query(), countries, version + affiliation, MATCHING_TYPE_EXACT, qb.get_query(), countries ) -def match_affiliation(affiliation, active_only, version): +def 
match_affiliation(affiliation, active_only): countries = get_countries(affiliation) - exact_chosen, exact_all_matched = check_exact_match(affiliation, countries, version) + exact_chosen, exact_all_matched = check_exact_match(affiliation, countries) if exact_chosen.score == 1.0: return get_output(exact_chosen, exact_all_matched, active_only) else: - graph = MatchingGraph(affiliation, version) + graph = MatchingGraph(affiliation) chosen, all_matched = graph.match(countries, MIN_CHOSEN_SCORE) return get_output(chosen, all_matched, active_only) -def match_organizations(params, version): +def match_organizations(params): if "affiliation" in params: active_only = True if "all_status" in params: if params["all_status"] == "" or params["all_status"].lower() == "true": active_only = False - matched = match_affiliation(params.get("affiliation"), active_only, version) - if version == "v2": - return None, MatchingResultV2(matched) - return None, MatchingResultV1(matched) + matched = match_affiliation(params.get("affiliation"), active_only) + return None, MatchingResultV2(matched) return Errors('"affiliation" parameter missing'), None diff --git a/rorapi/common/matching_single_search.py b/rorapi/common/matching_single_search.py index db34bcd1..ac3b51ec 100644 --- a/rorapi/common/matching_single_search.py +++ b/rorapi/common/matching_single_search.py @@ -7,7 +7,6 @@ from rorapi.common.models import Errors from rorapi.settings import ES7 from rorapi.common.es_utils import ESQueryBuilder -from rorapi.v1.models import MatchingResult as MatchingResultV1 from rorapi.v2.models import MatchingResult as MatchingResultV2 from collections import namedtuple @@ -296,23 +295,20 @@ def get_output(chosen, all_matched): return all_matched -def get_candidates(aff, countries, version): - qb = ESQueryBuilder(version) +def get_candidates(aff, countries): + qb = ESQueryBuilder() qb.add_affiliation_query(aff, 200) return match_by_query(aff, qb.get_query(), countries) -def match_affiliation(affiliation, version): +def match_affiliation(affiliation): countries = get_countries(affiliation) - chosen, all_matched = get_candidates(affiliation, countries, version) + chosen, all_matched = get_candidates(affiliation, countries) return get_output(chosen, all_matched) -def match_organizations(params, version): +def match_organizations(params): if "affiliation" in params: - matched = match_affiliation(params.get("affiliation"), version) - - if version == "v2": - return None, MatchingResultV2(matched) - return None, MatchingResultV1(matched) + matched = match_affiliation(params.get("affiliation")) + return None, MatchingResultV2(matched) return Errors(["'affiliation' parameter missing"]), None \ No newline at end of file diff --git a/rorapi/common/queries.py b/rorapi/common/queries.py index 7bedcfdd..c7fc472e 100644 --- a/rorapi/common/queries.py +++ b/rorapi/common/queries.py @@ -5,10 +5,6 @@ from rorapi.common.models import Errors from rorapi.common.matching import match_affiliation -from rorapi.v1.models import ( - Organization as OrganizationV1, - ListResult as ListResultV1 -) from rorapi.v2.models import ( Organization as OrganizationV2, ListResult as ListResultV2 @@ -18,74 +14,9 @@ from urllib.parse import unquote -ALLOWED_FILTERS_V1 = ("country.country_code", "types", "country.country_name", "status") ALLOWED_FILTERS_V2 = ("country.country_code", "locations.geonames_details.country_code", "types", "country.country_name", "locations.geonames_details.country_name", "status", "locations.geonames_details.continent_code", 
"locations.geonames_details.continent_name") ALLOWED_PARAM_KEYS = ("query", "page", "filter", "query.advanced", "all_status") ALLOWED_ALL_STATUS_VALUES = ("", "true", "false") -# includes deprecated ext id types -ALLOWED_FIELDS_V1 = ( - "acronyms", - "addresses.city", - "addresses.country_geonames_id", - "addresses.geonames_city.city", - "addresses.geonames_city.geonames_admin1.ascii_name", - "addresses.geonames_city.geonames_admin1.code", - "addresses.geonames_city.geonames_admin1.name", - "addresses.geonames_city.geonames_admin2.ascii_name", - "addresses.geonames_city.geonames_admin2.code", - "addresses.geonames_city.geonames_admin2.name", - "addresses.geonames_city.id", - "addresses.geonames_city.license.attribution", - "addresses.geonames_city.license.license", - "addresses.geonames_city.nuts_level1.code", - "addresses.geonames_city.nuts_level1.name", - "addresses.geonames_city.nuts_level2.code", - "addresses.geonames_city.nuts_level2.name", - "addresses.geonames_city.nuts_level3.code", - "addresses.geonames_city.nuts_level3.name", - "addresses.lat", - "addresses.line", - "addresses.lng", - "addresses.postcode", - "addresses.primary", - "addresses.state", - "addresses.state_code", - "aliases", - "country.country_code", - "country.country_name", - "email_address", - "established", - "external_ids.CNRS.all", - "external_ids.CNRS.preferred", - "external_ids.FundRef.all", - "external_ids.FundRef.preferred", - "external_ids.HESA.all", - "external_ids.HESA.preferred", - "external_ids.GRID.all", - "external_ids.GRID.preferred", - "external_ids.ISNI.all", - "external_ids.ISNI.preferred", - "external_ids.OrgRef.all", - "external_ids.OrgRef.preferred", - "external_ids.UCAS.all", - "external_ids.UCAS.preferred", - "external_ids.UKPRNS.all", - "external_ids.UKPRNS.preferred", - "external_ids.Wikidata.all", - "external_ids.Wikidata.preferred", - "id", - "ip_addresses", - "labels.iso639", - "labels.label", - "links", - "name", - "relationships.id", - "relationships.label", - "relationships.type", - "status", - "types", - "wikipedia_url", -) ALLOWED_FIELDS_V2 = ( "admin.created.date", "admin.created.schema_version", @@ -155,12 +86,9 @@ def check_status_adv_q(adv_q_string): status_in_q = True return status_in_q -def get_country_name_filters(country_name_field, filter_string, version): +def get_country_name_filters(country_name_field, filter_string): country_name_filters = [] - if version == "v1": - allowed_filters = ALLOWED_FILTERS_V1 - else: - allowed_filters = ALLOWED_FILTERS_V2 + allowed_filters = ALLOWED_FILTERS_V2 search = re.findall(country_name_field + ":([^:]*)", filter_string) if search: for s in search: @@ -175,26 +103,19 @@ def get_country_name_filters(country_name_field, filter_string, version): return country_name_filters -def filter_string_to_list(filter_string, version): +def filter_string_to_list(filter_string): filter_list = [] - if version == "v2": - if "country.country_code" in filter_string: - filter_string = filter_string.replace( - "country.country_code", "locations.geonames_details.country_code" - ) - if "country.country_name" in filter_string: - filter_string = filter_string.replace( - "country.country_name", "locations.geonames_details.country_name" - ) - # some country names contain comma chars - # allow comma chars in country_name filter values only - # country.country_name:Germany,types:Company - if version == "v1": - country_name_field = "country.country_name" - else: - country_name_field = "locations.geonames_details.country_code" + if "country.country_code" in 
filter_string: + filter_string = filter_string.replace( + "country.country_code", "locations.geonames_details.country_code" + ) + if "country.country_name" in filter_string: + filter_string = filter_string.replace( + "country.country_name", "locations.geonames_details.country_name" + ) + country_name_field = "locations.geonames_details.country_code" if country_name_field in filter_string: - country_name_filters = get_country_name_filters(country_name_field, filter_string, version) + country_name_filters = get_country_name_filters(country_name_field, filter_string) filter_list = [f for f in filter_string.split(",") if f] filter_list = filter_list + country_name_filters else: @@ -202,7 +123,7 @@ def filter_string_to_list(filter_string, version): return filter_list -def validate(params, version): +def validate(params): """Validates API GET parameters. Returns an error object that can be serialized into JSON or None.""" @@ -226,30 +147,19 @@ def validate(params, version): ) adv_query_fields = adv_query_string_to_list(params.get("query.advanced", "")) - illegal_fields = [] - if version == "v2": - illegal_fields = [ - f - for f in adv_query_fields - if ( - not f.endswith(tuple(ALLOWED_FIELDS_V2)) - and not f.endswith(tuple(ALLOWED_ENDINGS)) - ) - ] - else: - illegal_fields = [ - f - for f in adv_query_fields - if ( - not f.endswith(tuple(ALLOWED_FIELDS_V1)) - and not f.endswith(tuple(ALLOWED_ENDINGS)) - ) - ] + illegal_fields = [ + f + for f in adv_query_fields + if ( + not f.endswith(tuple(ALLOWED_FIELDS_V2)) + and not f.endswith(tuple(ALLOWED_ENDINGS)) + ) + ] errors.extend( ["string '{}' contains an illegal field name".format(f) for f in illegal_fields] ) - filters = filter_string_to_list(params.get("filter", ""), version) + filters = filter_string_to_list(params.get("filter", "")) invalid_filters = [f for f in filters if ":" not in f] errors.extend( ["filter '{}' is not in the key:value form".format(n) for n in invalid_filters] @@ -257,11 +167,7 @@ def validate(params, version): valid_filters = [f for f in filters if ":" in f] filter_keys = [f.split(":")[0] for f in valid_filters] - if version == "v1": - allowed_filters = ALLOWED_FILTERS_V1 - else: - allowed_filters = ALLOWED_FILTERS_V2 - illegal_keys = [v for v in filter_keys if v not in allowed_filters] + illegal_keys = [v for v in filter_keys if v not in ALLOWED_FILTERS_V2] errors.extend(["filter key '{}' is illegal".format(k) for k in illegal_keys]) if "page" in params: @@ -279,10 +185,10 @@ def validate(params, version): return Errors(errors) if errors else None -def build_search_query(params, version): +def build_search_query(params): """Builds search query from API parameters""" - qb = ESQueryBuilder(version) + qb = ESQueryBuilder() ror_id = None if "all_status" in params: @@ -303,17 +209,14 @@ def build_search_query(params, version): if "filter" in params or (not "all_status" in params): filters = [ f.split(":") - for f in filter_string_to_list(params.get("filter", ""), version) + for f in filter_string_to_list(params.get("filter", "")) if f ] # normalize filter values based on casing conventions used in ROR records for f in filters: f[1] = " ".join(f[1].split()) if f[0] == "types": - if version == "v2": - f[1] = f[1].lower() - else: - f[1] = f[1].title() + f[1] = f[1].lower() if f[0] == "country.country_code" or f[0] == "locations.geonames_details.country_code": f[1] = f[1].upper() if f[0] == "country.country_name" or f[0] == "locations.geonames_details.country_name": @@ -340,51 +243,37 @@ def build_search_query(params, version): 
filter_dict.update({"status": ["active"]}) qb.add_filters(filter_dict) - if version == "v2": - qb.add_aggregations( - [ - ("types", "types"), - ("countries", "locations.geonames_details.country_code"), - ("continents", "locations.geonames_details.continent_code"), - ("statuses", "status"), - ] - ) - else: - qb.add_aggregations( - [ - ("types", "types"), - ("countries", "country.country_code"), - ("statuses", "status"), - ] - ) - - sort_field = params.get("sort", "id") - sort_order = params.get("order", "asc") + qb.add_aggregations( + [ + ("types", "types"), + ("countries", "locations.geonames_details.country_code"), + ("continents", "locations.geonames_details.continent_code"), + ("statuses", "status"), + ] + ) qb.paginate(int(params.get("page", 1))) return qb.get_query() -def build_retrieve_query(ror_id, version): +def build_retrieve_query(ror_id): """Builds retrieval query""" - qb = ESQueryBuilder(version) + qb = ESQueryBuilder() qb.add_id_query(ror_id) return qb.get_query() -def search_organizations(params, version): +def search_organizations(params): """Searches for organizations according to the parameters""" - error = validate(params, version) + error = validate(params) if error is not None: return error, None - search = build_search_query(params, version) - if version == "v2": - return None, ListResultV2(search.execute()) - return None, ListResultV1(search.execute()) + search = build_search_query(params) + return None, ListResultV2(search.execute()) -def retrieve_organization(ror_id, version): +def retrieve_organization(ror_id): """Retrieves the organization of the given ROR ID""" if any(ror_id in ror_id_url for ror_id_url in GRID_REMOVED_IDS): return ( @@ -399,11 +288,9 @@ def retrieve_organization(ror_id, version): ), None, ) - search = build_retrieve_query(ror_id, version) + search = build_retrieve_query(ror_id) results = search.execute() total = results.hits.total.value if total > 0: - if version == "v2": - return None, OrganizationV2(results[0]) - return None, OrganizationV1(results[0]) + return None, OrganizationV2(results[0]) return Errors(["ROR ID '{}' does not exist".format(ror_id)]), None diff --git a/rorapi/common/urls.py b/rorapi/common/urls.py index 277e0bd0..581a0214 100644 --- a/rorapi/common/urls.py +++ b/rorapi/common/urls.py @@ -7,18 +7,18 @@ urlpatterns = [ # Health check - url(r"^(?P(v1|v2))\/heartbeat$", HeartbeatView.as_view()), + url(r"^(?Pv2)\/heartbeat$", HeartbeatView.as_view()), url(r"^heartbeat$", HeartbeatView.as_view()), # Using REST API - url(r"^(?P(v1|v2))\/generateaddress\/(?P[0-9]+)", GenerateAddress.as_view()), + url(r"^(?Pv2)\/generateaddress\/(?P[0-9]+)", GenerateAddress.as_view()), path('generateaddress/', GenerateAddress.as_view()), url(r"^generateid$", GenerateId.as_view()), - re_path(r"^(?P(v1|v2))\/bulkupdate$", BulkUpdate.as_view()), - re_path(r"^(?P(v1|v2))\/register$", ClientRegistrationView.as_view()), + re_path(r"^(?Pv2)\/bulkupdate$", BulkUpdate.as_view()), + re_path(r"^(?Pv2)\/register$", ClientRegistrationView.as_view()), path('validate-client-id//', ValidateClientView.as_view()), - url(r"^(?P(v1|v2))\/indexdata/(?P.*)", IndexData.as_view()), - url(r"^(?P(v1|v2))\/indexdatadump\/(?Pv(\d+\.)?(\d+\.)?(\*|\d+)-\d{4}-\d{2}-\d{2}-ror-data)\/(?P(test|prod))$", IndexDataDump.as_view()), - url(r"^(?P(v1|v2))\/", include(views.organizations_router.urls)), + url(r"^(?Pv2)\/indexdata/(?P.*)", IndexData.as_view()), + url(r"^(?Pv2)\/indexdatadump\/(?Pv(\d+\.)?(\d+\.)?(\*|\d+)-\d{4}-\d{2}-\d{2}-ror-data)\/(?P(test|prod))$", 
IndexDataDump.as_view()), + url(r"^(?Pv2)\/", include(views.organizations_router.urls)), url(r"^", include(views.organizations_router.urls)), url(r"^docs/", include_docs_urls(title="Research Organization Registry")), # Prometheus diff --git a/rorapi/common/views.py b/rorapi/common/views.py index a180c014..cd699aa1 100644 --- a/rorapi/common/views.py +++ b/rorapi/common/views.py @@ -22,11 +22,6 @@ ) from rorapi.common.serializers import ErrorsSerializer -from rorapi.v1.serializers import ( - OrganizationSerializer as OrganizationSerializerV1, - ListResultSerializer as ListResultSerializerV1, - MatchingResultSerializer as MatchingResultSerializerV1 -) from rorapi.v2.serializers import ( OrganizationSerializer as OrganizationSerializerV2, ListResultSerializer as ListResultSerializerV2, @@ -156,28 +151,18 @@ def list(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): if "format" in params: del params["format"] if "affiliation" in params: - if version == "v2": - if "single_search" in params: - # errors, organizations = match_organizations(params, version) - errors, organizations = single_search_match_organizations(params, version) - else: - errors, organizations = match_organizations(params, version) + if "single_search" in params: + errors, organizations = single_search_match_organizations(params) else: - errors, organizations = match_organizations(params, version) + errors, organizations = match_organizations(params) else: - errors, organizations = search_organizations(params, version) + errors, organizations = search_organizations(params) if errors is not None: return Response(ErrorsSerializer(errors).data) if "affiliation" in params: - if version == "v2": - serializer = MatchingResultSerializerV2(organizations) - else: - serializer = MatchingResultSerializerV1(organizations) + serializer = MatchingResultSerializerV2(organizations) else: - if version == "v2": - serializer = ListResultSerializerV2(organizations) - else: - serializer = ListResultSerializerV1(organizations) + serializer = ListResultSerializerV2(organizations) return Response(serializer.data) def retrieve(self, request, pk=None, version=REST_FRAMEWORK["DEFAULT_VERSION"]): @@ -187,29 +172,23 @@ def retrieve(self, request, pk=None, version=REST_FRAMEWORK["DEFAULT_VERSION"]): return Response( ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND ) - errors, organization = retrieve_organization(ror_id, version) + errors, organization = retrieve_organization(ror_id) if errors is not None: return Response( ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND ) - if version == "v2": - serializer = OrganizationSerializerV2(organization) - else: - serializer = OrganizationSerializerV1(organization) + serializer = OrganizationSerializerV2(organization) return Response(serializer.data) def create(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): errors = None - if version == "v2": - json_input = request.data - if 'id' in json_input and (json_input['id'] is not None and json_input['id'] != ""): - errors = Errors(["Value {} found in ID field. New records cannot contain a value in the ID field".format(json_input['id'])]) - else: - create_error, valid_data = new_record_from_json(json_input, version) - if create_error: - errors = Errors([create_error]) + json_input = request.data + if 'id' in json_input and (json_input['id'] is not None and json_input['id'] != ""): + errors = Errors(["Value {} found in ID field. 
New records cannot contain a value in the ID field".format(json_input['id'])]) else: - errors = Errors(["Version {} does not support creating records".format(version)]) + create_error, valid_data = new_record_from_json(json_input, 'v2') + if create_error: + errors = Errors([create_error]) if errors is not None: return Response( ErrorsSerializer(errors).data, status=status.HTTP_400_BAD_REQUEST @@ -219,29 +198,26 @@ def create(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): def update(self, request, pk=None, version=REST_FRAMEWORK["DEFAULT_VERSION"]): errors = None - if version == "v2": - ror_id = get_ror_id(pk) - if ror_id is None: - errors = Errors(["'{}' is not a valid ROR ID".format(pk)]) - return Response( - ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND - ) - errors, organization = retrieve_organization(ror_id, version) - if organization is None: - return Response( - ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND - ) - json = request.data - if 'id' not in json: - errors = Errors(["No value found in ID field. Updated records must include a value in the ID field"]) - elif get_ror_id(json['id']) != ror_id: - errors = Errors(["Value {} in IDs field does not match resource ID specified in request URL {}".format(json['id'], pk)]) - else: - update_error, valid_data = update_record_from_json(json, organization) - if update_error: - errors = Errors([update_error]) + ror_id = get_ror_id(pk) + if ror_id is None: + errors = Errors(["'{}' is not a valid ROR ID".format(pk)]) + return Response( + ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND + ) + errors, organization = retrieve_organization(ror_id) + if organization is None: + return Response( + ErrorsSerializer(errors).data, status=status.HTTP_404_NOT_FOUND + ) + json = request.data + if 'id' not in json: + errors = Errors(["No value found in ID field. 
Updated records must include a value in the ID field"]) + elif get_ror_id(json['id']) != ror_id: + errors = Errors(["Value {} in IDs field does not match resource ID specified in request URL {}".format(json['id'], pk)]) else: - errors = Errors(["Version {} does not support creating records".format(version)]) + update_error, valid_data = update_record_from_json(json, organization) + if update_error: + errors = Errors([update_error]) if errors is not None: return Response( ErrorsSerializer(errors).data, status=status.HTTP_400_BAD_REQUEST @@ -258,14 +234,10 @@ def update(self, request, pk=None, version=REST_FRAMEWORK["DEFAULT_VERSION"]): class HeartbeatView(View): def get(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): - print(version) try: - index = ES_VARS['INDEX_V1'] - if version == 'v2': - index = ES_VARS['INDEX_V2'] - if ES7.indices.exists(index): + if ES7.indices.exists(ES_VARS['INDEX_V2']): return HttpResponse("OK") - except: + except Exception: pass return HttpResponse(status=500) @@ -274,10 +246,7 @@ class GenerateAddress(APIView): permission_classes = [OurTokenPermission] def get(self, request, geonamesid, version=REST_FRAMEWORK["DEFAULT_VERSION"]): - if version == 'v2': - address = ua.new_geonames_v2(geonamesid) - else: - address = ua.new_geonames(geonamesid) + address = ua.new_geonames_v2(geonamesid) return Response(address) @@ -285,7 +254,7 @@ class GenerateId(APIView): permission_classes = [OurTokenPermission] def get(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): - id = check_ror_id(version) + id = check_ror_id() print("Generated ID: {}".format(id)) return Response({"id": id}) @@ -294,7 +263,7 @@ class IndexData(APIView): def get(self, request, branch, version=REST_FRAMEWORK["DEFAULT_VERSION"]): st = 200 - msg = process_files(branch, version) + msg = process_files(branch, 'v2') if msg["status"] == "ERROR": st = 400 return Response({"status": msg["status"], "msg": msg["msg"]}, status=st) @@ -323,9 +292,11 @@ class BulkUpdate(APIView): def post(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): validate_only = False errors = None - if version == 'v2': - if request.data: - file_object = request.data['file'] + if request.data: + file_object = request.data.get('file') + if file_object is None: + errors = Errors(["File upload required. 'file' field is missing."]) + else: mime_type = magic.from_buffer(file_object.read(2048)) print(mime_type) if "ASCII text" in mime_type or "UTF-8 text" in mime_type or "UTF-8 Unicode text" in mime_type or "CSV text" in mime_type: @@ -336,17 +307,15 @@ def post(self, request, version=REST_FRAMEWORK["DEFAULT_VERSION"]): params = request.GET.dict() if "validate" in params: validate_only = True - process_csv_error, msg = process_csv(file_object, version, validate_only) + process_csv_error, msg = process_csv(file_object, 'v2', validate_only) if process_csv_error: errors = Errors([process_csv_error]) else: - errors=Errors(csv_validation_errors) + errors = Errors(csv_validation_errors) else: errors = Errors(["File upload must be CSV. File type '{}' is not supported".format(mime_type)]) - else: - errors = Errors(["Could not processs request. No data included in request."]) else: - errors = Errors(["Version {} does not support creating records".format(version)]) + errors = Errors(["Could not process request. 
No data included in request."]) if errors is not None: print(errors.__dict__) return Response( diff --git a/rorapi/management/commands/generaterorid.py b/rorapi/management/commands/generaterorid.py index a743981b..f240403e 100644 --- a/rorapi/management/commands/generaterorid.py +++ b/rorapi/management/commands/generaterorid.py @@ -17,13 +17,13 @@ def generate_ror_id(): return '{}0{}{}'.format(ROR_API['ID_PREFIX'], n_encoded, checksum) -def check_ror_id(version): +def check_ror_id(): """Checks if generated ror id exists in the index. If so, it generates a new id, otherwise it returns the generated ror id """ ror_id = get_ror_id(generate_ror_id()) - errors, organization = retrieve_organization(ror_id, version) + errors, organization = retrieve_organization(ror_id) if errors is None: - return check_ror_id(version) + return check_ror_id() return ror_id diff --git a/rorapi/middleware/deprecation.py b/rorapi/middleware/deprecation.py deleted file mode 100644 index 905ab933..00000000 --- a/rorapi/middleware/deprecation.py +++ /dev/null @@ -1,33 +0,0 @@ -from django.http import JsonResponse -from django.conf import settings - - -class V1DeprecationMiddleware: - """ - Middleware to return 410 Gone status for deprecated v1 API endpoints. - - This middleware checks if V1_DEPRECATED setting is enabled, and if so, - returns a 410 status code with a deprecation message for any requests - to /v1 endpoints. - """ - - def __init__(self, get_response): - self.get_response = get_response - - def __call__(self, request): - # Check if v1 deprecation is enabled and path starts with /v1 - if getattr(settings, 'V1_DEPRECATED', False): - if request.path.startswith('/v1/') or request.path == '/v1': - return JsonResponse( - { - 'errors': [{ - 'status': '410', - 'title': 'API Version Deprecated', - 'detail': 'The v1 API has been deprecated. Please migrate to v2.' 
- }] - }, - status=410 - ) - - response = self.get_response(request) - return response diff --git a/rorapi/settings.py b/rorapi/settings.py index db643c8a..6ae0cd5b 100644 --- a/rorapi/settings.py +++ b/rorapi/settings.py @@ -65,7 +65,6 @@ MIDDLEWARE = [ 'django_prometheus.middleware.PrometheusBeforeMiddleware', 'corsheaders.middleware.CorsMiddleware', - 'rorapi.middleware.deprecation.V1DeprecationMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', @@ -101,7 +100,7 @@ 'DEFAULT_RENDERER_CLASSES': ('rest_framework.renderers.JSONRenderer', ), 'DEFAULT_VERSIONING_CLASS': 'rest_framework.versioning.URLPathVersioning', 'DEFAULT_VERSION': 'v2', - 'ALLOWED_VERSIONS': ['v1','v2'], + 'ALLOWED_VERSIONS': ['v2'], } @@ -153,7 +152,6 @@ STATIC_ROOT = os.path.join(BASE_DIR, 'static/') ES_VARS = { - 'INDEX_V1': 'organizations', # Kept for v1 API queries (backward compatibility) 'INDEX_V2': 'organizations-v2', 'INDEX_TEMPLATE_ES7_V2': os.path.join(BASE_DIR, 'rorapi', 'v2', 'index_template_es7.json'), 'BATCH_SIZE': 20, @@ -299,7 +297,6 @@ LAUNCH_DARKLY_KEY = os.environ.get('LAUNCH_DARKLY_KEY') # Toggle for behavior-based rate limiting -import os ENABLE_BEHAVIORAL_LIMITING = os.getenv("ENABLE_BEHAVIORAL_LIMITING", "False") == "True" # Email settings for Django @@ -307,6 +304,3 @@ AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID') AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY') AWS_SES_REGION_NAME = os.environ.get('AWS_REGION', 'eu-west-1') - -# API Deprecation -V1_DEPRECATED = os.environ.get("V1_DEPRECATED", "False").lower() in ("true", "1", "yes") \ No newline at end of file diff --git a/rorapi/tests/tests_functional/tests_search_v1.py b/rorapi/tests/tests_functional/tests_search_v1.py deleted file mode 100644 index e792184a..00000000 --- a/rorapi/tests/tests_functional/tests_search_v1.py +++ /dev/null @@ -1,104 +0,0 @@ -import json -import os -import re - -from .evaluation import search, escape_query -from django.test import SimpleTestCase -from statsmodels.stats.api import DescrStatsW, proportion_confint - -RANK_MAX_QUERY = 2.315534 -R1_MIN_QUERY = 0.749118 -R5_MIN_QUERY = 0.913082 - -RANK_MAX_QUERY_FUZZY = 2.619402 -R1_MIN_QUERY_FUZZY = 0.728343 -R5_MIN_QUERY_FUZZY = 0.902090 - -API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') -API_VERSION = 'v1' - - -def get_rank(ror_id, items): - for i, item in enumerate(items): - if ror_id == item['id']: - return i + 1 - return 21 - - -def mean_rank(ranks): - return sum(ranks) / len(ranks), DescrStatsW(ranks).tconfint_mean() - - -def recall_at_n(ranks, n): - s = len([r for r in ranks if r <= n]) - a = len(ranks) - return s / a, proportion_confint(s, a) - - -class SearchTestCase(SimpleTestCase): - def set_up(self, param, rank_max, r1_min, r5_min): - with open( - os.path.join(os.path.dirname(__file__), - 'data/dataset_names.json')) as names_file: - data = json.load(names_file) - data_query = [] - for i, d in enumerate(data): - data_query.append((d, search(API_URL, param, d['affiliation'], API_VERSION))) - if i % 100 == 0: - print('Progress: {0:.2f}%'.format(100 * i / len(data))) - self.ranks = [ - get_rank(case['ror-id'], items) for case, items in data_query - ] - self.rank_max = rank_max - self.r1_min = r1_min - self.r5_min = r5_min - - def validate(self, name): - mean, ci = mean_rank(self.ranks) - print('\nMean rank for {}: {} {}'.format(name, mean, ci)) - self.assertTrue(mean <= self.rank_max) - - recall_1, ci = 
recall_at_n(self.ranks, 1) - print('Recall@1 for {}: {} {}'.format(name, recall_1, ci)) - self.assertTrue(recall_1 >= self.r1_min) - - recall_5, ci = recall_at_n(self.ranks, 5) - print('Recall@5 for {}: {} {}'.format(name, recall_5, ci)) - self.assertTrue(recall_5 >= self.r5_min) - - -class QueryFuzzySearchTestCase(SearchTestCase): - def setUp(self): - self.param = 'query' - with open( - os.path.join(os.path.dirname(__file__), - 'data/dataset_names.json')) as names_file: - data = json.load(names_file) - data_query = [] - for i, d in enumerate(data): - data_query.append((d, - search(API_URL, - 'query', - re.sub('([^ ])(?= |$)', r'\g<1>~', - escape_query(d['affiliation'])), - API_VERSION, - escape=False))) - if i % 100 == 0: - print('Progress: {0:.2f}%'.format(100 * i / len(data))) - self.ranks = [ - get_rank(case['ror-id'], items) for case, items in data_query - ] - self.rank_max = RANK_MAX_QUERY_FUZZY - self.r1_min = R1_MIN_QUERY_FUZZY - self.r5_min = R5_MIN_QUERY_FUZZY - - def test_search_query(self): - self.validate('query (fuzzy)') - - -class QuerySearchTestCase(SearchTestCase): - def setUp(self): - self.set_up('query', RANK_MAX_QUERY, R1_MIN_QUERY, R5_MIN_QUERY) - - def test_search_query(self): - self.validate('query') diff --git a/rorapi/tests/tests_integration/tests_matching_v1.py b/rorapi/tests/tests_integration/tests_matching_v1.py deleted file mode 100644 index 379e3e66..00000000 --- a/rorapi/tests/tests_integration/tests_matching_v1.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import re -import requests - -from django.test import SimpleTestCase - -BASE_URL = '{}/v1/organizations'.format( - os.environ.get('ROR_BASE_URL', 'http://localhost')) - - -class APIMatchingTestCase(SimpleTestCase): - def test_query_organizations(self): - output = requests.get(BASE_URL, { - 'affiliation': 'university of warsaw' - }).json() - - self.assertTrue(output['number_of_results'] > 1) - - for k in ['number_of_results', 'items']: - self.assertTrue(k in output) - - prev = 1 - for i in output['items']: - for k in [ - 'substring', 'score', 'matching_type', 'chosen', - 'organization' - ]: - self.assertTrue(k in i) - - for k in ['id', 'name']: - self.assertTrue(k in i.get('organization')) - self.assertIsNotNone( - re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', - i.get('organization').get('id'))) - - self.assertEqual(i.get('substring'), 'university of warsaw') - self.assertTrue(i.get('score') > 0) - self.assertTrue(i.get('score') <= 1) - self.assertTrue(i.get('score') <= prev) - prev = i.get('score') - self.assertTrue( - i.get('matching_type') in - ['PHRASE', 'ACRONYM', 'FUZZY', 'HEURISTICS', 'COMMON TERMS', 'EXACT']) diff --git a/rorapi/tests/tests_integration/tests_search_v1.py b/rorapi/tests/tests_integration/tests_search_v1.py deleted file mode 100644 index dca97986..00000000 --- a/rorapi/tests/tests_integration/tests_search_v1.py +++ /dev/null @@ -1,135 +0,0 @@ -import os -import requests - -from django.test import SimpleTestCase - -BASE_URL = '{}/v1/organizations'.format( - os.environ.get('ROR_BASE_URL', 'http://localhost')) - - -class QueryTestCase(SimpleTestCase): - def test_exact(self): - items = requests.get(BASE_URL, { - 'query': 'Centro Universitário do Maranhão' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get(BASE_URL, { - 'query': 'Julius-Maximilians-Universität Würzburg' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 
'https://ror.org/00fbnyb24') - - def test_lowercase(self): - items = requests.get(BASE_URL, { - 'query': 'centro universitário do maranhão' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get(BASE_URL, { - 'query': 'julius-maximilians-universität würzburg' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') - - def test_accents_stripped(self): - items = requests.get(BASE_URL, { - 'query': 'centro universitario do maranhao' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get(BASE_URL, { - 'query': 'julius-maximilians-universitat wurzburg' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') - - def test_extra_word(self): - items = requests.get(BASE_URL, { - 'query': 'Centro Universitário do Maranhão School' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get( - BASE_URL, { - 'query': 'Julius-Maximilians-Universität Würzburg School' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') - - -class QueryFuzzyTestCase(SimpleTestCase): - def test_exact(self): - items = requests.get(BASE_URL, { - 'query': 'Centro~ Universitário~ do~ Maranhão~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get( - BASE_URL, { - 'query': 'Julius~ Maximilians~ Universität~ Würzburg~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') - - def test_lowercase(self): - items = requests.get(BASE_URL, { - 'query': 'centro~ universitário~ do~ maranhão~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get( - BASE_URL, { - 'query': 'julius~ maximilians~ universität~ würzburg~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') - - def test_accents_stripped(self): - items = requests.get(BASE_URL, { - 'query': 'centro~ universitario~ do~ maranhao~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get( - BASE_URL, { - 'query': 'julius~ maximilians~ universitat~ wurzburg~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') - - def test_typos(self): - items = requests.get(BASE_URL, { - 'query': 'centre~ universitario~ do~ marahao~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get( - BASE_URL, { - 'query': 'julius~ maximilian~ universitat~ wuerzburg~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') - - def test_extra_word(self): - items = requests.get( - BASE_URL, { - 'query': 'Centro~ 
Universitário~ do~ Maranhão~ School~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') - - items = requests.get( - BASE_URL, { - 'query': 'Julius~ Maximilians~ Universität~ Würzburg~ School~' - }).json() - self.assertTrue(items['number_of_results'] > 0) - self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') diff --git a/rorapi/tests/tests_integration/tests_v1.py b/rorapi/tests/tests_integration/tests_v1.py deleted file mode 100644 index 8073ffed..00000000 --- a/rorapi/tests/tests_integration/tests_v1.py +++ /dev/null @@ -1,302 +0,0 @@ -import itertools -import json -import os -import re -import requests - -from django.test import SimpleTestCase -from rorapi.settings import ROR_API, ES_VARS - -BASE_URL = '{}/v1/organizations'.format( - os.environ.get('ROR_BASE_URL', 'http://localhost')) - - -class APITestCase(SimpleTestCase): - def get_total(self, output): - return output['number_of_results'] - - def get_total_from_query(self, query): - return self.get_total(requests.get(BASE_URL, query).json()) - - def verify_full_list(self, output): - # TODO use JSON schema instead? - for k in ['number_of_results', 'time_taken', 'items', 'meta']: - self.assertTrue(k in output) - - self.assertEquals(len(output['items']), 20) - for i in output['items']: - for k in ['id', 'name']: - self.assertTrue(k in i) - self.assertIsNotNone( - re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', i['id'])) - - self.assertTrue('types' in output['meta']) - self.assertTrue(len(output['meta']['types']) > 0) - for t in output['meta']['types']: - self.assertTrue('id' in t) - self.assertTrue('count' in t) - - self.assertTrue('countries' in output['meta']) - self.assertTrue(len(output['meta']['countries']) > 0) - for t in output['meta']['countries']: - self.assertTrue('id' in t) - self.assertTrue('count' in t) - - self.assertTrue('statuses' in output['meta']) - self.assertTrue(len(output['meta']['statuses']) > 0) - for t in output['meta']['statuses']: - self.assertTrue('id' in t) - self.assertTrue('count' in t) - - def verify_empty(self, output): - self.assertEquals(self.get_total(output), 0) - self.assertEquals(output['items'], []) - self.assertEquals(output['meta'], {'types': [], 'statuses': [],'countries': []}) - - def verify_single_item(self, output, org): - self.assertEquals(self.get_total(output), 1) - self.assertEquals(output['items'][0], org) - - def test_list_organizations(self): - output = requests.get(BASE_URL).json() - - self.verify_full_list(output) - # sanity check - self.assertTrue(self.get_total(output) > 50000) - - def test_query_organizations(self): - total = self.get_total_from_query({}) - - output = requests.get(BASE_URL, {'query': 'university'}).json() - self.verify_full_list(output) - self.assertTrue(self.get_total(output) < total) - - def test_deprecated_queries(self): - for q in [{}, { - 'page': 7 - }, { - 'filter': 'country.country_code:US' - }, { - 'filter': 'country.country_code:US', - 'page': 3 - }]: - status_code = requests.get(BASE_URL, - dict(q, query='university')).status_code - if status_code != 200: - print("failing query: ", dict(q, query='university')) - output = requests.get(BASE_URL, dict(q, query='university')).json() - del output['time_taken'] - output_deprecated = requests.get( - BASE_URL, dict(q, **{'query.name': 'university'})).json() - del output_deprecated['time_taken'] - self.assertEqual(output_deprecated, output) - - def verify_paging(self, query): - total = 
self.get_total_from_query(query) - max_page = min(400, int(total / ROR_API['PAGE_SIZE'])) - outputs = [ - requests.get(BASE_URL, dict(query, page=n)).json() - for n in range(1, max_page + 1) - ] - - for output in outputs: - self.verify_full_list(output) - # all responses declare the same number of results - self.assertEquals(len(set([self.get_total(o) for o in outputs])), 1) - # IDs of the items listed are all distinct - self.assertEquals(len(set([o['items'][0]['id'] for o in outputs])), - max_page) - # all responses have the same aggregations - self.assertEquals(len(set([json.dumps(o['meta']) for o in outputs])), - 1) - - def test_paging(self): - self.verify_paging({}) - - self.verify_paging({'query': 'university'}) - self.verify_paging({ - 'query': 'university', - 'filter': 'types:Healthcare' - }) - - def test_iteration(self): - total = 10000 - ids = [] - for page in range(1, ES_VARS['MAX_PAGE'] + 1): - request = requests.get(BASE_URL, {'page': page}) - if request.status_code != 200: - print("failing query: ", {'page': page}) - output = requests.get(BASE_URL, {'page': page}).json() - ids.extend([i['id'] for i in output['items']]) - self.assertEquals(len(ids), total) - self.assertEquals(len(set(ids)), total) - - def verify_filtering(self, query): - aggregations = requests.get(BASE_URL, query).json()['meta'] - t_aggrs = aggregations['types'] - c_aggrs = aggregations['countries'] - s_aggrs = aggregations['statuses'] - - for t_aggr in t_aggrs: - filter_string = 'types:{}'.format(t_aggr['title']) - params = dict(query, filter=filter_string) - output = requests.get(BASE_URL, params).json() - - self.assertEquals(self.get_total(output), t_aggr['count']) - for i in output['items']: - self.assertTrue(t_aggr['title'] in i['types']) - self.assertTrue(any([t_aggr == t - for t in output['meta']['types']])) - - for c_aggr in c_aggrs: - filter_string = 'country.country_code:{}' \ - .format(c_aggr['id'].upper()) - params = dict(query, filter=filter_string) - output = requests.get(BASE_URL, params).json() - - self.assertEquals(self.get_total(output), c_aggr['count']) - for i in output['items']: - self.assertEquals(c_aggr['id'].upper(), - i['country']['country_code']) - self.assertTrue( - any([c_aggr == c for c in output['meta']['countries']])) - - for s_aggr in s_aggrs: - filter_string = 'status:{}'.format(s_aggr['title']) - params = dict(query, filter=filter_string) - output = requests.get(BASE_URL, params).json() - - self.assertEquals(self.get_total(output), s_aggr['count']) - for i in output['items']: - self.assertTrue(s_aggr['title'] in i['status']) - self.assertTrue(any([s_aggr == s - for s in output['meta']['statuses']])) - - for t_aggr, c_aggr, s_aggr in itertools.product(t_aggrs, c_aggrs, s_aggrs): - filter_string = 'country.country_code:{},types:{},status:{}' \ - .format(c_aggr['id'].upper(), t_aggr['title'], s_aggr['title']) - params = dict(query, filter=filter_string) - status_code = requests.get(BASE_URL, params).status_code - if status_code != 200: - print("failing params: ", params) - output = requests.get(BASE_URL, params).json() - if self.get_total(output) == 0: - self.verify_empty(output) - continue - self.assertTrue(self.get_total(output) <= t_aggr['count']) - self.assertTrue(self.get_total(output) <= c_aggr['count']) - self.assertTrue(self.get_total(output) <= s_aggr['count']) - for i in output['items']: - self.assertTrue(t_aggr['title'] in i['types']) - self.assertEquals(c_aggr['id'].upper(), - i['country']['country_code']) - self.assertTrue(s_aggr['title'] in i['status']) - 
self.assertTrue( - any([t_aggr['id'] == t['id'] - for t in output['meta']['types']])) - self.assertTrue( - any([ - c_aggr['id'] == c['id'] - for c in output['meta']['countries'] - ])) - self.assertTrue( - any([s_aggr['id'] == s['id'] - for s in output['meta']['statuses']])) - - def test_filtering(self): - self.verify_filtering({}) - self.verify_filtering({'query': 'university'}) - - def test_empty_output(self): - output = requests.get(BASE_URL, {'filter': 'types:notatype'}).json() - self.verify_empty(output) - - def test_query_retrieval(self): - for test_org in requests.get(BASE_URL).json()['items']: - for test_id in \ - [test_org['id'], - re.sub('https', 'http', test_org['id']), - re.sub(r'https:\/\/', '', test_org['id']), - re.sub(r'https:\/\/ror.org\/', '', test_org['id']), - re.sub(r'https:\/\/ror.org\/', r'ror.org%2F', test_org['id']), - re.sub(r'https:\/\/ror.org\/', r'http%3A%2F%2Fror.org%2F', - test_org['id']), - re.sub(r'https:\/\/ror.org\/', r'https%3A%2F%2Fror.org%2F', - test_org['id'])]: - output = requests.get(BASE_URL, {'query': test_id}).json() - self.verify_single_item(output, test_org) - - def test_retrieval(self): - for test_org in requests.get(BASE_URL).json()['items']: - for test_id in \ - [test_org['id'], - re.sub('https', 'http', test_org['id']), - re.sub(r'https:\/\/', '', test_org['id']), - re.sub(r'https:\/\/ror.org\/', '', test_org['id']), - re.sub(r'https:\/\/ror.org\/', r'ror.org%2F', test_org['id']), - re.sub(r'https:\/\/ror.org\/', r'http%3A%2F%2Fror.org%2F', - test_org['id']), - re.sub(r'https:\/\/ror.org\/', r'https%3A%2F%2Fror.org%2F', - test_org['id'])]: - output = requests.get(BASE_URL + '/' + test_id).json() - self.assertEquals(output, test_org) - - def test_query_grid_retrieval(self): - for test_org in requests.get(BASE_URL).json()['items']: - if 'GRID' in test_org['external_ids'].keys(): - grid = test_org['external_ids']['GRID']['preferred'] - output = requests.get(BASE_URL, {'query': '"' + grid + '"'}).json() - self.verify_single_item(output, test_org) - - def test_error(self): - output = requests.get(BASE_URL, { - 'query': 'query', - 'illegal': 'whatever', - 'another': 3 - }).json() - self.assertEquals(len(output['errors']), 2) - self.assertTrue(any(['\'illegal\'' in e for e in output['errors']])) - self.assertTrue(any(['\'another\'' in e for e in output['errors']])) - - output = requests.get(BASE_URL, { - 'query': 'query', - 'filter': 'fi1:e,types:F,f3,field2:44' - }).json() - self.assertEquals(len(output['errors']), 3) - self.assertTrue(any(['\'fi1\'' in e for e in output['errors']])) - self.assertTrue(any(['\'field2\'' in e for e in output['errors']])) - self.assertTrue(any(['\'f3\'' in e for e in output['errors']])) - - output = requests.get(BASE_URL, { - 'query': 'query', - 'page': 'whatever' - }).json() - self.assertEquals(len(output['errors']), 1) - self.assertTrue('\'whatever\'' in output['errors'][0]) - - output = requests.get(BASE_URL, { - 'query': 'query', - 'page': '10000' - }).json() - self.assertEquals(len(output['errors']), 1) - self.assertTrue('\'10000\'' in output['errors'][0]) - - output = requests.get( - BASE_URL, { - 'query': 'query', - 'illegal': 'whatever', - 'filter': 'fi1:e,types:F,f3,field2:44', - 'another': 3, - 'page': 'third' - }).json() - self.assertEquals(len(output['errors']), 6) - self.assertTrue(any(['\'illegal\'' in e for e in output['errors']])) - self.assertTrue(any(['\'another\'' in e for e in output['errors']])) - self.assertTrue(any(['\'fi1\'' in e for e in output['errors']])) - self.assertTrue(any(['\'field2\'' 
in e for e in output['errors']])) - self.assertTrue(any(['\'f3\'' in e for e in output['errors']])) - self.assertTrue(any(['\'third\'' in e for e in output['errors']])) - - output = requests.get(BASE_URL + '/https://ror.org/0qwerty89').json() - self.assertEquals(len(output['errors']), 1) - self.assertTrue('\'https://ror.org/0qwerty89\'' in output['errors'][0]) diff --git a/rorapi/tests/tests_unit/tests_deprecation_middleware.py b/rorapi/tests/tests_unit/tests_deprecation_middleware.py deleted file mode 100644 index b7147f15..00000000 --- a/rorapi/tests/tests_unit/tests_deprecation_middleware.py +++ /dev/null @@ -1,131 +0,0 @@ -from django.test import TestCase, RequestFactory, override_settings -from django.http import JsonResponse -from rorapi.middleware.deprecation import V1DeprecationMiddleware -import json - - -class V1DeprecationMiddlewareTestCase(TestCase): - """ - Tests for V1DeprecationMiddleware that returns 410 Gone for deprecated v1 endpoints. - """ - - def setUp(self): - self.factory = RequestFactory() - - # Mock get_response function - def get_response(request): - return JsonResponse({'message': 'success'}, status=200) - - self.get_response = get_response - self.middleware = V1DeprecationMiddleware(self.get_response) - - @override_settings(V1_DEPRECATED=True) - def test_v1_path_returns_410_when_deprecated(self): - """Test that /v1/ paths return 410 when V1_DEPRECATED is True""" - request = self.factory.get('/v1/organizations') - response = self.middleware(request) - - self.assertEqual(response.status_code, 410) - content = json.loads(response.content.decode('utf-8')) - self.assertIn('errors', content) - self.assertEqual(content['errors'][0]['status'], '410') - self.assertEqual(content['errors'][0]['title'], 'API Version Deprecated') - - @override_settings(V1_DEPRECATED=True) - def test_v1_exact_path_returns_410_when_deprecated(self): - """Test that exact /v1 path returns 410 when V1_DEPRECATED is True""" - request = self.factory.get('/v1') - response = self.middleware(request) - - self.assertEqual(response.status_code, 410) - content = json.loads(response.content.decode('utf-8')) - self.assertIn('errors', content) - - @override_settings(V1_DEPRECATED=True) - def test_v2_path_passes_through_when_v1_deprecated(self): - """Test that /v2/ paths work normally even when V1_DEPRECATED is True""" - request = self.factory.get('/v2/organizations') - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - content = json.loads(response.content.decode('utf-8')) - self.assertEqual(content['message'], 'success') - - @override_settings(V1_DEPRECATED=False) - def test_v1_path_passes_through_when_not_deprecated(self): - """Test that /v1/ paths work normally when V1_DEPRECATED is False""" - request = self.factory.get('/v1/organizations') - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - content = json.loads(response.content.decode('utf-8')) - self.assertEqual(content['message'], 'success') - - @override_settings(V1_DEPRECATED=False) - def test_v2_path_passes_through_when_v1_not_deprecated(self): - """Test that /v2/ paths work normally when V1_DEPRECATED is False""" - request = self.factory.get('/v2/organizations') - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - content = json.loads(response.content.decode('utf-8')) - self.assertEqual(content['message'], 'success') - - @override_settings(V1_DEPRECATED=None) - def test_v1_path_passes_through_when_setting_not_set(self): - """Test that /v1/ 
paths work when V1_DEPRECATED setting doesn't exist""" - # Don't use override_settings, rely on default behavior - request = self.factory.get('/v1/organizations') - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - content = json.loads(response.content.decode('utf-8')) - self.assertEqual(content['message'], 'success') - - @override_settings(V1_DEPRECATED=True) - def test_root_path_passes_through(self): - """Test that root path is not affected by middleware""" - request = self.factory.get('/') - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - - @override_settings(V1_DEPRECATED=True) - def test_other_paths_pass_through(self): - """Test that non-v1 paths pass through normally""" - request = self.factory.get('/heartbeat') - response = self.middleware(request) - - self.assertEqual(response.status_code, 200) - - @override_settings(V1_DEPRECATED=True) - def test_v1_with_query_params_returns_410(self): - """Test that /v1/ paths with query parameters return 410""" - request = self.factory.get('/v1/organizations?query=test') - response = self.middleware(request) - - self.assertEqual(response.status_code, 410) - - @override_settings(V1_DEPRECATED=True) - def test_v1_post_request_returns_410(self): - """Test that POST requests to /v1/ paths return 410""" - request = self.factory.post('/v1/organizations') - response = self.middleware(request) - - self.assertEqual(response.status_code, 410) - - @override_settings(V1_DEPRECATED=True) - def test_deprecation_error_message_format(self): - """Test that the deprecation error message follows the expected format""" - request = self.factory.get('/v1/organizations') - response = self.middleware(request) - - content = json.loads(response.content.decode('utf-8')) - self.assertIn('errors', content) - self.assertEqual(len(content['errors']), 1) - - error = content['errors'][0] - self.assertIn('status', error) - self.assertIn('title', error) - self.assertIn('detail', error) - self.assertIn('migrate to v2', error['detail']) diff --git a/rorapi/tests/tests_unit/tests_es_utils_v1.py b/rorapi/tests/tests_unit/tests_es_utils_v1.py deleted file mode 100644 index ca059cb6..00000000 --- a/rorapi/tests/tests_unit/tests_es_utils_v1.py +++ /dev/null @@ -1,229 +0,0 @@ -from django.test import SimpleTestCase -from rorapi.common.es_utils import ESQueryBuilder - -class QueryBuilderTestCaseV1(SimpleTestCase): - V1_VERSION = 'v1' - def test_id_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_id_query('ror-id') - - self.assertEqual(qb.get_query().to_dict(), { - 'query': { - 'match': { - 'id': { - 'query': 'ror-id', - 'operator': 'and' - } - } - }, - 'track_total_hits': True - }) - - def test_match_all_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_match_all_query() - - self.assertEqual(qb.get_query().to_dict(), - {'query': { - 'match_all': {} - }, - 'track_total_hits': True - }) - - def test_string_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_string_query('query terms') - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - } - }, - 'track_total_hits': True - }) - def test_string_query_advanced(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_string_query_advanced('query terms') - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'bool': { - 'must': [{ - 'query_string': { - 'query': 
'query terms', - 'default_field': '*', - 'default_operator':'and', - 'fuzzy_max_expansions': 1 - } - }] - } - }, - 'track_total_hits': True - }) - - def test_phrase_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_phrase_query(['f1', 'f2'], 'query terms') - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'dis_max': { - 'queries': [{ - 'match_phrase': { - 'f1': 'query terms' - } - }, { - 'match_phrase': { - 'f2': 'query terms' - } - }] - } - }, - 'track_total_hits': True - }) - - def test_common_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_common_query(['f1', 'f2'], 'query terms') - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'dis_max': { - 'queries': [{ - 'common': { - 'f1': { - 'query': 'query terms', - 'cutoff_frequency': 0.001 - } - } - }, { - 'common': { - 'f2': { - 'query': 'query terms', - 'cutoff_frequency': 0.001 - } - } - }] - } - }, - 'track_total_hits': True - }) - - def test_match_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_match_query('query terms') - - self.assertEqual(qb.get_query().to_dict(), - {'query': { - 'match': { - 'acronyms': 'query terms' - } - }, - 'track_total_hits': True - }) - - def test_fuzzy_query(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_fuzzy_query(['f1', 'f2'], 'query terms') - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'dis_max': { - 'queries': [{ - 'match': { - 'f1': { - 'query': 'query terms', - 'fuzziness': 'AUTO' - } - } - }, { - 'match': { - 'f2': { - 'query': 'query terms', - 'fuzziness': 'AUTO' - } - } - }] - } - }, - 'track_total_hits': True - }) - - def test_add_filters(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_match_all_query() - qb.add_filters({'key1': ['val1'], 'k2': ['value2']}) - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'bool': { - 'filter': [{ - 'terms': { - 'key1': ['val1'] - } - }, { - 'terms': { - 'k2': ['value2'] - } - }] - } - }, - 'track_total_hits': True - }) - - def test_add_aggregations(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_match_all_query() - qb.add_aggregations([('countries', 'code'), ('types', 'type')]) - - self.assertEqual( - qb.get_query().to_dict(), { - 'query': { - 'match_all': {} - }, - 'track_total_hits': True, - 'aggs': { - 'countries': { - 'terms': { - 'field': 'code', - 'min_doc_count': 1, - 'size': 10 - } - }, - 'types': { - 'terms': { - 'field': 'type', - 'min_doc_count': 1, - 'size': 10 - } - } - } - }) - - def test_paginate(self): - qb = ESQueryBuilder(self.V1_VERSION) - qb.add_match_all_query() - qb.paginate(10) - - self.assertEqual(qb.get_query().to_dict(), { - 'query': { - 'match_all': {} - }, - 'from': 180, - 'size': 20, - 'track_total_hits': True - }) diff --git a/rorapi/tests/tests_unit/tests_es_utils_v2.py b/rorapi/tests/tests_unit/tests_es_utils_v2.py index 318981ea..ec003791 100644 --- a/rorapi/tests/tests_unit/tests_es_utils_v2.py +++ b/rorapi/tests/tests_unit/tests_es_utils_v2.py @@ -2,9 +2,8 @@ from rorapi.common.es_utils import ESQueryBuilder class QueryBuilderTestCaseV2(SimpleTestCase): - V2_VERSION = 'v2' def test_id_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_id_query('ror-id') self.assertEqual(qb.get_query().to_dict(), { @@ -20,7 +19,7 @@ def test_id_query(self): }) def test_match_all_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_match_all_query() self.assertEqual(qb.get_query().to_dict(), @@ -31,7 +30,7 @@ def test_match_all_query(self): }) def 
test_string_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_string_query('query terms') self.assertEqual( @@ -51,7 +50,7 @@ def test_string_query(self): 'track_total_hits': True }) def test_string_query_advanced(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_string_query_advanced('query terms') self.assertEqual( @@ -72,7 +71,7 @@ def test_string_query_advanced(self): }) def test_phrase_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_phrase_query(['f1', 'f2'], 'query terms') self.assertEqual( @@ -94,7 +93,7 @@ def test_phrase_query(self): }) def test_common_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_common_query(['f1', 'f2'], 'query terms') self.assertEqual( @@ -122,7 +121,7 @@ def test_common_query(self): }) def test_match_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_match_query('query terms') self.assertEqual(qb.get_query().to_dict(), @@ -135,7 +134,7 @@ def test_match_query(self): }) def test_fuzzy_query(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_fuzzy_query(['f1', 'f2'], 'query terms') self.assertEqual( @@ -163,7 +162,7 @@ def test_fuzzy_query(self): }) def test_add_filters(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_match_all_query() qb.add_filters({'key1': ['val1'], 'k2': ['value2']}) @@ -186,7 +185,7 @@ def test_add_filters(self): }) def test_add_aggregations(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_match_all_query() qb.add_aggregations([('countries', 'code'), ('types', 'type')]) @@ -215,7 +214,7 @@ def test_add_aggregations(self): }) def test_paginate(self): - qb = ESQueryBuilder(self.V2_VERSION) + qb = ESQueryBuilder() qb.add_match_all_query() qb.paginate(10) diff --git a/rorapi/tests/tests_unit/tests_generaterorid_v1.py b/rorapi/tests/tests_unit/tests_generaterorid_v1.py deleted file mode 100644 index d9fe5d8b..00000000 --- a/rorapi/tests/tests_unit/tests_generaterorid_v1.py +++ /dev/null @@ -1,35 +0,0 @@ -from django.test import SimpleTestCase -from unittest.mock import patch -from rorapi.management.commands import generaterorid -from rorapi.common.models import Errors -from rorapi.settings import ROR_API - -DUPLICATE_ID_RAW = "duplicateid" -UNIQUE_ID_RAW = "uniqueid" -DUPLICATE_ROR_ID = f"{ROR_API['ID_PREFIX']}{DUPLICATE_ID_RAW}" -UNIQUE_ROR_ID = f"{ROR_API['ID_PREFIX']}{UNIQUE_ID_RAW}" -TEST_VERSION = 'v1' - -class GenerateRorIdCommandTestCase(SimpleTestCase): - - @patch('rorapi.management.commands.generaterorid.get_ror_id') - @patch('rorapi.management.commands.generaterorid.retrieve_organization') - @patch('rorapi.management.commands.generaterorid.generate_ror_id') - def test_check_ror_id_handles_collision_and_returns_unique( - self, mock_generate_ror_id, mock_retrieve_organization, mock_get_ror_id - ): - mock_generate_ror_id.side_effect = [ - DUPLICATE_ROR_ID, - UNIQUE_ROR_ID - ] - - mock_get_ror_id.side_effect = lambda x: x - - mock_retrieve_organization.side_effect = [ - (None, {'id': DUPLICATE_ROR_ID, 'name': 'Mock Duplicate Org'}), - (Errors(f"ROR ID '{UNIQUE_ROR_ID}' does not exist"), None) - ] - - result_ror_id = generaterorid.check_ror_id(TEST_VERSION) - - self.assertEqual(result_ror_id, UNIQUE_ROR_ID) \ No newline at end of file diff --git a/rorapi/tests/tests_unit/tests_generaterorid_v2.py b/rorapi/tests/tests_unit/tests_generaterorid_v2.py index 95f20b98..699e7ff7 100644 --- 
a/rorapi/tests/tests_unit/tests_generaterorid_v2.py
+++ b/rorapi/tests/tests_unit/tests_generaterorid_v2.py
@@ -8,8 +8,6 @@ UNIQUE_ID_RAW = "uniqueid"
 DUPLICATE_ROR_ID = f"{ROR_API['ID_PREFIX']}{DUPLICATE_ID_RAW}"
 UNIQUE_ROR_ID = f"{ROR_API['ID_PREFIX']}{UNIQUE_ID_RAW}"
-TEST_VERSION = 'v2'
-
 
 class GenerateRorIdCommandTestCase(SimpleTestCase):
 
     @patch('rorapi.management.commands.generaterorid.get_ror_id')
@@ -30,6 +28,6 @@ def test_check_ror_id_handles_collision_and_returns_unique(
             (Errors(f"ROR ID '{UNIQUE_ROR_ID}' does not exist"), None)
         ]
 
-        result_ror_id = generaterorid.check_ror_id(TEST_VERSION)
+        result_ror_id = generaterorid.check_ror_id()
 
         self.assertEqual(result_ror_id, UNIQUE_ROR_ID)
\ No newline at end of file
diff --git a/rorapi/tests/tests_unit/tests_matching_v1.py b/rorapi/tests/tests_unit/tests_matching_v1.py
deleted file mode 100644
index a640df9f..00000000
--- a/rorapi/tests/tests_unit/tests_matching_v1.py
+++ /dev/null
@@ -1,514 +0,0 @@
-from django.test import SimpleTestCase
-
-from rorapi.common.matching import load_geonames_countries, load_geonames_cities, load_countries, to_region, get_country_codes, \
-    get_countries, normalize, MatchedOrganization, get_similarity, get_score, \
-    MatchingNode, clean_search_string, check_do_not_match, MatchingGraph, get_output, \
-    check_exact_match, MATCHING_TYPE_PHRASE, MATCHING_TYPE_COMMON, MATCHING_TYPE_FUZZY
-from .utils import AttrDict
-
-
-class CountriesTestCase(SimpleTestCase):
-    def test_load_geonames_countries(self):
-        countries = load_geonames_countries()
-
-        self.assertTrue('AZ' in countries)
-        self.assertTrue('FM' in countries)
-        self.assertTrue('ZM' in countries)
-
-    def test_load_geonames_cities(self):
-        cities = load_geonames_cities()
-
-        self.assertTrue('3031582' in cities)
-
-    def test_load_countries(self):
-        countries = load_countries()
-
-        self.assertEqual(len(countries), 590)
-        self.assertTrue(('az', 'azarbaycan respublikasi') in countries)
-        self.assertTrue(('fm', 'federated states of micronesia') in countries)
-        self.assertTrue(('zm', 'zambia') in countries)
-
-    def test_to_region(self):
-        self.assertEqual(to_region('PL'), 'PL')
-        for c in ['GB', 'UK']:
-            self.assertEqual(to_region(c), 'GB-UK')
-        if c in ['CN', 'HK', 'TW']:
-            self.assertEqual(to_region(c), 'CN-HK-TW')
-        if c in ['PR', 'US']:
-            self.assertEqual(to_region(c), 'US-PR')
-
-    def test_get_country_codes(self):
-        self.assertEqual(get_country_codes('Seoul, Korea.'), ['KR'])
-        self.assertEqual(get_country_codes('Chicago, Illinois, USA'), ['US'])
-        self.assertEqual(
-            get_country_codes(
-                'University of California, Berkeley, California'), ['US'])
-        self.assertEqual(get_country_codes('Hospital Kassel, Kassel, Germany'),
-                         ['DE'])
-        self.assertEqual(get_country_codes('New South Wales, Australia'),
-                         ['AU'])
-        self.assertEqual(get_country_codes('State of Illinois'), ['US'])
-        self.assertEqual(
-            get_country_codes('Lehigh Valley Hospital, Allentown, PA;'),
-            ['US'])
-        self.assertEqual(
-            get_country_codes('Boston Children\'s Hospital, Boston, MA '),
-            ['US'])
-        self.assertEqual(
-            get_country_codes('Winthrop University Hospital, Mineola, NY'),
-            ['US'])
-        self.assertEqual(
-            get_country_codes('Medical Dow Chemical Company, U.S.A.'), ['US'])
-        self.assertEqual(get_country_codes('New York University'), ['US'])
-        self.assertEqual(get_country_codes('Enschede, The Netherlands'),
-                         ['NL'])
-        self.assertEqual(
-            get_country_codes(
-                'University of Surrey, Guildford, United Kingdom'), ['UK'])
-        self.assertEqual(get_country_codes('República Dominicana'), ['DO'])
-
self.assertEqual( - get_country_codes('České Budějovice , Czech Republic'), ['CZ']) - self.assertEqual(get_country_codes('Washington, D.C.'), ['US']) - self.assertEqual( - get_country_codes('Agency for Health Care Policy and Research'), - []) - - def test_get_country(self): - self.assertEqual(get_countries('Seoul, Korea.'), ['KR']) - self.assertEqual(get_countries('Chicago, Illinois, USA'), ['US-PR']) - self.assertEqual( - get_countries('University of California, Berkeley, California'), - ['US-PR']) - self.assertEqual(get_countries('Hospital Kassel, Kassel, Germany'), - ['DE']) - self.assertEqual(get_countries('New South Wales, Australia'), ['AU']) - self.assertEqual(get_countries('State of Illinois'), ['US-PR']) - self.assertEqual( - get_countries('Lehigh Valley Hospital, Allentown, PA;'), ['US-PR']) - self.assertEqual( - get_countries('Boston Children\'s Hospital, Boston, MA '), - ['US-PR']) - self.assertEqual( - get_countries('Winthrop University Hospital, Mineola, NY'), - ['US-PR']) - self.assertEqual(get_countries('Medical Dow Chemical Company, U.S.A.'), - ['US-PR']) - self.assertEqual(get_countries('New York University'), ['US-PR']) - self.assertEqual(get_countries('Enschede, The Netherlands'), ['NL']) - self.assertEqual( - get_countries('University of Surrey, Guildford, United Kingdom'), - ['GB-UK']) - self.assertEqual(get_countries('República Dominicana'), ['DO']) - self.assertEqual(get_countries('České Budějovice , Czech Republic'), - ['CZ']) - self.assertEqual(get_countries('Washington, D.C.'), ['US-PR']) - self.assertEqual( - get_countries('Agency for Health Care Policy and Research'), []) - - -class NormalizeTestCase(SimpleTestCase): - def test_normalize(self): - self.assertEqual(normalize('university of excellence'), - 'university of excellence') - self.assertEqual(normalize('ünivërsity óf éxcellençe'), - 'university of excellence') - self.assertEqual(normalize('University of ExceLLence'), - 'university of excellence') - self.assertEqual(normalize('The University of Excellence'), - 'university of excellence') - self.assertEqual(normalize('University of Excellence & Brilliance'), - 'university of excellence and brilliance') - self.assertEqual( - normalize('The University of Excellence & Brilliance'), - 'university of excellence and brilliance') - self.assertEqual(normalize('U.S. University of Excellence'), - 'united states university of excellence') - self.assertEqual(normalize('university of tech'), - 'university of technology') - self.assertEqual(normalize('university of tech & Excellence'), - 'university of technology and excellence') - self.assertEqual(normalize('University of Tech. & Excellence'), - 'university of technology and excellence') - self.assertEqual(normalize('Inst. of excellence'), - 'institute of excellence') - self.assertEqual(normalize('Inst of Excellence'), - 'institute of excellence') - self.assertEqual(normalize('Inst of Excellence inst'), - 'institute of excellence institute') - self.assertEqual(normalize('Lab. of excellence'), - 'laboratory of excellence') - self.assertEqual(normalize('Lab of Excellence'), - 'laboratory of excellence') - self.assertEqual(normalize('lab of Excellence lab'), - 'laboratory of excellence laboratory') - self.assertEqual(normalize('Univ. of Excellence'), - 'university of excellence') - self.assertEqual(normalize('univ of Excellence'), - 'university of excellence') - self.assertEqual(normalize('Excellence Univ'), 'excellence university') - self.assertEqual(normalize('U. 
of Excellence'), - 'university of excellence') - self.assertEqual(normalize('U.W.X. of Excellence'), - 'u.w.x. of excellence') - self.assertEqual(normalize('U. W. X. of Excellence'), - 'u. w. x. of excellence') - self.assertEqual(normalize('関西光科学研究所'), - '関西光科学研究所') - self.assertEqual(normalize('Московский государственный университет Russia '), - 'московский государственный университет russia') - - -class MatchedOrganizationTestCase(SimpleTestCase): - def test_init(self): - empty = MatchedOrganization() - self.assertTrue(empty.substring is None) - self.assertEqual(empty.score, 0) - self.assertEqual(empty.chosen, False) - self.assertTrue(empty.matching_type is None) - self.assertTrue(empty.organization is None) - - match = MatchedOrganization(substring='aff', - score=60, - chosen=True, - matching_type='query', - organization='obj') - self.assertEqual(match.substring, 'aff') - self.assertEqual(match.score, 60) - self.assertEqual(match.matching_type, 'query') - self.assertEqual(match.organization, 'obj') - self.assertEqual(match.chosen, True) - - -class SimilarityTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def test_get_similarity(self): - self.assertEqual( - get_similarity('University of Excellence', - 'University of Excellence'), 1) - self.assertEqual( - get_similarity('univ. of excellençë', 'Univërsity of Excellence'), - 1) - self.assertEqual( - get_similarity('of Excellence University', - 'University of Excellence'), 1) - self.assertEqual( - get_similarity('of excellençë univ', 'University of Excellence'), - 1) - self.assertEqual( - get_similarity('Excellence University', - 'University of Excellence'), 0.93) - self.assertEqual( - get_similarity('excellençë univ', 'University of Excellence'), - 0.93) - self.assertEqual( - get_similarity('University of Exçellence', - 'University of Excellence (Gallifrey)'), 1) - self.assertEqual( - get_similarity('University of Excellence and Brilliance', - 'University of Excellence'), 0.76) - self.assertEqual( - get_similarity('University of Excellence (and Brilliance)', - 'University of Excellence'), 1) - self.assertEqual( - get_similarity('University of Excellence School of Perseverance', - 'University of Excellence'), 1) - self.assertEqual( - get_similarity('University of Excellence Mediocrity Hospital', - 'University of Excellence'), 1) - - def test_get_score(self): - empty = { - 'name': '', - 'labels': [], - 'aliases': [], - 'acronyms': [], - 'country': { - 'country_code': '' - } - } - self.assertEqual( - get_score(AttrDict(dict(empty, name='University of Excellence')), - 'University of Excellence', None, self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University of Excellence', - country={'country_code': 'XY'})), - 'University of Excellence', ['US-PR'], self.V1_VERSION), 0) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University of Excellence', - country={'country_code': 'PR'})), - 'University of Excellence', ['US-PR'], self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, labels=[{ - 'label': 'University of Excellence' - }])), 'University of Excellence', None, self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - labels=[{ - 'label': 'Excellence U' - }, { - 'label': 'University of Excellence' - }])), 'University of Excellence', None, self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict(dict(empty, aliases=['University of Excellence'])), - 'University of Excellence', None, self.V1_VERSION), 1) - self.assertEqual( - 
get_score( - AttrDict( - dict(empty, - aliases=['Excellence U', - 'University of Excellence'])), - 'University of Excellence', None, self.V1_VERSION), 1) - self.assertEqual( - get_score(AttrDict(dict(empty, acronyms=['UEXC'])), - 'University of Excellence', None, self.V1_VERSION), 0) - self.assertEqual( - get_score(AttrDict(dict(empty, acronyms=['UEXC'])), 'UEXC', None, self.V1_VERSION), - 0.9) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - acronyms=['UEXC'], - country={'country_code': 'PR'})), 'UEXC', ['US-PR'], self.V1_VERSION), - 1) - - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University of Excellence', - labels=[{ - 'label': 'Excellence U' - }, { - 'label': 'University Excellence' - }], - aliases=['Excellence U', 'University Excellence'], - acronyms=['UEXC'])), 'University of Excellence', - None, self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University Excellence', - labels=[{ - 'label': 'Excellence U' - }, { - 'label': 'University of Excellence' - }], - aliases=['Excellence U', 'University Excellence'], - acronyms=['UEXC'])), 'University of Excellence', - None, self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University Excellence', - labels=[{ - 'label': 'Excellence U' - }, { - 'label': 'University Excellence' - }], - aliases=['Excellence U', 'University of Excellence'], - acronyms=['UEXC'])), 'University of Excellence', - None, self.V1_VERSION), 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University of Brilliance', - labels=[{ - 'label': 'University of Brilliance' - }], - aliases=['Brilliance U', 'University Brilliance'], - acronyms=['UEXC'])), 'UEXC', None, self.V1_VERSION), 0.9) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University of Brilliance', - labels=[{ - 'label': 'University of Brilliance' - }], - aliases=['Brilliance U', 'University Brilliance'], - acronyms=['UEXC'], - country={'country_code': 'PR'})), 'UEXC', ['US-PR'], self.V1_VERSION), - 1) - self.assertEqual( - get_score( - AttrDict( - dict(empty, - name='University of Brilliance', - labels=[{ - 'label': 'University of Brilliance' - }], - aliases=['Brilliance U', 'University Brilliance'], - acronyms=['UEXC'], - country={'country_code': 'AV'})), 'UEXC', ['US-PR'], self.V1_VERSION), - 0) - - -class TestMatchingNode(SimpleTestCase): - - V1_VERSION = 'v1' - - def test_init(self): - empty = MatchingNode('text', self.V1_VERSION) - self.assertEqual(empty.text, 'text') - self.assertTrue(empty.matched is None) - -class TestCleanSearchString(SimpleTestCase): - def test_init(self): - self.assertEqual(clean_search_string('university of excellence'), - 'university of excellence') - self.assertEqual(clean_search_string('ünivërsity óf éxcellençe'), - 'ünivërsity óf éxcellençe') - self.assertEqual(clean_search_string('University of ExceLLence'), - 'University of ExceLLence') - self.assertEqual(clean_search_string('The University of Excellence'), - 'The University of Excellence') - self.assertEqual(clean_search_string('University of Excellence & Brilliance'), - 'University of Excellence & Brilliance') - self.assertEqual(clean_search_string('U.S. University of Excellence'), - 'U S University of Excellence') - self.assertEqual(clean_search_string('University of Tech. 
& Excellence'), - 'University of Tech & Excellence') - self.assertEqual(clean_search_string('University of Tech, Excellence'), - 'University of Tech Excellence') - self.assertEqual(clean_search_string('University of Tech/Excellence'), - 'University of Tech Excellence') - self.assertEqual(clean_search_string('University of Tech: Excellence'), - 'University of Tech Excellence') - self.assertEqual(clean_search_string('University of Tech; Excellence'), - 'University of Tech Excellence') - self.assertEqual(clean_search_string('University of Tech Excellence;'), - 'University of Tech Excellence') - -class TestCheckDoNotMatch(SimpleTestCase): - def test_init(self): - self.assertTrue(check_do_not_match('university hospital')), - self.assertTrue(check_do_not_match('MX')), - self.assertTrue(check_do_not_match('Mexico')), - self.assertTrue(check_do_not_match('MEX')), - self.assertTrue(check_do_not_match('Bordeaux')), - self.assertFalse(check_do_not_match('university of excellence')) - -class TestMatchingGraph(SimpleTestCase): - - V1_VERSION = 'v1' - - def test_init(self): - graph = MatchingGraph('University of Excellence', self.V1_VERSION) - self.assertEqual(len(graph.nodes), 2) - self.assertEqual(graph.nodes[0].text, 'University of Excellence') - self.assertEqual(graph.nodes[1].text, 'University of Excellence') - - graph = \ - MatchingGraph('University of Excellence and Creativity Institute', self.V1_VERSION) - self.assertEqual(len(graph.nodes), 2) - self.assertEqual(graph.nodes[0].text, 'University of Excellence and Creativity Institute') - self.assertEqual(graph.nodes[1].text, 'University of Excellence and Creativity Institute') - - graph = \ - MatchingGraph('University of Excellence & Creativity Institute', self.V1_VERSION) - self.assertEqual(len(graph.nodes), 2) - self.assertEqual(graph.nodes[0].text, - 'University of Excellence & Creativity Institute') - self.assertEqual(graph.nodes[1].text, - 'University of Excellence & Creativity Institute') - - graph = MatchingGraph( - 'University of Excellence & Creativity Institute', self.V1_VERSION) - self.assertEqual(len(graph.nodes), 2) - self.assertEqual(graph.nodes[0].text, - 'University of Excellence & Creativity Institute') - self.assertEqual(graph.nodes[1].text, - 'University of Excellence & Creativity Institute') - - graph = MatchingGraph('University of Excellence, Creativity Institute', self.V1_VERSION) - self.assertEqual(len(graph.nodes), 3) - self.assertEqual(graph.nodes[0].text, - 'University of Excellence Creativity Institute') - self.assertEqual(graph.nodes[1].text, 'University of Excellence') - self.assertEqual(graph.nodes[2].text, 'Creativity Institute') - - - graph = MatchingGraph('School of Brilliance, University of ' + - 'Excellence and Perseverance; 21-100 ' + - 'Gallifrey: Outerspace', self.V1_VERSION) - self.assertEqual(len(graph.nodes), 5) - self.assertEqual(graph.nodes[0].text, 'School of Brilliance University of Excellence and Perseverance 21 100 Gallifrey Outerspace') - self.assertEqual(graph.nodes[1].text, 'School of Brilliance') - self.assertEqual(graph.nodes[2].text, 'University of Excellence and Perseverance') - self.assertEqual(graph.nodes[3].text, '21 100 Gallifrey') - self.assertEqual(graph.nodes[4].text, 'Outerspace') - - def test_remove_low_scores(self): - graph = MatchingGraph('University of Excellence, Creativity Institute', self.V1_VERSION) - graph.nodes[0].matched = MatchedOrganization(substring='s0', - score=10, - matching_type='q', - organization='obj') - graph.nodes[1].matched = MatchedOrganization(substring='s1', 
- score=100, - matching_type='q', - organization='obj') - graph.nodes[2].matched = MatchedOrganization(substring='s2', - score=67, - matching_type='q', - organization='obj') - graph.remove_low_scores(90) - self.assertTrue(graph.nodes[0].matched is None) - self.assertTrue(graph.nodes[1].matched is not None) - self.assertEqual(graph.nodes[1].matched.substring, 's1') - self.assertTrue(graph.nodes[2].matched is None) - -class TestGenerateOutput(SimpleTestCase): - def org(self, substring, score, type, id, chosen=False): - return MatchedOrganization(substring=substring, - score=score, - matching_type=type, - chosen=chosen, - organization=AttrDict({'id': id})) - - def test_get_output(self): - c1 = self.org('s 1', 1, MATCHING_TYPE_PHRASE, 'org1') - c2 = self.org('s 2', 0.94, MATCHING_TYPE_FUZZY, 'org2') - - m1 = self.org('s 2', 1, MATCHING_TYPE_COMMON, 'org1') - m2 = self.org('s 1', 1, MATCHING_TYPE_PHRASE, 'org1') - m3 = self.org('s 1', 1, MATCHING_TYPE_FUZZY, 'org1') - - m4 = self.org('s 2', 1, MATCHING_TYPE_PHRASE, 'org2') - m5 = self.org('s 2', 0.94, MATCHING_TYPE_FUZZY, 'org2') - - m6 = self.org('s 3', 0.5, MATCHING_TYPE_COMMON, 'org3') - m7 = self.org('s 4', 0.66, MATCHING_TYPE_PHRASE, 'org3') - m8 = self.org('s 5', 0.49, MATCHING_TYPE_FUZZY, 'org3') - - m9 = self.org('s 3', 0.76, MATCHING_TYPE_COMMON, 'org4') - m10 = self.org('s 4', 0.76, MATCHING_TYPE_PHRASE, 'org4') - - m11 = self.org('s 3', 0.48, MATCHING_TYPE_FUZZY, 'org5') - m12 = self.org('s 4', 0.06, MATCHING_TYPE_PHRASE, 'org5') - m13 = self.org('s 55', 0.48, MATCHING_TYPE_FUZZY, 'org5') - - c1_ch = self.org('s 1', 1, MATCHING_TYPE_PHRASE, 'org1', chosen=False) - - self.assertEquals( - get_output( - [c1, c2], - [m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13], False), - [c1_ch, m4, m10, m7]) \ No newline at end of file diff --git a/rorapi/tests/tests_unit/tests_matching_v2.py b/rorapi/tests/tests_unit/tests_matching_v2.py index 1db31c93..1e871acf 100644 --- a/rorapi/tests/tests_unit/tests_matching_v2.py +++ b/rorapi/tests/tests_unit/tests_matching_v2.py @@ -233,26 +233,26 @@ def test_get_score(self): } self.assertEqual( get_score(AttrDict(dict(empty, names=[{'value': 'University of Excellence', 'types': ['ror_display']}])), - 'University of Excellence', None, self.V2_VERSION), 1) + 'University of Excellence', None), 1) self.assertEqual( get_score( AttrDict( dict(empty, names=[{'value': 'University of Excellence', 'types': ['ror_display']}], locations=[{'geonames_details': {'country_code': 'XY'}}])), - 'University of Excellence', ['US-PR'], self.V2_VERSION), 0) + 'University of Excellence', ['US-PR']), 0) self.assertEqual( get_score( AttrDict( dict(empty, names=[{'value': 'University of Excellence', 'types': ['ror_display']}], locations=[{'geonames_details': {'country_code': 'PR'}}])), - 'University of Excellence', ['US-PR'], self.V2_VERSION), 1) + 'University of Excellence', ['US-PR']), 1) self.assertEqual( get_score( AttrDict( dict(empty, names=[{'value': 'University of Excellence', 'types': ['label']}] - )), 'University of Excellence', None, self.V2_VERSION), 1) + )), 'University of Excellence', None), 1) self.assertEqual( get_score( AttrDict( @@ -261,11 +261,11 @@ def test_get_score(self): {'value': 'University of Excellence', 'types': ['ror_display']}, {'value': 'Excellence U', 'types': ['label']} ] - )), 'University of Excellence', None, self.V2_VERSION), 1) + )), 'University of Excellence', None), 1) self.assertEqual( get_score( AttrDict(dict(empty, names=[{'value': 'University of Excellence', 'types': ['alias']}])), - 
'University of Excellence', None, self.V2_VERSION), 1) + 'University of Excellence', None), 1) self.assertEqual( get_score( AttrDict( @@ -274,17 +274,17 @@ def test_get_score(self): {'value': 'University of Excellence', 'types': ['alias']}, {'value': 'Excellence U', 'types': ['alias']} ])), - 'University of Excellence', None, self.V2_VERSION), 1) + 'University of Excellence', None), 1) self.assertEqual( get_score(AttrDict(dict(empty, names=[ {'value': 'UEXC', 'types': ['acronym']}, {'value': '', 'types': ['ror_display']}])), - 'University of Excellence', None, self.V2_VERSION), 0) + 'University of Excellence', None), 0) self.assertEqual( get_score(AttrDict(dict(empty, names=[ {'value': 'UEXC', 'types': ['acronym']}, {'value': '', 'types': ['ror_display']}])), - 'UEXC', None, self.V2_VERSION), + 'UEXC', None), .9) self.assertEqual( get_score( @@ -293,7 +293,7 @@ def test_get_score(self): names=[ {'value': 'UEXC', 'types': ['acronym']}, {'value': '', 'types': ['ror_display']}], - locations=[{'geonames_details': {'country_code': 'PR'}}])), 'UEXC', ['US-PR'], self.V2_VERSION), + locations=[{'geonames_details': {'country_code': 'PR'}}])), 'UEXC', ['US-PR']), 1) self.assertEqual( @@ -308,7 +308,7 @@ def test_get_score(self): {'value': 'Excellence U', 'types': ['alias']}, {'value': 'University Excellence', 'types': ['alias']} ])), 'University of Excellence', - None, self.V2_VERSION), 1) + None), 1) self.assertEqual( get_score( AttrDict( @@ -321,7 +321,7 @@ def test_get_score(self): {'value': 'Excellence U', 'types': ['alias']}, {'value': 'University Excellence', 'types': ['alias']} ])), 'University of Excellence', - None, self.V2_VERSION), 1) + None), 1) self.assertEqual( get_score( AttrDict( @@ -334,7 +334,7 @@ def test_get_score(self): {'value': 'Excellence U', 'types': ['alias']}, {'value': 'University of Excellence', 'types': ['alias']} ])), 'University of Excellence', - None, self.V2_VERSION), 1) + None), 1) self.assertEqual( get_score( AttrDict( @@ -346,7 +346,7 @@ def test_get_score(self): {'value': 'Brilliance U', 'types': ['alias']}, {'value': 'University Brilliance', 'types': ['alias']} ])), 'UEXC', - None, self.V2_VERSION), .9) + None), .9) self.assertEqual( get_score( AttrDict( @@ -358,7 +358,7 @@ def test_get_score(self): {'value': 'Brilliance U', 'types': ['alias']}, {'value': 'University Brilliance', 'types': ['alias']} ], - locations=[{'geonames_details': {'country_code': 'PR'}}])), 'UEXC', ['US-PR'], self.V2_VERSION), + locations=[{'geonames_details': {'country_code': 'PR'}}])), 'UEXC', ['US-PR']), 1) self.assertEqual( get_score( @@ -371,7 +371,7 @@ def test_get_score(self): {'value': 'Brilliance U', 'types': ['alias']}, {'value': 'University Brilliance', 'types': ['alias']} ], - locations=[{'geonames_details': {'country_code': 'AV'}}])), 'UEXC', ['US-PR'], self.V2_VERSION), + locations=[{'geonames_details': {'country_code': 'AV'}}])), 'UEXC', ['US-PR']), 0) @@ -380,7 +380,7 @@ class TestMatchingNode(SimpleTestCase): V2_VERSION = 'v2' def test_init(self): - empty = MatchingNode('text', self.V2_VERSION) + empty = MatchingNode('text') self.assertEqual(empty.text, 'text') self.assertTrue(empty.matched is None) @@ -425,19 +425,19 @@ class TestMatchingGraph(SimpleTestCase): V2_VERSION = 'v2' def test_init(self): - graph = MatchingGraph('University of Excellence', self.V2_VERSION) + graph = MatchingGraph('University of Excellence') self.assertEqual(len(graph.nodes), 2) self.assertEqual(graph.nodes[0].text, 'University of Excellence') self.assertEqual(graph.nodes[1].text, 'University of 
Excellence') graph = \ - MatchingGraph('University of Excellence and Creativity Institute', self.V2_VERSION) + MatchingGraph('University of Excellence and Creativity Institute') self.assertEqual(len(graph.nodes), 2) self.assertEqual(graph.nodes[0].text, 'University of Excellence and Creativity Institute') self.assertEqual(graph.nodes[1].text, 'University of Excellence and Creativity Institute') graph = \ - MatchingGraph('University of Excellence & Creativity Institute', self.V2_VERSION) + MatchingGraph('University of Excellence & Creativity Institute') self.assertEqual(len(graph.nodes), 2) self.assertEqual(graph.nodes[0].text, 'University of Excellence & Creativity Institute') @@ -445,14 +445,14 @@ def test_init(self): 'University of Excellence & Creativity Institute') graph = MatchingGraph( - 'University of Excellence & Creativity Institute', self.V2_VERSION) + 'University of Excellence & Creativity Institute') self.assertEqual(len(graph.nodes), 2) self.assertEqual(graph.nodes[0].text, 'University of Excellence & Creativity Institute') self.assertEqual(graph.nodes[1].text, 'University of Excellence & Creativity Institute') - graph = MatchingGraph('University of Excellence, Creativity Institute', self.V2_VERSION) + graph = MatchingGraph('University of Excellence, Creativity Institute') self.assertEqual(len(graph.nodes), 3) self.assertEqual(graph.nodes[0].text, 'University of Excellence Creativity Institute') @@ -462,7 +462,7 @@ def test_init(self): graph = MatchingGraph('School of Brilliance, University of ' + 'Excellence and Perseverance; 21-100 ' + - 'Gallifrey: Outerspace', self.V2_VERSION) + 'Gallifrey: Outerspace') self.assertEqual(len(graph.nodes), 5) self.assertEqual(graph.nodes[0].text, 'School of Brilliance University of Excellence and Perseverance 21 100 Gallifrey Outerspace') self.assertEqual(graph.nodes[1].text, 'School of Brilliance') @@ -471,7 +471,7 @@ def test_init(self): self.assertEqual(graph.nodes[4].text, 'Outerspace') def test_remove_low_scores(self): - graph = MatchingGraph('University of Excellence, Creativity Institute', self.V2_VERSION) + graph = MatchingGraph('University of Excellence, Creativity Institute') graph.nodes[0].matched = MatchedOrganization(substring='s0', score=10, matching_type='q', diff --git a/rorapi/tests/tests_unit/tests_models_v1.py b/rorapi/tests/tests_unit/tests_models_v1.py deleted file mode 100644 index 65f9c432..00000000 --- a/rorapi/tests/tests_unit/tests_models_v1.py +++ /dev/null @@ -1,264 +0,0 @@ -from django.test import SimpleTestCase - -from rorapi.v1.models import ( - Aggregations, - ExternalIds, - Organization, - MatchedOrganization -) -from .utils import AttrDict - - -class ExternalIdsTestCase(SimpleTestCase): - def test_attributes_exist(self): - data = { - "ISNI": {"preferred": "isni-p", "all": ["isni-a", "isni-b"]}, - "GRID": {"preferred": "grid-p", "all": "grid-a"}, - } - entity = ExternalIds(AttrDict(data)) - self.assertEqual(entity.ISNI.preferred, data["ISNI"]["preferred"]) - self.assertEqual(entity.ISNI.all, data["ISNI"]["all"]) - self.assertEqual(entity.GRID.preferred, data["GRID"]["preferred"]) - self.assertEqual(entity.GRID.all, data["GRID"]["all"]) - - def test_omit_attributes(self): - entity = ExternalIds( - AttrDict( - { - "FundRef": {"preferred": "fr-p", "all": ["fr-a", "fr-b"]}, - "GRID": {"preferred": "grid-p", "all": "grid-a"}, - "other": {"preferred": "isni-p", "all": ["isni-a", "isni-b"]}, - } - ) - ) - msg = "'ExternalIds' object has no attribute '{}'" - with self.assertRaisesMessage(AttributeError, 
msg.format("ISNI")): - entity.ISNI - with self.assertRaisesMessage(AttributeError, msg.format("HESA")): - entity.HESA - with self.assertRaisesMessage(AttributeError, msg.format("other")): - entity.other - - -class OrganizationTestCase(SimpleTestCase): - def test_attributes_exist(self): - data = { - "id": "ror-id", - "name": "University of Gallifrey", - "types": ["school", "research centre"], - "links": [], - "ip_addresses": [], - "email_address": None, - "aliases": ["Gallifrey University", "Timey-Wimey University of Gallifrey"], - "acronyms": ["UG"], - "addresses": [ - { - "lat": "49.198027", - "lng": "-123.007714", - "state_code": "CA-BC", - "city": "Burnaby", - "primary": False, - "geonames_city": { - "id": 5911606, - "city": "Burnaby", - "geonames_admin1": { - "name": "British Columbia", - "id": "5909050", - "ascii_name": "British Columbia", - "code": "CA.02", - }, - "geonames_admin2": { - "name": "Metro Vancouver Regional District", - "id": "5965814", - "ascii_name": "Metro Vancouver Regional District", - "code": "CA.02.5915", - }, - "nuts_level1": {"name": "SLOVENIJA", "code": "SI0"}, - "nuts_level2": {"name": "Vzhodna Slovenija", "code": "SI03"}, - "nuts_level3": {"name": "TEST", "code": "SI036"}, - }, - "postcode": "123456", - "line": "123 Somewhere Over A Rainbow", - "country_geonames_id": 6251999, - "state": "British Columbia", - } - ], - "relationships": [ - {"label": "Calvary Hospital", "type": "Related", "id": "grid.1234.6"} - ], - "established": 1946, - "status": "active", - "wikipedia_url": "https://en.wikipedia.org/wiki/Gallifrey", - "labels": [ - {"label": "Uniwersytet Gallifrenski", "iso639": "pl"}, - {"label": "ben DuSaQ'a'Daq DawI' SoH gallifrey", "iso639": "kl"}, - ], - "country": {"country_name": "Gallifrey", "country_code": "GE"}, - "external_ids": { - "ISNI": {"preferred": "0000 0004", "all": ["0000 0004"]}, - "FundRef": {"preferred": None, "all": ["5011004567542"]}, - "GRID": {"preferred": "grid.12580.34", "all": "grid.12580.34"}, - }, - } - organization = Organization(AttrDict(data)) - self.assertEqual(organization.id, data["id"]) - self.assertEqual(organization.name, data["name"]) - self.assertEqual(organization.types, data["types"]) - self.assertEqual(organization.established, data["established"]) - self.assertEqual(organization.addresses[0].lat, data["addresses"][0]["lat"]) - self.assertEqual(organization.addresses[0].lng, data["addresses"][0]["lng"]) - self.assertEqual( - organization.addresses[0].state_code, data["addresses"][0]["state_code"] - ) - self.assertEqual(organization.addresses[0].city, data["addresses"][0]["city"]) - self.assertEqual( - organization.addresses[0].geonames_city.id, - data["addresses"][0]["geonames_city"]["id"], - ) - self.assertEqual( - organization.addresses[0].postcode, data["addresses"][0]["postcode"] - ) - self.assertEqual(organization.addresses[0].line, data["addresses"][0]["line"]) - self.assertEqual( - organization.addresses[0].country_geonames_id, - data["addresses"][0]["country_geonames_id"], - ) - self.assertEqual(organization.links, data["links"]) - self.assertEqual(organization.aliases, data["aliases"]) - self.assertEqual(organization.acronyms, data["acronyms"]) - self.assertEqual(organization.status, data["status"]) - self.assertEqual(organization.wikipedia_url, data["wikipedia_url"]) - self.assertEqual(len(organization.labels), 2) - self.assertEqual(organization.labels[0].label, data["labels"][0]["label"]) - self.assertEqual(organization.labels[0].iso639, data["labels"][0]["iso639"]) - 
self.assertEqual(organization.labels[1].label, data["labels"][1]["label"]) - self.assertEqual(organization.labels[1].iso639, data["labels"][1]["iso639"]) - self.assertEqual( - organization.country.country_name, data["country"]["country_name"] - ) - self.assertEqual( - organization.country.country_code, data["country"]["country_code"] - ) - self.assertEqual( - organization.external_ids.ISNI.preferred, - data["external_ids"]["ISNI"]["preferred"], - ) - self.assertEqual( - organization.external_ids.ISNI.all, data["external_ids"]["ISNI"]["all"] - ) - self.assertEqual( - organization.external_ids.FundRef.preferred, - data["external_ids"]["FundRef"]["preferred"], - ) - self.assertEqual( - organization.external_ids.FundRef.all, - data["external_ids"]["FundRef"]["all"], - ) - self.assertEqual( - organization.external_ids.GRID.preferred, - data["external_ids"]["GRID"]["preferred"], - ) - self.assertEqual( - organization.external_ids.GRID.all, data["external_ids"]["GRID"]["all"] - ) - - -class MatchedOrganizationTestCase(SimpleTestCase): - def test_attributes_exist(self): - data = { - "substring": "UGallifrey", - "score": 0.95, - "matching_type": "fuzzy", - "chosen": True, - "organization": { - "id": "ror-id", - "name": "University of Gallifrey", - "types": ["research centre"], - "links": [], - "aliases": [], - "acronyms": [], - "wikipedia_url": "https://en.wikipedia.org/wiki/Gallifrey", - "labels": [], - "country": {"country_name": "Gallifrey", "country_code": "GE"}, - "external_ids": {}, - "status": "active", - "established": 1979, - "relationships": [], - "addresses": [], - "ip_addresses": [], - }, - } - organization = MatchedOrganization(AttrDict(data)) - self.assertEqual(organization.substring, data["substring"]) - self.assertEqual(organization.score, data["score"]) - self.assertEqual(organization.matching_type, data["matching_type"]) - self.assertEqual(organization.chosen, data["chosen"]) - self.assertEqual(organization.organization.id, data["organization"]["id"]) - self.assertEqual(organization.organization.name, data["organization"]["name"]) - -class AggregationsTestCase(SimpleTestCase): - def test_attributes_exist(self): - aggr = Aggregations( - AttrDict( - { - "types": { - "buckets": [ - {"key": "TyPE 1", "doc_count": 482}, - {"key": "Type2", "doc_count": 42}, - ] - }, - "countries": { - "buckets": [ - {"key": "IE", "doc_count": 48212}, - {"key": "FR", "doc_count": 4821}, - {"key": "GB", "doc_count": 482}, - {"key": "US", "doc_count": 48}, - ] - }, - "continentss": { - "buckets": [ - {"key": "AF", "doc_count": 48212}, - {"key": "AS", "doc_count": 4821}, - {"key": "EU", "doc_count": 482}, - {"key": "NA", "doc_count": 48}, - ] - }, - "statuses": { - "buckets": [ - {"key": "active", "doc_count": 102927}, - {"key": "inactive", "doc_count": 3}, - {"key": "withdrawn", "doc_count": 2}, - ] - }, - } - ) - ) - self.assertEqual(len(aggr.types), 2) - self.assertEqual(aggr.types[0].id, "type 1") - self.assertEqual(aggr.types[0].title, "TyPE 1") - self.assertEqual(aggr.types[0].count, 482) - self.assertEqual(aggr.types[1].id, "type2") - self.assertEqual(aggr.types[1].title, "Type2") - self.assertEqual(aggr.types[1].count, 42) - self.assertEqual(len(aggr.countries), 4) - self.assertEqual(aggr.countries[0].id, "ie") - self.assertEqual(aggr.countries[0].title, "Ireland") - self.assertEqual(aggr.countries[0].count, 48212) - self.assertEqual(aggr.countries[1].id, "fr") - self.assertEqual(aggr.countries[1].title, "France") - self.assertEqual(aggr.countries[1].count, 4821) - 
self.assertEqual(aggr.countries[2].id, "gb") - self.assertEqual(aggr.countries[2].title, "United Kingdom") - self.assertEqual(aggr.countries[2].count, 482) - self.assertEqual(aggr.countries[3].id, "us") - self.assertEqual(aggr.countries[3].title, "United States") - self.assertEqual(aggr.countries[3].count, 48) - self.assertEqual(aggr.statuses[0].id, "active") - self.assertEqual(aggr.statuses[0].title, "active") - self.assertEqual(aggr.statuses[0].count, 102927) - self.assertEqual(aggr.statuses[1].id, "inactive") - self.assertEqual(aggr.statuses[1].title, "inactive") - self.assertEqual(aggr.statuses[1].count, 3) - self.assertEqual(aggr.statuses[2].id, "withdrawn") - self.assertEqual(aggr.statuses[2].title, "withdrawn") - self.assertEqual(aggr.statuses[2].count, 2) diff --git a/rorapi/tests/tests_unit/tests_queries_v1.py b/rorapi/tests/tests_unit/tests_queries_v1.py deleted file mode 100644 index 237517ab..00000000 --- a/rorapi/tests/tests_unit/tests_queries_v1.py +++ /dev/null @@ -1,622 +0,0 @@ -import json -import mock -import os - -from django.test import SimpleTestCase -from rorapi.common.queries import get_ror_id, validate, build_search_query, \ - build_retrieve_query, search_organizations, retrieve_organization -from rorapi.settings import ES_VARS -from .utils import IterableAttrDict - - -class GetRorIDTestCase(SimpleTestCase): - - def test_no_id(self): - self.assertIsNone(get_ror_id('no id here')) - self.assertIsNone(get_ror_id('http://0w7hudk23')) - self.assertIsNone(get_ror_id('https://0w7hudk23')) - - def test_extract_id(self): - self.assertEquals(get_ror_id('0w7hudk23'), 'https://ror.org/0w7hudk23') - self.assertEquals(get_ror_id('ror.org/0w7hudk23'), - 'https://ror.org/0w7hudk23') - self.assertEquals(get_ror_id('ror.org%2F0w7hudk23'), - 'https://ror.org/0w7hudk23') - self.assertEquals(get_ror_id('http://ror.org/0w7hudk23'), - 'https://ror.org/0w7hudk23') - self.assertEquals(get_ror_id('http%3A%2F%2Fror.org%2F0w7hudk23'), - 'https://ror.org/0w7hudk23') - self.assertEquals(get_ror_id('https://ror.org/0w7hudk23'), - 'https://ror.org/0w7hudk23') - self.assertEquals(get_ror_id('https%3A%2F%2Fror.org%2F0w7hudk23'), - 'https://ror.org/0w7hudk23') - - -class ValidationTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def test_illegal_parameters(self): - error = validate({ - 'query': 'query', - 'illegal': 'whatever', - 'another': 3 - }, self.V1_VERSION) - self.assertEquals(len(error.errors), 2) - self.assertTrue(any(['illegal' in e for e in error.errors])) - self.assertTrue(any(['another' in e for e in error.errors])) - - def test_invalid_all_status_value(self): - error = validate({ - 'all_status': 'foo' - }, self.V1_VERSION) - self.assertEquals(len(error.errors), 1) - self.assertTrue(any(['allowed values' in e for e in error.errors])) - - def test_too_many_parameters(self): - error = validate({ - 'query': 'query', - 'query.advanced': 'query' - }, self.V1_VERSION) - self.assertEquals(len(error.errors), 1) - self.assertTrue(any(['combined' in e for e in error.errors])) - - def test_illegal_field(self): - error = validate({ - 'query.advanced': 'foo:bar' - }, self.V1_VERSION) - self.assertEquals(len(error.errors), 1) - self.assertTrue(any(['illegal' in e for e in error.errors])) - - - def test_invalid_filter(self): - error = validate({ - 'query': 'query', - 'filter': 'fi1:e,types:F,f3,field2:44' - }, self.V1_VERSION) - self.assertEquals(len(error.errors), 3) - self.assertTrue(any(['fi1' in e for e in error.errors])) - self.assertTrue(any(['field2' in e for e in error.errors])) - 
self.assertTrue(any(['f3' in e for e in error.errors])) - - def test_invalid_page(self): - for page in [ - 'whatever', '-5', '0', - str(ES_VARS['MAX_PAGE'] + 1), '10001' - ]: - error = validate({'query': 'query', 'page': page}, self.V1_VERSION) - self.assertEquals(len(error.errors), 1) - self.assertTrue(page in error.errors[0]) - - def test_multiple_errors(self): - error = validate({ - 'query': 'query', - 'illegal': 'whatever', - 'filter': 'fi1:e,types:F,f3,field2:44', - 'another': 3, - 'page': 'third' - }, self.V1_VERSION) - self.assertEquals(len(error.errors), 6) - self.assertTrue(any(['illegal' in e for e in error.errors])) - self.assertTrue(any(['another' in e for e in error.errors])) - self.assertTrue(any(['fi1' in e for e in error.errors])) - self.assertTrue(any(['field2' in e for e in error.errors])) - self.assertTrue(any(['f3' in e for e in error.errors])) - self.assertTrue(any(['third' in e for e in error.errors])) - - def test_all_good(self): - error = validate({ - 'query': 'query', - 'page': 4, - 'filter': 'country.country_code:DE,types:s,status:inactive', - 'all_status': '' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_all_good_country_name(self): - error = validate({ - 'query': 'query', - 'page': 4, - 'filter': 'country.country_name:Germany,types:s,status:inactive', - 'all_status': '' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_query_adv_no_fields(self): - error = validate({ - 'query.advanced': 'query' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_query_adv_wildcard(self): - error = validate({ - 'query.advanced': 'addresses.\*:bar' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_query_adv_exists(self): - error = validate({ - 'query.advanced': '_exists_:id' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_query_adv_esc(self): - error = validate({ - 'query.advanced': 'query\:query' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_query__all_status(self): - error = validate({ - 'query': 'query', - 'all_status': '' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_query_adv_no_fields_all_status(self): - error = validate({ - 'query.advanced': 'query', - 'all_status': '' - }, self.V1_VERSION) - self.assertIsNone(error) - - def test_no_query_all_status(self): - error = validate({ - 'all_status': '' - }, self.V1_VERSION) - self.assertIsNone(error) - -class BuildSearchQueryTestCase(SimpleTestCase): - V1_VERSION = 'v1' - - def setUp(self): - self.default_query = \ - {'aggs': {'types': {'terms': {'field': 'types', 'size': 10, 'min_doc_count': 1}}, - 'countries': {'terms': {'field': 'country.country_code', 'size': 10, 'min_doc_count': 1}}, - 'statuses': {'terms': {'field': 'status', 'size': 10, 'min_doc_count': 1}}}, - 'track_total_hits': True, 'from': 0, 'size': 20} - - def test_empty_query_default(self): - expected = {'query': { - 'bool': { - 'filter': [{'terms': {'status': ['active']}}] - } - }} - expected.update(self.default_query) - query = build_search_query({}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_empty_query_all_status(self): - expected = {'query': {'match_all': {}}, 'track_total_hits': True} - expected.update(self.default_query) - query = build_search_query({'all_status':''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_empty_query_all_status_false(self): - expected = {'query': { - 'bool': { - 'filter': [{'terms': {'status': ['active']}}] - } - }} - expected.update(self.default_query) - query = 
build_search_query({'all_status':'false'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_query_id(self): - expected = {'query': { - 'match': { - 'id': { - 'query': 'https://ror.org/0w7hudk23', - 'operator': 'and' - } - } - }} - - expected.update(self.default_query) - - query = build_search_query({'query': '0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'ror.org/0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'ror.org%2F0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'http://ror.org/0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - query = build_search_query( - {'query': 'http%3A%2F%2Fror.org%2F0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'https://ror.org/0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - query = build_search_query( - {'query': 'https%3A%2F%2Fror.org%2F0w7hudk23'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_query_default(self): - expected = {'query': { - 'bool': { - 'filter': [{'terms': {'status': ['active']}}], - 'must': [{'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - }}] - } - }} - expected.update(self.default_query) - query = build_search_query({'query': 'query terms'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_query_all_status(self): - expected = {'query': { - 'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - } - }} - expected.update(self.default_query) - query = build_search_query({'query': 'query terms', 'all_status': ''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_query_advanced(self): - expected = {'query': { - 'bool': { - 'filter': [{'terms': {'status': ['active']}}], - 'must': [{ - 'query_string': { - 'query': 'query terms', - 'default_field': '*', - 'default_operator':'and', - 'fuzzy_max_expansions': 1 - } - }] - } - }} - expected.update(self.default_query) - query = build_search_query({'query.advanced': 'query terms'}, self.V1_VERSION,) - self.assertEquals(query.to_dict(), expected) - - def test_query_advanced_all_status(self): - expected = {'query': { - 'bool': { - 'must': [{ - 'query_string': { - 'query': 'query terms', - 'default_field': '*', - 'default_operator':'and', - 'fuzzy_max_expansions': 1 - } - }] - } - }} - expected.update(self.default_query) - query = build_search_query({'query.advanced': 'query terms', 'all_status': ''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_query_advanced_status_filter(self): - expected = {'query': { - 'bool': { - 'filter': [{'terms': {'status': ('inactive',)}}], - 'must': [{ - 'query_string': { - 'query': 'query terms', - 'default_field': '*', - 'default_operator':'and', - 'fuzzy_max_expansions': 1 - } - }] - } - }} - expected.update(self.default_query) - f = 'status:inactive' - query = build_search_query({'query.advanced': 'query terms', 'filter': f}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_query_advanced_status_field(self): - expected = {'query': { - 'bool': { - 'must': [{ - 
'query_string': { - 'query': 'status:inactive', - 'default_field': '*', - 'default_operator':'and', - 'fuzzy_max_expansions': 1 - } - }] - } - }} - expected.update(self.default_query) - query = build_search_query({'query.advanced': 'status:inactive'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_filter(self): - f = 'key1:val1,k2:value2' - expected = {'query': { - 'bool': { - 'filter': [ - {'terms': {'key1': ('val1',)}}, - {'terms': {'k2': ('value2',)}}, - {'terms': {'status': ['active']}} - ], - } - }} - expected.update(self.default_query) - query = build_search_query({'filter': f}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_filter_status_filter(self): - f = 'key1:val1,k2:value2,status:inactive' - expected = {'query': { - 'bool': { - 'filter': [ - {'terms': {'key1': ('val1',)}}, - {'terms': {'k2': ('value2',)}}, - {'terms': {'status': ('inactive',)}} - ], - } - }} - expected.update(self.default_query) - query = build_search_query({'filter': f}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_filter_all_status(self): - f = 'key1:val1,k2:value2' - expected = {'query': { - 'bool': { - 'filter': [ - {'terms': {'key1': ('val1',)}}, - {'terms': {'k2': ('value2',)}}, - ], - } - }} - expected.update(self.default_query) - query = build_search_query({'filter': f, 'all_status': ''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_filter_query(self): - f = 'key1:val1,k2:value2' - expected = {'query': { - 'bool': { - 'filter': [ - {'terms': {'key1': ('val1',)}}, - {'terms': {'k2': ('value2',)}}, - {'terms': {'status': ['active']}} - ], - 'must': [{ - 'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - } - }] - } - }} - expected.update(self.default_query) - query = build_search_query({'query': 'query terms', 'filter': f}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_filter_query_all_status(self): - f = 'key1:val1,k2:value2' - expected = {'query': { - 'bool': { - 'filter': [ - {'terms': {'key1': ('val1',)}}, - {'terms': {'k2': ('value2',)}}, - ], - 'must': [{ - 'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - } - }] - } - }} - expected.update(self.default_query) - query = build_search_query({'query': 'query terms', 'filter': f, 'all_status': ''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_pagination(self): - expected = {'query': {'bool': {'filter': [{'terms': {'status': ['active']}}]}}} - expected.update(self.default_query) - expected['from'] = 80 - query = build_search_query({'page': '5'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_pagination_all_status(self): - expected = {'query': {'match_all': {}}} - expected.update(self.default_query) - expected['from'] = 80 - query = build_search_query({'page': '5', 'all_status': ''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_pagination_query(self): - expected = {'query': { - 'bool': { - 'filter': [{'terms': {'status': ['active']}}], - 'must': [{ - 'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - } - }] - } - }} - expected.update(self.default_query) - expected['from'] = 80 - query = 
build_search_query({'page': '5', 'query': 'query terms'}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - def test_pagination_query_all_status(self): - expected = {'query': { - 'nested': { - 'path': 'names_ids', - 'score_mode': 'max', - 'query': { - 'query_string': { - 'query': 'query terms', - 'fuzzy_max_expansions': 1 - } - } - } - }} - expected.update(self.default_query) - expected['from'] = 80 - query = build_search_query({'page': '5', 'query': 'query terms', 'all_status': ''}, self.V1_VERSION) - self.assertEquals(query.to_dict(), expected) - - -class BuildRetrieveQueryTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def test_retrieve_query(self): - query = build_retrieve_query('ror-id', self.V1_VERSION) - self.assertEquals(query.to_dict(), { - 'query': { - 'match': { - 'id': { - 'operator': 'and', - 'query': 'ror-id' - } - } - }, - 'track_total_hits': True - }) - -class SearchOrganizationsTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_search_es7.json'), 'r') as f: - self.test_data = json.load(f) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_search_organizations(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - error, organizations = search_organizations({}, self.V1_VERSION) - self.assertIsNone(error) - - search_mock.assert_called_once() - self.assertEquals(organizations.number_of_results, - self.test_data['hits']['total']['value']) - self.assertEquals(organizations.time_taken, self.test_data['took']) - self.assertEquals(len(organizations.items), - len(self.test_data['hits']['hits'])) - for ret, exp in zip(organizations.items, - self.test_data['hits']['hits']): - self.assertEquals(ret.id, exp['_source']['id']) - self.assertEquals(ret.name, exp['_source']['name']) - self.assertEquals( - len(organizations.meta.types), - len(self.test_data['aggregations']['types']['buckets'])) - for ret, exp in \ - zip(organizations.meta.types, - self.test_data['aggregations']['types']['buckets']): - self.assertEquals(ret.title, exp['key']) - self.assertEquals(ret.count, exp['doc_count']) - self.assertEquals( - len(organizations.meta.countries), - len(self.test_data['aggregations']['countries']['buckets'])) - for ret, exp in \ - zip(organizations.meta.countries, - self.test_data['aggregations']['countries']['buckets']): - self.assertEquals(ret.id, exp['key'].lower()) - self.assertEquals(ret.count, exp['doc_count']) - self.assertEquals( - len(organizations.meta.statuses), - len(self.test_data['aggregations']['statuses']['buckets'])) - for ret, exp in \ - zip(organizations.meta.statuses, - self.test_data['aggregations']['statuses']['buckets']): - self.assertEquals(ret.id, exp['key'].lower()) - self.assertEquals(ret.count, exp['doc_count']) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_malformed_search_organizations(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - error, organizations = search_organizations({ - 'query': 'query', - 'illegal': 'whatever', - 'filter': 'fi1:e,types:F,f3,field2:44', - 'another': 3, - 'page': 'third' - }, self.V1_VERSION) - self.assertIsNone(organizations) - - search_mock.assert_not_called() - self.assertEquals(len(error.errors), 6) - - -class RetrieveOrganizationsTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 
'data/test_data_retrieve_es7.json'), 'r') as f: - self.test_data = json.load(f) - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_empty_es7.json'), 'r') as f: - self.test_data_empty = json.load(f) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_retrieve_organization(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - error, organization = retrieve_organization('ror-id', self.V1_VERSION) - self.assertIsNone(error) - - search_mock.assert_called_once() - expected = self.test_data['hits']['hits'][0]['_source'] - self.assertEquals(organization.id, expected['id']) - self.assertEquals(organization.name, expected['name']) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_retrieve_non_existing_organization(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data_empty, - self.test_data_empty['hits']['hits']) - - error, organization = retrieve_organization('ror-id', self.V1_VERSION) - self.assertIsNone(organization) - - search_mock.assert_called_once() - self.assertEquals(len(error.errors), 1) - self.assertTrue('ror-id' in error.errors[0]) diff --git a/rorapi/tests/tests_unit/tests_queries_v2.py b/rorapi/tests/tests_unit/tests_queries_v2.py index dca80f39..8e5ef41d 100644 --- a/rorapi/tests/tests_unit/tests_queries_v2.py +++ b/rorapi/tests/tests_unit/tests_queries_v2.py @@ -34,14 +34,12 @@ def test_extract_id(self): class ValidationTestCase(SimpleTestCase): - V2_VERSION = 'v2' - def test_illegal_parameters(self): error = validate({ 'query': 'query', 'illegal': 'whatever', 'another': 3 - }, self.V2_VERSION) + }) self.assertEquals(len(error.errors), 2) self.assertTrue(any(['illegal' in e for e in error.errors])) self.assertTrue(any(['another' in e for e in error.errors])) @@ -49,7 +47,7 @@ def test_illegal_parameters(self): def test_invalid_all_status_value(self): error = validate({ 'all_status': 'foo' - }, self.V2_VERSION) + }) self.assertEquals(len(error.errors), 1) self.assertTrue(any(['allowed values' in e for e in error.errors])) @@ -57,14 +55,14 @@ def test_too_many_parameters(self): error = validate({ 'query': 'query', 'query.advanced': 'query' - }, self.V2_VERSION) + }) self.assertEquals(len(error.errors), 1) self.assertTrue(any(['combined' in e for e in error.errors])) def test_illegal_field(self): error = validate({ 'query.advanced': 'foo:bar' - }, self.V2_VERSION) + }) self.assertEquals(len(error.errors), 1) self.assertTrue(any(['illegal' in e for e in error.errors])) @@ -73,7 +71,7 @@ def test_invalid_filter(self): error = validate({ 'query': 'query', 'filter': 'fi1:e,types:F,f3,field2:44' - }, self.V2_VERSION) + }) self.assertEquals(len(error.errors), 3) self.assertTrue(any(['fi1' in e for e in error.errors])) self.assertTrue(any(['field2' in e for e in error.errors])) @@ -84,7 +82,7 @@ def test_invalid_page(self): 'whatever', '-5', '0', str(ES_VARS['MAX_PAGE'] + 1), '10001' ]: - error = validate({'query': 'query', 'page': page}, self.V2_VERSION) + error = validate({'query': 'query', 'page': page}) self.assertEquals(len(error.errors), 1) self.assertTrue(page in error.errors[0]) @@ -95,7 +93,7 @@ def test_multiple_errors(self): 'filter': 'fi1:e,types:F,f3,field2:44', 'another': 3, 'page': 'third' - }, self.V2_VERSION) + }) self.assertEquals(len(error.errors), 6) self.assertTrue(any(['illegal' in e for e in error.errors])) self.assertTrue(any(['another' in e for e in error.errors])) @@ -110,7 +108,7 @@ def test_all_good(self): 'page': 4, 
'filter': 'country.country_code:DE,types:s,status:inactive', 'all_status': '' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_all_good_country_name(self): @@ -119,55 +117,54 @@ def test_all_good_country_name(self): 'page': 4, 'filter': 'country.country_name:Germany,types:s,status:inactive', 'all_status': '' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_query_adv_no_fields(self): error = validate({ 'query.advanced': 'query' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_query_adv_wildcard(self): error = validate({ 'query.advanced': 'locations.\*:bar' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_query_adv_exists(self): error = validate({ 'query.advanced': '_exists_:id' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_query_adv_esc(self): error = validate({ 'query.advanced': 'query\:query' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_query__all_status(self): error = validate({ 'query': 'query', 'all_status': '' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_query_adv_no_fields_all_status(self): error = validate({ 'query.advanced': 'query', 'all_status': '' - }, self.V2_VERSION) + }) self.assertIsNone(error) def test_no_query_all_status(self): error = validate({ 'all_status': '' - }, self.V2_VERSION) + }) self.assertIsNone(error) class BuildSearchQueryTestCase(SimpleTestCase): - V2_VERSION = 'v2' def setUp(self): self.default_query = \ @@ -184,13 +181,13 @@ def test_empty_query_default(self): } }} expected.update(self.default_query) - query = build_search_query({}, self.V2_VERSION) + query = build_search_query({}) self.assertEquals(query.to_dict(), expected) def test_empty_query_all_status(self): expected = {'query': {'match_all': {}}, 'track_total_hits': True} expected.update(self.default_query) - query = build_search_query({'all_status':''}, self.V2_VERSION) + query = build_search_query({'all_status':''}) self.assertEquals(query.to_dict(), expected) def test_empty_query_all_status_false(self): @@ -200,7 +197,7 @@ def test_empty_query_all_status_false(self): } }} expected.update(self.default_query) - query = build_search_query({'all_status':'false'}, self.V2_VERSION) + query = build_search_query({'all_status':'false'}) self.assertEquals(query.to_dict(), expected) def test_query_id(self): @@ -215,21 +212,21 @@ def test_query_id(self): expected.update(self.default_query) - query = build_search_query({'query': '0w7hudk23'}, self.V2_VERSION) + query = build_search_query({'query': '0w7hudk23'}) self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'ror.org/0w7hudk23'}, self.V2_VERSION) + query = build_search_query({'query': 'ror.org/0w7hudk23'}) self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'ror.org%2F0w7hudk23'}, self.V2_VERSION) + query = build_search_query({'query': 'ror.org%2F0w7hudk23'}) self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'http://ror.org/0w7hudk23'}, self.V2_VERSION) + query = build_search_query({'query': 'http://ror.org/0w7hudk23'}) self.assertEquals(query.to_dict(), expected) query = build_search_query( - {'query': 'http%3A%2F%2Fror.org%2F0w7hudk23'}, self.V2_VERSION) + {'query': 'http%3A%2F%2Fror.org%2F0w7hudk23'}) self.assertEquals(query.to_dict(), expected) - query = build_search_query({'query': 'https://ror.org/0w7hudk23'}, self.V2_VERSION) + query = build_search_query({'query': 'https://ror.org/0w7hudk23'}) self.assertEquals(query.to_dict(), expected) query = 
build_search_query( - {'query': 'https%3A%2F%2Fror.org%2F0w7hudk23'}, self.V2_VERSION) + {'query': 'https%3A%2F%2Fror.org%2F0w7hudk23'}) self.assertEquals(query.to_dict(), expected) def test_query_default(self): @@ -249,7 +246,7 @@ def test_query_default(self): } }} expected.update(self.default_query) - query = build_search_query({'query': 'query terms'}, self.V2_VERSION) + query = build_search_query({'query': 'query terms'}) self.assertEquals(query.to_dict(), expected) def test_query_all_status(self): @@ -266,7 +263,7 @@ def test_query_all_status(self): } }} expected.update(self.default_query) - query = build_search_query({'query': 'query terms', 'all_status': ''}, self.V2_VERSION) + query = build_search_query({'query': 'query terms', 'all_status': ''}) self.assertEquals(query.to_dict(), expected) def test_query_advanced(self): @@ -284,7 +281,7 @@ def test_query_advanced(self): } }} expected.update(self.default_query) - query = build_search_query({'query.advanced': 'query terms'}, self.V2_VERSION,) + query = build_search_query({'query.advanced': 'query terms'}) self.assertEquals(query.to_dict(), expected) def test_query_advanced_all_status(self): @@ -301,7 +298,7 @@ def test_query_advanced_all_status(self): } }} expected.update(self.default_query) - query = build_search_query({'query.advanced': 'query terms', 'all_status': ''}, self.V2_VERSION) + query = build_search_query({'query.advanced': 'query terms', 'all_status': ''}) self.assertEquals(query.to_dict(), expected) def test_query_advanced_status_filter(self): @@ -320,7 +317,7 @@ def test_query_advanced_status_filter(self): }} expected.update(self.default_query) f = 'status:inactive' - query = build_search_query({'query.advanced': 'query terms', 'filter': f}, self.V2_VERSION) + query = build_search_query({'query.advanced': 'query terms', 'filter': f}) self.assertEquals(query.to_dict(), expected) def test_query_advanced_status_field(self): @@ -337,7 +334,7 @@ def test_query_advanced_status_field(self): } }} expected.update(self.default_query) - query = build_search_query({'query.advanced': 'status:inactive'}, self.V2_VERSION) + query = build_search_query({'query.advanced': 'status:inactive'}) self.assertEquals(query.to_dict(), expected) def test_filter(self): @@ -352,7 +349,7 @@ def test_filter(self): } }} expected.update(self.default_query) - query = build_search_query({'filter': f}, self.V2_VERSION) + query = build_search_query({'filter': f}) self.assertEquals(query.to_dict(), expected) def test_filter_status_filter(self): @@ -367,7 +364,7 @@ def test_filter_status_filter(self): } }} expected.update(self.default_query) - query = build_search_query({'filter': f}, self.V2_VERSION) + query = build_search_query({'filter': f}) self.assertEquals(query.to_dict(), expected) def test_filter_whitespace_normalization(self): @@ -381,7 +378,7 @@ def test_filter_whitespace_normalization(self): } }} expected.update(self.default_query) - query = build_search_query({'filter': f}, self.V2_VERSION) + query = build_search_query({'filter': f}) self.assertEquals(query.to_dict(), expected) def test_filter_all_status(self): @@ -395,7 +392,7 @@ def test_filter_all_status(self): } }} expected.update(self.default_query) - query = build_search_query({'filter': f, 'all_status': ''}, self.V2_VERSION) + query = build_search_query({'filter': f, 'all_status': ''}) self.assertEquals(query.to_dict(), expected) def test_filter_query(self): @@ -422,7 +419,7 @@ def test_filter_query(self): } }} expected.update(self.default_query) - query = build_search_query({'query': 
'query terms', 'filter': f}, self.V2_VERSION) + query = build_search_query({'query': 'query terms', 'filter': f}) self.assertEquals(query.to_dict(), expected) def test_filter_query_all_status(self): @@ -448,21 +445,21 @@ def test_filter_query_all_status(self): } }} expected.update(self.default_query) - query = build_search_query({'query': 'query terms', 'filter': f, 'all_status': ''}, self.V2_VERSION) + query = build_search_query({'query': 'query terms', 'filter': f, 'all_status': ''}) self.assertEquals(query.to_dict(), expected) def test_pagination(self): expected = {'query': {'bool': {'filter': [{'terms': {'status': ['active']}}]}}} expected.update(self.default_query) expected['from'] = 80 - query = build_search_query({'page': '5'}, self.V2_VERSION) + query = build_search_query({'page': '5'}) self.assertEquals(query.to_dict(), expected) def test_pagination_all_status(self): expected = {'query': {'match_all': {}}} expected.update(self.default_query) expected['from'] = 80 - query = build_search_query({'page': '5', 'all_status': ''}, self.V2_VERSION) + query = build_search_query({'page': '5', 'all_status': ''}) self.assertEquals(query.to_dict(), expected) def test_pagination_query(self): @@ -485,7 +482,7 @@ def test_pagination_query(self): }} expected.update(self.default_query) expected['from'] = 80 - query = build_search_query({'page': '5', 'query': 'query terms'}, self.V2_VERSION) + query = build_search_query({'page': '5', 'query': 'query terms'}) self.assertEquals(query.to_dict(), expected) def test_pagination_query_all_status(self): @@ -503,16 +500,14 @@ def test_pagination_query_all_status(self): }} expected.update(self.default_query) expected['from'] = 80 - query = build_search_query({'page': '5', 'query': 'query terms', 'all_status': ''}, self.V2_VERSION) + query = build_search_query({'page': '5', 'query': 'query terms', 'all_status': ''}) self.assertEquals(query.to_dict(), expected) class BuildRetrieveQueryTestCase(SimpleTestCase): - V2_VERSION = 'v2' - def test_retrieve_query(self): - query = build_retrieve_query('ror-id', self.V2_VERSION) + query = build_retrieve_query('ror-id') self.assertEquals(query.to_dict(), { 'query': { 'match': { @@ -527,8 +522,6 @@ def test_retrieve_query(self): class SearchOrganizationsTestCase(SimpleTestCase): - V2_VERSION = 'v2' - def setUp(self): with open( os.path.join(os.path.dirname(__file__), @@ -540,7 +533,7 @@ def test_search_organizations(self, search_mock): search_mock.return_value = \ IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - error, organizations = search_organizations({}, self.V2_VERSION) + error, organizations = search_organizations({}) self.assertIsNone(error) search_mock.assert_called_once() @@ -602,7 +595,7 @@ def test_malformed_search_organizations(self, search_mock): 'filter': 'fi1:e,types:F,f3,field2:44', 'another': 3, 'page': 'third' - }, self.V2_VERSION) + }) self.assertIsNone(organizations) search_mock.assert_not_called() @@ -611,8 +604,6 @@ def test_malformed_search_organizations(self, search_mock): class RetrieveOrganizationsTestCase(SimpleTestCase): - V2_VERSION = 'v2' - def setUp(self): with open( os.path.join(os.path.dirname(__file__), @@ -628,7 +619,7 @@ def test_retrieve_organization(self, search_mock): search_mock.return_value = \ IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - error, organization = retrieve_organization('ror-id', self.V2_VERSION) + error, organization = retrieve_organization('ror-id') self.assertIsNone(error) search_mock.assert_called_once() @@ -647,7 +638,7 @@ 
def test_retrieve_non_existing_organization(self, search_mock): IterableAttrDict(self.test_data_empty, self.test_data_empty['hits']['hits']) - error, organization = retrieve_organization('ror-id', self.V2_VERSION) + error, organization = retrieve_organization('ror-id') self.assertIsNone(organization) search_mock.assert_called_once() diff --git a/rorapi/tests/tests_unit/tests_views_v1.py b/rorapi/tests/tests_unit/tests_views_v1.py deleted file mode 100644 index cc2fdc72..00000000 --- a/rorapi/tests/tests_unit/tests_views_v1.py +++ /dev/null @@ -1,280 +0,0 @@ -import json -import mock -import os - -from django.test import SimpleTestCase, Client -from rest_framework.test import APIRequestFactory - -from rorapi.common import views - -from .utils import IterableAttrDict - -factory = APIRequestFactory() - -class ViewListTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_search_es7.json'), 'r') as f: - self.test_data = json.load(f) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_search_organizations(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - view = views.OrganizationViewSet.as_view({'get': 'list'}) - request = factory.get('/v1/organizations') - response = view(request, version=self.V1_VERSION) - response.render() - organizations = json.loads(response.content.decode('utf-8')) - - search_mock.assert_called_once() - - self.assertEquals(organizations['number_of_results'], - self.test_data['hits']['total']['value']) - self.assertEquals(organizations['time_taken'], self.test_data['took']) - self.assertEquals( - len(organizations['meta']['types']), - len(self.test_data['aggregations']['types']['buckets'])) - for ret, exp in \ - zip(organizations['meta']['types'], - self.test_data['aggregations']['types']['buckets']): - self.assertEquals(ret['title'], exp['key']) - self.assertEquals(ret['count'], exp['doc_count']) - self.assertEquals( - len(organizations['meta']['countries']), - len(self.test_data['aggregations']['countries']['buckets'])) - for ret, exp in \ - zip(organizations['meta']['countries'], - self.test_data['aggregations']['countries']['buckets']): - self.assertEquals(ret['id'], exp['key'].lower()) - self.assertEquals(ret['count'], exp['doc_count']) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_invalid_search_organizations(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - view = views.OrganizationViewSet.as_view({'get': 'list'}) - request = factory.get('/v1/organizations?query=query&illegal=whatever&' + - 'filter=fi1:e,types:F,f3,field2:44&another=3&' + - 'page=third') - response = view(request, version=self.V1_VERSION) - response.render() - organizations = json.loads(response.content.decode('utf-8')) - - self.assertEquals(list(organizations.keys()), ['errors']) - self.assertEquals(len(organizations['errors']), 6) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_query_redirect(self, search_mock): - client = Client() - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - response = client.get('/v1/organizations?query.names=query') - self.assertRedirects(response, '/v1/organizations?query=query') - -class ViewRetrievalTestCase(SimpleTestCase): - - V1_VERSION = 'v1' - - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 
'data/test_data_retrieve_es7.json'), 'r') as f: - self.test_data = json.load(f) - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_empty_es7.json'), 'r') as f: - self.test_data_empty = json.load(f) - - self.maxDiff = None - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_retrieve_organization(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - - view = views.OrganizationViewSet.as_view({'get': 'retrieve'}) - request = factory.get('/v1/organizations/https://ror.org/02atag894') - response = view(request, pk='https://ror.org/02atag894', version=self.V1_VERSION) - response.render() - organization = json.loads(response.content.decode('utf-8')) - print("organization:") - print(organization) - print("test data:") - print(self.test_data['hits']['hits'][0]['_source']) - # go through every attribute and check to see that they are equal - self.assertEquals(response.status_code, 200) - self.assertEquals(organization, self.test_data['hits']['hits'][0]['_source']) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_retrieve_non_existing_organization(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data_empty, - self.test_data_empty['hits']['hits']) - - view = views.OrganizationViewSet.as_view({'get': 'retrieve'}) - request = factory.get('/v1/organizations/https://ror.org/052gg0110') - response = view(request, pk='https://ror.org/052gg0110', version=self.V1_VERSION) - response.render() - organization = json.loads(response.content.decode('utf-8')) - - self.assertEquals(response.status_code, 404) - self.assertEquals(list(organization.keys()), ['errors']) - self.assertEquals(len(organization['errors']), 1) - self.assertTrue(any(['does not exist' in e for e in organization['errors']])) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_retrieve_invalid_id(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data_empty, - self.test_data_empty['hits']['hits']) - - view = views.OrganizationViewSet.as_view({'get': 'retrieve'}) - request = factory.get('/v1/organizations/https://ror.org/abc123') - response = view(request, pk='https://ror.org/abc123', version=self.V1_VERSION) - response.render() - organization = json.loads(response.content.decode('utf-8')) - - self.assertEquals(response.status_code, 404) - self.assertEquals(list(organization.keys()), ['errors']) - self.assertEquals(len(organization['errors']), 1) - self.assertTrue(any(['not a valid' in e for e in organization['errors']])) - -class GenerateIdViewTestCase(SimpleTestCase): - - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_empty_es7.json'), 'r') as f: - self.test_data_empty = json.load(f) - self.maxDiff = None - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('elasticsearch_dsl.Search.execute') - def test_generateid_success(self, search_mock, permission_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data_empty, - self.test_data_empty['hits']['hits']) - permission_mock.return_value = True - response = self.client.get('/generateid') - self.assertEquals(response.status_code, 200) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - def test_generateid_fail_no_permission(self, permission_mock): - permission_mock.return_value = False - response = self.client.get('/generateid') - self.assertEquals(response.status_code, 403) - -class 
GenerateAddressViewTestCase(SimpleTestCase): - V1_VERSION = 'v1' - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_address.json'), 'r') as f: - self.test_data_address = json.load(f) - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_address_empty.json'), 'r') as f: - self.test_data_address_empty = json.load(f) - self.maxDiff = None - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('update_address.new_geonames') - def test_generateaddress_success(self, address_mock, permission_mock): - address_mock.return_value = self.test_data_address - permission_mock.return_value = True - response = self.client.get('/v1/generateaddress/5378538') - self.assertContains(response, 'address') - self.assertEquals(response.status_code, 200) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('update_address.new_geonames') - def test_generateaddress_fail_empty(self, address_mock, permission_mock): - address_mock.return_value = self.test_data_address_empty - permission_mock.return_value = True - response = self.client.get('/v1/generateaddress/0000000') - self.assertContains(response, 'Expecting value') - self.assertEquals(response.status_code, 200) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - def test_generateid_fail_no_permission(self, permission_mock): - permission_mock.return_value = False - response = self.client.get('/v1/generateaddress/5378538') - self.assertEquals(response.status_code, 403) - -class IndexRorViewTestCase(SimpleTestCase): - def setUp(self): - self.success_msg = {"status": "OK", "msg": "dir indexed"} - self.error_msg = {"status": "ERROR", "msg": "error"} - self.maxDiff = None - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('rorapi.common.views.process_files') - def test_index_ror_success(self, index_mock, permission_mock): - index_mock.return_value = self.success_msg - permission_mock.return_value = True - response = self.client.get('/v1/indexdata/foo') - self.assertEquals(response.status_code, 200) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('rorapi.common.views.process_files') - def test_index_ror_fail_error(self, index_mock, permission_mock): - index_mock.return_value = self.error_msg - permission_mock.return_value = True - response = self.client.get('/v1/indexdata/foo') - self.assertEquals(response.status_code, 400) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - def test_index_ror_fail_no_permission(self, permission_mock): - permission_mock.return_value = False - response = self.client.get('/v1/indexdata/foo') - self.assertEquals(response.status_code, 403) - -class HeartbeatViewTestCase(SimpleTestCase): - def setUp(self): - with open( - os.path.join(os.path.dirname(__file__), - 'data/test_data_search_es7.json'), 'r') as f: - self.test_data = json.load(f) - - @mock.patch('elasticsearch_dsl.Search.execute') - def test_heartbeat_success(self, search_mock): - search_mock.return_value = \ - IterableAttrDict(self.test_data, self.test_data['hits']['hits']) - response = self.client.get('/heartbeat') - self.assertEquals(response.status_code, 200) - -class IndexRorDumpViewTestCase(SimpleTestCase): - def setUp(self): - self.success_msg = "SUCCESS: ROR dataset vX.XX-XXXX-XX-XX-ror-data indexed in version X. Using test repo: X" - self.error_msg = "ERROR: ROR dataset for file vX.XX-XXXX-XX-XX-ror-data not found. 
Please generate the data dump first." - self.maxDiff = None - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('django.core.management.call_command') - def test_index_ror_success(self, index_mock, permission_mock): - index_mock.return_value = self.success_msg - permission_mock.return_value = True - response = self.client.get('/v1/indexdatadump/v1.1-1111-11-11-ror-data/prod') - self.assertEquals(response.status_code, 200) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - @mock.patch('django.core.management.call_command') - def test_index_ror_fail_error(self, index_mock, permission_mock): - index_mock.return_value = self.error_msg - permission_mock.return_value = True - response = self.client.get('/v1/indexdatadump/v1.1-1111-11-11-ror-data/prod') - self.assertEquals(response.status_code, 400) - - @mock.patch('rorapi.common.views.OurTokenPermission.has_permission') - def test_index_ror_fail_no_permission(self, permission_mock): - permission_mock.return_value = False - response = self.client.get('/v1/indexdatadump/v1.1-1111-11-11-ror-data/prod') - self.assertEquals(response.status_code, 403) \ No newline at end of file diff --git a/rorapi/v1/__init__.py b/rorapi/v1/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rorapi/v1/index_template_es7.json b/rorapi/v1/index_template_es7.json deleted file mode 100644 index c8b36e6f..00000000 --- a/rorapi/v1/index_template_es7.json +++ /dev/null @@ -1,307 +0,0 @@ -{ - "index_patterns": [ - "organizations" - ], - "settings": { - "number_of_shards": 1, - "analysis": { - "analyzer": { - "string_lowercase": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "ascii_folding" - ] - } - }, - "filter": { - "ascii_folding": { - "type": "asciifolding", - "preserve_original": true - } - } - } - }, - "mappings": { - "properties": { - "id": { - "type": "keyword" - }, - "name": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - }, - "norm": { - "type": "text", - "analyzer": "string_lowercase", - "fielddata": true - } - } - }, - "links": { - "type": "text", - "analyzer": "simple" - }, - "wikipedia_url": { - "type": "text", - "analyzer": "simple" - }, - "aliases": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - }, - "norm": { - "type": "text", - "analyzer": "string_lowercase", - "fielddata": true - } - } - }, - "acronyms": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - }, - "norm": { - "type": "text", - "analyzer": "string_lowercase", - "fielddata": true - } - } - }, - "status": { - "type": "keyword" - }, - "labels": { - "properties": { - "label": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - }, - "norm": { - "type": "text", - "analyzer": "string_lowercase", - "fielddata": true - } - } - }, - "iso639": { - "type": "keyword" - } - } - }, - "country": { - "properties": { - "country_code": { - "type": "keyword" - }, - "country_name": { - "type": "keyword" - } - } - }, - "types": { - "type": "keyword" - }, - "email_address": { - "type": "text" - }, - "established": { - "type": "date" - }, - "ip_addresses": { - "type": "text" - }, - "addresses": { - "properties": { - "line": { - "type": "text" - }, - "lat": { - "type": "float" - }, - "lng": { - "type": "float" - }, - "postcode": { - "type": "keyword" - }, - "primary": { - "type": "boolean" - }, - "city": { - "type": "keyword" - }, - "state": { - "type": "keyword" - }, - "state_code": { - "type": "keyword" - }, - "geonames_city": { - 
"properties": { - "id": { - "type": "integer" - }, - "city": { - "type": "keyword" - }, - "nuts_level1": { - "properties": { - "name": { - "type": "text" - }, - "code": { - "type": "keyword" - } - } - }, - "nuts_level2": { - "properties": { - "name": { - "type": "text" - }, - "code": { - "type": "keyword" - } - } - }, - "nuts_level3": { - "properties": { - "name": { - "type": "text" - }, - "code": { - "type": "keyword" - } - } - }, - "geonames_admin1": { - "properties": { - "ascii_name": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "code": { - "type": "keyword" - } - } - }, - "geonames_admin2": { - "properties": { - "ascii_name": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "code": { - "type": "keyword" - } - } - }, - "license": { - "properties": { - "attribution": { - "type": "text" - }, - "license": { - "type": "text" - } - } - } - } - } - } - }, - "relationships": { - "properties": { - "type": { - "type": "keyword" - }, - "label": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - }, - "norm": { - "type": "text", - "analyzer": "string_lowercase", - "fielddata": true - } - } - }, - "id": { - "type": "keyword" - } - } - }, - "external_ids": { - "properties": { - "GRID": { - "properties": { - "preferred": { - "type": "keyword" - }, - "all": { - "type": "keyword" - } - } - }, - "ISNI": { - "properties": { - "preferred": { - "type": "keyword" - }, - "all": { - "type": "keyword" - } - } - }, - "FundRef": { - "properties": { - "preferred": { - "type": "keyword" - }, - "all": { - "type": "keyword" - } - } - }, - "Wikidata": { - "properties": { - "preferred": { - "type": "keyword" - }, - "all": { - "type": "keyword" - } - } - } - } - }, - "names_ids": { - "type": "nested", - "properties": { - "id": { - "type": "keyword" - }, - "name": { - "type": "text", - "analyzer": "string_lowercase" - } - } - } - } - } -} \ No newline at end of file diff --git a/rorapi/v1/models.py b/rorapi/v1/models.py deleted file mode 100644 index 18fb9a1b..00000000 --- a/rorapi/v1/models.py +++ /dev/null @@ -1,151 +0,0 @@ -from geonamescache.mappers import country -from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity - -class Aggregations: - """Aggregations model class""" - - def __init__(self, data): - self.types = [TypeBucket(b) for b in data.types.buckets] - self.countries = [CountryBucket(b) for b in data.countries.buckets] - self.statuses = [StatusBucket(b) for b in data.statuses.buckets] - -class GeoAdmin: - def __init__(self, data): - if hasattr(data, 'id'): - self.id = data.id - else: - self.id = None - if hasattr(data, 'code'): - self.code = data.code - else: - self.code = None - if hasattr(data, 'name'): - self.name = data.name - else: - self.name = None - if hasattr(data, 'ascii_name'): - self.ascii_name = data.ascii_name - else: - self.ascii_name = None - - -class Nuts: - """A model class for storing the NUTS metadata""" - def __init__(self, data): - self.code = getattr(data, 'code', None) - self.name = getattr(data, 'name', None) - - -class License: - """A model class for storing license metadata""" - def __init__(self, data): - self.attribution = getattr(data, 'attribution', None) - self.license = getattr(data, 'license', None) - - -class GeoNamesCity: - """A model class for storing geonames city hash""" - def __init__(self, data): - self.id = getattr(data, 'id', None) - self.city = getattr(data, 'city', None) - if hasattr(data, 'license'): - self.license = License(data.license) - else: - self.license = None - if 
hasattr(data, 'geonames_admin1'): - self.geonames_admin1 = GeoAdmin(data.geonames_admin1) - else: - self.geonames_admin1 = None - if hasattr(data, 'geonames_admin2'): - self.geonames_admin2 = GeoAdmin(data.geonames_admin2) - else: - self.geonames_admin2 = None - if hasattr(data, 'nuts_level1'): - self.nuts_level1 = GeoAdmin(data.nuts_level1) - else: - self.nuts_level1 = None - if hasattr(data, 'nuts_level2'): - self.nuts_level2 = GeoAdmin(data.nuts_level2) - else: - self.nuts_level2 = None - if hasattr(data, 'nuts_level3'): - self.nuts_level3 = GeoAdmin(data.nuts_level3) - else: - self.nuts_level3 = None - - -class Addresses: - """A model class for storing addresses""" - def __init__(self, data): - self.country_geonames_id = data.country_geonames_id - self.lat = data.lat - self.lng = data.lng - self.line = data.line - self.state_code = data.state_code - self.state = getattr(data, 'state', None) - self.postcode = data.postcode - self.city = data.city - self.primary = data.primary - self.geonames_city = GeoNamesCity(data.geonames_city) - - -class ExternalIds: - """A model class for storing external identifiers""" - def __init__(self, data): - for a in [ - 'ISNI', 'FundRef', 'HESA', 'UCAS', 'UKPRN', 'CNRS', 'OrgRef', - 'Wikidata', 'GRID' - ]: - try: - setattr(self, a, Entity(getattr(data, a), - ['preferred', 'all'])) - except AttributeError: - pass - - -class Organization(Entity): - """Organization model class""" - def __init__(self, data): - if "_source" in data: - data = data["_source"] - super(Organization, self).__init__(data, [ - 'id', 'name', 'types', 'links', 'aliases', 'acronyms', 'status', - 'wikipedia_url', 'established', 'relationships', 'addresses' - ]) - self.labels = [Entity(l, ['label', 'iso639']) for l in data.labels] - self.country = Entity(data.country, ['country_name', 'country_code']) - self.ip_addresses = data.ip_addresses - self.established = getattr(data, 'established', None) - self.email_address = getattr(data, 'email_address', None) - self.relationships = [ - Entity(r, ['type', 'label', 'id']) for r in data.relationships - ] - self.addresses = [Addresses(a) for a in data.addresses] - self.external_ids = ExternalIds(data.external_ids) - - -class ListResult: - """A model class for the list of organizations returned from the search""" - def __init__(self, data): - self.number_of_results = data.hits.total.value - self.time_taken = data.took - self.items = [Organization(x) for x in data] - self.meta = Aggregations(data.aggregations) - - -class MatchedOrganization: - """A model class for an organization matched based on an affiliation - string""" - def __init__(self, data): - self.substring = data.substring - self.score = data.score - self.matching_type = data.matching_type - self.chosen = data.chosen - self.organization = Organization(data.organization) - - -class MatchingResult: - """A model class for the result of affiliation matching""" - def __init__(self, data): - self.number_of_results = len(data) - self.items = [MatchedOrganization(x) for x in data] \ No newline at end of file diff --git a/rorapi/v1/serializers.py b/rorapi/v1/serializers.py deleted file mode 100644 index 41fe9be1..00000000 --- a/rorapi/v1/serializers.py +++ /dev/null @@ -1,123 +0,0 @@ -from rest_framework import serializers -from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer - -class AggregationsSerializer(serializers.Serializer): - types = BucketSerializer(many=True) - countries = BucketSerializer(many=True) - statuses = BucketSerializer(many=True) - - 
-class OrganizationLabelSerializer(serializers.Serializer): - label = serializers.CharField() - iso639 = serializers.CharField() - - -class CountrySerializer(serializers.Serializer): - country_name = serializers.CharField() - country_code = serializers.CharField() - - -class LicenseSerializer(serializers.Serializer): - attribution = serializers.StringRelatedField() - license = serializers.StringRelatedField() - - -class NutsSerializer(serializers.Serializer): - name = serializers.CharField() - code = serializers.CharField() - - -class AddressGeoNamesSerializer(serializers.Serializer): - name = serializers.CharField() - id = serializers.IntegerField() - ascii_name = serializers.CharField() - code = serializers.CharField() - - -class GeoNamesCitySerializer(serializers.Serializer): - id = serializers.IntegerField() - city = serializers.StringRelatedField() - geonames_admin1 = AddressGeoNamesSerializer() - geonames_admin2 = AddressGeoNamesSerializer() - license = LicenseSerializer() - nuts_level1 = NutsSerializer() - nuts_level2 = NutsSerializer() - nuts_level3 = NutsSerializer() - - -class OrganizationAddressesSerializer(serializers.Serializer): - lat = serializers.DecimalField(max_digits=None, - decimal_places=10, - coerce_to_string=False) - lng = serializers.DecimalField(max_digits=None, - decimal_places=10, - coerce_to_string=False) - state = serializers.StringRelatedField() - state_code = serializers.CharField() - city = serializers.CharField() - geonames_city = GeoNamesCitySerializer() - postcode = serializers.CharField() - primary = serializers.BooleanField() - line = serializers.CharField() - country_geonames_id = serializers.IntegerField() - - -class ExternalIdSerializer(serializers.Serializer): - preferred = serializers.CharField() - all = serializers.StringRelatedField(many=True) - - -class GridExternalIdSerializer(serializers.Serializer): - preferred = serializers.CharField() - all = serializers.StringRelatedField() - - -class ExternalIdsSerializer(serializers.Serializer): - ISNI = ExternalIdSerializer(required=False) - FundRef = ExternalIdSerializer(required=False) - HESA = ExternalIdSerializer(required=False) - UCAS = ExternalIdSerializer(required=False) - UKPRN = ExternalIdSerializer(required=False) - CNRS = ExternalIdSerializer(required=False) - OrgRef = ExternalIdSerializer(required=False) - Wikidata = ExternalIdSerializer(required=False) - GRID = GridExternalIdSerializer(required=False) - - -class OrganizationSerializer(serializers.Serializer): - id = serializers.CharField() - name = serializers.CharField() - email_address = serializers.StringRelatedField() - ip_addresses = serializers.StringRelatedField(many=True) - established = serializers.IntegerField() - types = serializers.StringRelatedField(many=True) - relationships = OrganizationRelationshipsSerializer(many=True) - addresses = OrganizationAddressesSerializer(many=True) - links = serializers.StringRelatedField(many=True) - aliases = serializers.StringRelatedField(many=True) - acronyms = serializers.StringRelatedField(many=True) - status = serializers.CharField() - wikipedia_url = serializers.CharField() - labels = OrganizationLabelSerializer(many=True) - country = CountrySerializer() - external_ids = ExternalIdsSerializer() - - -class ListResultSerializer(serializers.Serializer): - number_of_results = serializers.IntegerField() - time_taken = serializers.IntegerField() - items = OrganizationSerializer(many=True) - meta = AggregationsSerializer() - - -class MatchedOrganizationSerializer(serializers.Serializer): - 
substring = serializers.CharField() - score = serializers.FloatField() - matching_type = serializers.CharField() - chosen = serializers.BooleanField() - organization = OrganizationSerializer() - - -class MatchingResultSerializer(serializers.Serializer): - number_of_results = serializers.IntegerField() - items = MatchedOrganizationSerializer(many=True) From edb41245120b5b3761c09653650edabd351ef3ba Mon Sep 17 00:00:00 2001 From: Adam Date: Wed, 18 Feb 2026 00:49:07 -0800 Subject: [PATCH 2/2] Fix bulkupdate 500 error: remove unused version arg from retrieve_organization call (#523) --- rorapi/common/csv_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rorapi/common/csv_update.py b/rorapi/common/csv_update.py index df12c459..2299e2a9 100644 --- a/rorapi/common/csv_update.py +++ b/rorapi/common/csv_update.py @@ -13,7 +13,7 @@ def update_record_from_csv(csv_data, version): errors = [] updated_record = None print("updating record from csv") - existing_org_errors, existing_org = retrieve_organization(csv_data['id'], version) + existing_org_errors, existing_org = retrieve_organization(csv_data['id']) print(existing_org) if existing_org is None: errors.append("No existing record found for ROR ID '{}'".format(csv_data['id']))
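
For context, a minimal sketch of the calling convention this second patch restores. It is illustrative only and not part of the patch: the (errors, organization) return shape and the id-only argument follow the v2 tests above, while the stub body and the presumed TypeError behind the 500 error are assumptions.

    # Illustrative sketch, not part of the patch. After PATCH 1/2,
    # retrieve_organization takes only a ROR id; the old two-argument call
    # in csv_update.py presumably raised a TypeError, surfacing as a 500.

    def retrieve_organization(ror_id):
        # Real implementation queries Elasticsearch; stubbed here.
        return None, {"id": ror_id}  # (errors, organization)

    def update_record_from_csv(csv_data, version):
        errors = []
        # Before this patch: retrieve_organization(csv_data['id'], version)
        existing_org_errors, existing_org = retrieve_organization(csv_data['id'])
        if existing_org is None:
            errors.append("No existing record found for ROR ID '{}'".format(csv_data['id']))
        return errors, existing_org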