diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 65dd657..00babe9 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -55,11 +55,11 @@ jobs: - name: Extract variables shell: bash run: | - echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" - echo "::set-output name=TAG::$(git tag --points-at HEAD)" - echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" - echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" - echo "::set-output name=MESSAGE::$(git log --format=%B -n 1 ${{ github.event.after }})" + echo "BRANCH=$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" >> $GITHUB_OUTPUT + echo "TAG=$(git tag --points-at HEAD)" >> $GITHUB_OUTPUT + echo "GIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT + echo "GIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + echo "MESSAGE=$(git log --format=%B -n 1 ${{ github.event.after }} | tr '\n' ' ' | sed 's/ */ /g')" >> $GITHUB_OUTPUT id: extract_variables - name: Checkout terraform config repo diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9df990e..5353323 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -28,7 +28,7 @@ jobs: password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} - name: Get git tag run: | - echo "::set-output name=GIT_TAG::$(git tag --points-at HEAD)" + echo "GIT_TAG=$(git tag --points-at HEAD)" >> $GITHUB_OUTPUT id: set_git_vars - name: Build and push uses: docker/build-push-action@v2 @@ -52,10 +52,10 @@ jobs: - name: Extract variables shell: bash run: | - echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" - echo "::set-output name=TAG::$(git tag --points-at HEAD)" - echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" - echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" + echo "BRANCH=$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" >> $GITHUB_OUTPUT + echo "TAG=$(git tag --points-at 
HEAD)" >> $GITHUB_OUTPUT + echo "GIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT + echo "GIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT id: extract_variables - name: Checkout terraform config repo diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml index b764937..19756b7 100644 --- a/.github/workflows/staging.yml +++ b/.github/workflows/staging.yml @@ -51,11 +51,11 @@ jobs: - name: Extract variables shell: bash run: | - echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" - echo "::set-output name=TAG::$(git tag --points-at HEAD)" - echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" - echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" - echo "::set-output name=MESSAGE::$(git log --format=%B -n 1 ${{ github.event.after }})" + echo "BRANCH=$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" >> $GITHUB_OUTPUT + echo "TAG=$(git tag --points-at HEAD)" >> $GITHUB_OUTPUT + echo "GIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT + echo "GIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + echo "MESSAGE=$(git log --format=%B -n 1 ${{ github.event.after }} | tr '\n' ' ' | sed 's/ */ /g')" >> $GITHUB_OUTPUT id: extract_variables - name: Checkout terraform config repo diff --git a/docker-compose.yml b/docker-compose.yml index 698ea39..55bc82c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3' - services: elasticsearch7: image: docker.elastic.co/elasticsearch/elasticsearch:7.10.1 diff --git a/rorapi/common/views.py b/rorapi/common/views.py index b28c770..a180c01 100644 --- a/rorapi/common/views.py +++ b/rorapi/common/views.py @@ -303,11 +303,10 @@ class IndexDataDump(APIView): permission_classes = [OurTokenPermission] def get(self, request, filename, dataenv, version=REST_FRAMEWORK["DEFAULT_VERSION"]): - schema = 1 + # Always use v2 schema - v1 indexing support has been removed + schema = 2 testdata = True st = 200 - if version == 'v2': - 
schema = 2 if dataenv == 'prod': testdata = False msg = management.call_command("setup", filename, schema=schema, testdata=testdata) diff --git a/rorapi/management/commands/createindex.py b/rorapi/management/commands/createindex.py index 68c37f6..5b936ef 100644 --- a/rorapi/management/commands/createindex.py +++ b/rorapi/management/commands/createindex.py @@ -13,12 +13,8 @@ def create_index(self, index, template_file): self.stdout.write('Created index {}'.format(index)) class Command(BaseCommand): - help = 'Create ROR API index' + help = 'Create ROR API v2 index' def handle(self, *args, **options): - if(options['schema']==1 or options['schema'] is None): - print("creating v1 index") - create_index(self, ES_VARS['INDEX_V1'], ES_VARS['INDEX_TEMPLATE_ES7_V1']) - if(options['schema']==2 or options['schema'] is None): - print("creating v2 index") - create_index(self, ES_VARS['INDEX_V2'], ES_VARS['INDEX_TEMPLATE_ES7_V2']) \ No newline at end of file + self.stdout.write("creating v2 index") + create_index(self, ES_VARS['INDEX_V2'], ES_VARS['INDEX_TEMPLATE_ES7_V2']) \ No newline at end of file diff --git a/rorapi/management/commands/deleteindex.py b/rorapi/management/commands/deleteindex.py index 9d389fa..2d39877 100644 --- a/rorapi/management/commands/deleteindex.py +++ b/rorapi/management/commands/deleteindex.py @@ -10,13 +10,11 @@ def delete_index(self, index): self.stdout.write('Index {} does not exist'.format(index)) class Command(BaseCommand): - help = 'Deletes ROR API index' + help = 'Deletes ROR API v2 index' def handle(self, *args, **options): - if(options['schema']==1 or options['schema'] is None): - print("deleting v1 index") - delete_index(self, ES_VARS['INDEX_V1']) - if(options['schema']==2 or options['schema'] is None): - print("deleting v2 index") + schema = options.get('schema', 2) + if schema == 2 or schema is None: + self.stdout.write("deleting v2 index") delete_index(self, ES_VARS['INDEX_V2']) diff --git a/rorapi/management/commands/indexror.py 
b/rorapi/management/commands/indexror.py index c86b720..228f17b 100644 --- a/rorapi/management/commands/indexror.py +++ b/rorapi/management/commands/indexror.py @@ -13,30 +13,10 @@ from django.core.management.base import BaseCommand from elasticsearch import TransportError -def get_nested_names_v1(org): - yield org['name'] - for label in org['labels']: - yield label['label'] - for alias in org['aliases']: - yield alias - for acronym in org['acronyms']: - yield acronym - def get_nested_names_v2(org): for name in org['names']: yield name['value'] -def get_nested_ids_v1(org): - yield org['id'] - yield re.sub('https://', '', org['id']) - yield re.sub('https://ror.org/', '', org['id']) - for ext_name, ext_id in org['external_ids'].items(): - if ext_name == 'GRID': - yield ext_id['all'] - else: - for eid in ext_id['all']: - yield eid - def get_nested_ids_v2(org): yield org['id'] yield re.sub('https://', '', org['id']) @@ -150,10 +130,10 @@ def process_files(dir, version): def index(dataset, version): err = {} - if version == 'v2': - index = ES_VARS['INDEX_V2'] - else: - index = ES_VARS['INDEX_V1'] + if version != 'v2': + err['index'] = f"Only v2 schema version is supported. 
Received: {version}" + return err + index = ES_VARS['INDEX_V2'] backup_index = '{}-tmp'.format(index) ES7.reindex(body={ 'source': { @@ -174,22 +154,14 @@ def index(dataset, version): '_id': org['id'] } }) - if 'v2' in index: - org['names_ids'] = [{ - 'name': n - } for n in get_nested_names_v2(org)] - org['names_ids'] += [{ - 'id': n - } for n in get_nested_ids_v2(org)] - # experimental affiliations_match nested doc - org['affiliation_match'] = get_affiliation_match_doc(org) - else: - org['names_ids'] = [{ - 'name': n - } for n in get_nested_names_v1(org)] - org['names_ids'] += [{ - 'id': n - } for n in get_nested_ids_v1(org)] + org['names_ids'] = [{ + 'name': n + } for n in get_nested_names_v2(org)] + org['names_ids'] += [{ + 'id': n + } for n in get_nested_ids_v2(org)] + # experimental affiliations_match nested doc + org['affiliation_match'] = get_affiliation_match_doc(org) body.append(org) ES7.bulk(body) except TransportError: @@ -211,11 +183,10 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('dir', type=str, help='add directory name for S3 bucket to be processed') - parser.add_argument('version', type=str, help='schema version of files to be processed') def handle(self,*args, **options): dir = options['dir'] - version = options['version'] + version = 'v2' process_files(dir, version) diff --git a/rorapi/management/commands/indexrordump.py b/rorapi/management/commands/indexrordump.py index 6719705..e4ce244 100644 --- a/rorapi/management/commands/indexrordump.py +++ b/rorapi/management/commands/indexrordump.py @@ -12,30 +12,10 @@ HEADERS = {'Accept': 'application/vnd.github.v3+json'} -def get_nested_names_v1(org): - yield org['name'] - for label in org['labels']: - yield label['label'] - for alias in org['aliases']: - yield alias - for acronym in org['acronyms']: - yield acronym - def get_nested_names_v2(org): for name in org['names']: yield name['value'] -def get_nested_ids_v1(org): - yield org['id'] - yield re.sub('https://', 
'', org['id']) - yield re.sub('https://ror.org/', '', org['id']) - for ext_name, ext_id in org['external_ids'].items(): - if ext_name == 'GRID': - yield ext_id['all'] - else: - for eid in ext_id['all']: - yield eid - def get_nested_ids_v2(org): yield org['id'] yield re.sub('https://', '', org['id']) @@ -81,22 +61,14 @@ def index_dump(self, filename, index, dataset): '_id': org['id'] } }) - if 'v2' in index: - org['names_ids'] = [{ - 'name': n - } for n in get_nested_names_v2(org)] - org['names_ids'] += [{ - 'id': n - } for n in get_nested_ids_v2(org)] - # experimental affiliations_match nested doc - org['affiliation_match'] = get_affiliation_match_doc(org) - else: - org['names_ids'] = [{ - 'name': n - } for n in get_nested_names_v1(org)] - org['names_ids'] += [{ - 'id': n - } for n in get_nested_ids_v1(org)] + org['names_ids'] = [{ + 'name': n + } for n in get_nested_names_v2(org)] + org['names_ids'] += [{ + 'id': n + } for n in get_nested_ids_v2(org)] + # experimental affiliations_match nested doc + org['affiliation_match'] = get_affiliation_match_doc(org) body.append(org) ES7.bulk(body) except TransportError: @@ -134,22 +106,25 @@ def handle(self, *args, **options): json_files.append(file) if json_files: for json_file in json_files: - index = None json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file - if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None): + # Check if file is v2.0+ format or legacy schema_v2 format + version_match = re.match(r'v(\d+)\.(\d+)', json_file) + is_v2_format = False + if version_match: + major, minor = map(int, version_match.groups()) + if major >= 2: + is_v2_format = True + elif 'schema_v2' in json_file: + # Legacy format with schema_v2 in filename + is_v2_format = True + + if is_v2_format and (options.get('schema') == 2 or options.get('schema') is None): self.stdout.write('Loading JSON') with open(json_path, 'r') as it: dataset = json.load(it) self.stdout.write('Indexing ROR dataset ' + 
json_file) index = ES_VARS['INDEX_V2'] index_dump(self, json_file, index, dataset) - if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None): - self.stdout.write('Loading JSON') - with open(json_path, 'r') as it: - dataset = json.load(it) - self.stdout.write('Indexing ROR dataset ' + json_file) - index = ES_VARS['INDEX_V1'] - index_dump(self, json_file, index, dataset) else: self.stdout.write("ROR data dump does not contain any JSON files") diff --git a/rorapi/management/commands/setup.py b/rorapi/management/commands/setup.py index a87b7f4..0795078 100644 --- a/rorapi/management/commands/setup.py +++ b/rorapi/management/commands/setup.py @@ -36,7 +36,7 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('filename', type=str, help='Name of data dump zip file to index without extension') - parser.add_argument('-s', '--schema', type=int, choices=[1, 2], help='Schema version to index if only indexing 1 version. Only set if not indexing both versions.') + parser.add_argument('-s', '--schema', type=int, choices=[2], default=2, help='Schema version to index (v2 only)') parser.add_argument('-t', '--testdata', action='store_true', help='Set flag to pull data dump from ror-data-test instead of ror-data') def handle(self, *args, **options): @@ -57,7 +57,7 @@ def handle(self, *args, **options): DeleteIndexCommand().handle(*args, **options) CreateIndexCommand().handle(*args, **options) IndexRorDumpCommand().handle(*args, **options) - msg = 'SUCCESS: ROR dataset {} indexed in version {}. Using test repo: {}'.format(filename, str(options['schema']), str(use_test_data)) + msg = 'SUCCESS: ROR dataset {} indexed in v2. Using test repo: {}'.format(filename, str(use_test_data)) except: msg = 'ERROR: Could not index ROR data dump. Check API logs for details.' 
else: diff --git a/rorapi/settings.py b/rorapi/settings.py index 513550e..db643c8 100644 --- a/rorapi/settings.py +++ b/rorapi/settings.py @@ -18,6 +18,7 @@ from dotenv import load_dotenv from elasticsearch import Elasticsearch, RequestsHttpConnection from requests_aws4auth import AWS4Auth +from corsheaders.defaults import default_headers from sentry_sdk.integrations.django import DjangoIntegration sentry_sdk.init(dsn=os.environ.get('SENTRY_DSN', None), @@ -144,6 +145,7 @@ USE_TZ = True CORS_ORIGIN_ALLOW_ALL = True +CORS_ALLOW_HEADERS = list(default_headers) + ['Client-Id'] # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/2.2/howto/static-files/ @@ -151,8 +153,7 @@ STATIC_ROOT = os.path.join(BASE_DIR, 'static/') ES_VARS = { - 'INDEX_V1': 'organizations', - 'INDEX_TEMPLATE_ES7_V1': os.path.join(BASE_DIR, 'rorapi', 'v1', 'index_template_es7.json'), + 'INDEX_V1': 'organizations', # Kept for v1 API queries (backward compatibility) 'INDEX_V2': 'organizations-v2', 'INDEX_TEMPLATE_ES7_V2': os.path.join(BASE_DIR, 'rorapi', 'v2', 'index_template_es7.json'), 'BATCH_SIZE': 20, diff --git a/rorapi/tests/tests_unit/tests_cors.py b/rorapi/tests/tests_unit/tests_cors.py new file mode 100644 index 0000000..5140e5e --- /dev/null +++ b/rorapi/tests/tests_unit/tests_cors.py @@ -0,0 +1,18 @@ +from django.test import TestCase + + +class CORSClientIdTestCase(TestCase): + """Test that CORS preflight allows the Client-Id header.""" + + def test_preflight_allows_client_id_header(self): + response = self.client.options( + '/v2/organizations/02feahw73', + HTTP_ORIGIN='http://localhost:5173', + HTTP_ACCESS_CONTROL_REQUEST_METHOD='GET', + HTTP_ACCESS_CONTROL_REQUEST_HEADERS='Client-Id', + ) + self.assertIn(response.status_code, (200, 204)) + allow_headers = response.get('Access-Control-Allow-Headers') + self.assertIsNotNone(allow_headers) + allowed = [h.strip().lower() for h in allow_headers.split(',')] + self.assertIn('client-id', allowed)