diff --git a/search-api-spec.yaml b/search-api-spec.yaml index a76d9348..79dc29eb 100644 --- a/search-api-spec.yaml +++ b/search-api-spec.yaml @@ -1101,10 +1101,59 @@ paths: required: true schema: type: string + - name: priority + in: query + description: The priority level of the reindex 1, 2, or 3; defaults to 1. 1 is the highest priority (completed first) and 3 is the lowest (completed last). Subsequent reindexes of associated entities will be enqueued at a priority level of 2 if the initial reindex was a 1. If the initial is a priority 2, the subsequent reindexes will also be priority 2. If the initial is priority 3, the subsequent reindexes will also be priority 3. + required: false + schema: + type: string + enum: ['1', '2', '3'] responses: '202': description: The request has been accepted and reindex is in process - + '/reindex-status': + get: + summary: Retrieves a json containing information about the current reindex queue status. Including numbers in the queue at different priority levels + description: To retrieve information about the current state of the queue, including how many items are in each priority, use this endpoint. To view all current jobs in the queue, include the query parameter all-queued. To view all jobs currently being executed by a worker, include the parameter all-reindexing. + parameters: + - name: all-queued + in: query + description: A boolean that, when set to true, returns data about each item currently in the queue, but not items currently being reindexed (no longer in the queue). + required: false + schema: + type: string + enum: ['true', 'false'] + - name: all-reindexed + in: query + description: A boolean that, when set to true, returns data about each item currently being reindexed by a worker, but not items in the queue (not yet being reindexed). 
+ required: false + schema: + type: string + enum: ['true', 'false'] + responses: + '200': + description: Returns the status data as a json describing the current state of the queue. + content: + application/json: + type: object + '500': + description: A failure has occurred retrieving the status data from the job queue. + '/reindex-status/{identifier}': + get: + summary: Retrieves a json containing information about a particular item in the queue including its priority level, and its position in the queue. + description: To retrieve information about an individual entity in the queue, include either the id used when submitting the reindex request (uuid or hubmap_id) or the job_id (returned when submitting the request) as a path variable. + parameters: + - name: identifier + in: path + description: The id of the chosen entity. Either the original id used during reindex submission (uuid or HuBMAP ID, whichever was used) or the job_id returned when the reindex was submitted. + responses: + '200': + description: Returns the status data as a json describing the current state of the chosen entity. + content: + application/json: + type: object + '500': + description: A failure has occurred retrieving the status data from the job queue. '/mget': post: summary: Retrieves multiple documents by their IDs in a single request. 
diff --git a/src/hubmap_translator.py b/src/hubmap_translator.py index a157eb01..1170a11d 100644 --- a/src/hubmap_translator.py +++ b/src/hubmap_translator.py @@ -9,11 +9,15 @@ import sys import time from redis import Redis, ConnectionError, RedisError +from urllib3.exceptions import InsecureRequestWarning from yaml import safe_load, YAMLError from http.client import HTTPException from enum import Enum from types import MappingProxyType +# Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled +requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) + # For reusing the app.cfg configuration when running indexer_base.py as script from flask import Flask, Response @@ -700,6 +704,7 @@ def enqueue_reindex(self, entity_id, reindex_queue, priority): subsequent_priority = max(priority, 2) job_id = reindex_queue.enqueue( + job_metadata = {"uuid": entity.get('uuid'), "hubmap_id": entity.get('hubmap_id')}, task_func=reindex_entity_queued_wrapper, entity_id=entity_id, args=[entity_id, self.token], @@ -789,11 +794,23 @@ def enqueue_reindex(self, entity_id, reindex_queue, priority): upload_associations + collection_associations ) - + logger.info(f"Enqueueing {len(target_ids)} related entities for {entity_id}") - + + url = f"{self.entity_api_url}/entities/batch-ids" + associated_metadata = {} + try: + response = requests.post(url, headers=self.request_headers, json=list(target_ids)) + if response.status_code == 200: + associated_metadata = response.json() + else: + self.logger.error(f"Failed to fetch batch metadata: {response.status_code}") + associated_metadata = {} + except Exception as e: + logger.error(f"Unable to retrieve uuid and hubmap_id from entity-api. Proceed with enqueuing but this info will be missing from logging and status. 
{e}") for related_entity_id in target_ids: reindex_queue.enqueue( + job_metadata = associated_metadata.get(related_entity_id), task_func=reindex_entity_queued_wrapper, entity_id=related_entity_id, args=[related_entity_id, self.token], diff --git a/src/search-adaptor b/src/search-adaptor index 16037a81..eca2c5f5 160000 --- a/src/search-adaptor +++ b/src/search-adaptor @@ -1 +1 @@ -Subproject commit 16037a81efe5a6edccd152021bbe650bc2e7faa2 +Subproject commit eca2c5f50c7789ee899ff1b821bb84c2d10f316e