Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion search-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1101,10 +1101,59 @@ paths:
required: true
schema:
type: string
- name: priority
in: query
description: The priority level of the reindex (1, 2, or 3); defaults to 1. 1 is the highest priority (completed first) and 3 is the lowest (completed last). Subsequent reindexes of associated entities will be enqueued at a priority level of 2 if the initial reindex was a 1. If the initial is a priority 2, the subsequent reindexes will also be priority 2. If the initial is priority 3, the subsequent reindexes will also be priority 3.
required: false
schema:
type: string
enum: ['1', '2', '3']
responses:
'202':
description: The request has been accepted and reindex is in process

'/reindex-status':
get:
summary: Retrieves a json containing information about the current reindex queue status, including the number of items in the queue at each priority level.
description: To retrieve information about the current state of the queue, including how many items are in each priority, use this endpoint. To view all current jobs in the queue, include the query parameter all-queued. To view all jobs currently being executed by a worker, include the parameter all-reindexing.
parameters:
- name: all-queued
in: query
description: A boolean that, when set to true, returns data about each item currently in the queue, but not items currently being reindexed (no longer in the queue).
required: false
schema:
type: string
enum: ['true', 'false']
- name: all-reindexed
in: query
description: A boolean that, when set to true, returns data about each item currently being reindexed by a worker, but not items in the queue (not yet being reindexed).
required: false
schema:
type: string
enum: ['true', 'false']
responses:
'200':
description: Returns the status data as a json describing the current state of the queue.
content:
application/json:
type: object
'500':
description: A failure has occurred retrieving the status data from the job queue.
'/reindex-status/{identifier}':
get:
summary: Retrieves a json containing information about a particular item in the queue, including its priority level and its position in the queue.
description: To retrieve information about an individual entity in the queue, include either the id used when submitting the reindex request (uuid or hubmap_id) or the job_id (returned when submitting the request) as a path variable.
parameters:
- name: identifier
in: path
description: The id of the chosen entity. Either the original id used during reindex submission (uuid or HuBMAP ID, whichever was used) or the job_id returned when the reindex was submitted.
responses:
'200':
description: Returns the status data as a json describing the current state of the chosen entity.
content:
application/json:
type: object
'500':
description: A failure has occurred retrieving the status data from the job queue.
'/mget':
post:
summary: Retrieves multiple documents by their IDs in a single request.
Expand Down
21 changes: 19 additions & 2 deletions src/hubmap_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@
import sys
import time
from redis import Redis, ConnectionError, RedisError
from urllib3.exceptions import InsecureRequestWarning
from yaml import safe_load, YAMLError
from http.client import HTTPException
from enum import Enum
from types import MappingProxyType

# Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

# For reusing the app.cfg configuration when running indexer_base.py as script
from flask import Flask, Response

Expand Down Expand Up @@ -700,6 +704,7 @@ def enqueue_reindex(self, entity_id, reindex_queue, priority):
subsequent_priority = max(priority, 2)

job_id = reindex_queue.enqueue(
job_metadata = {"uuid": entity.get('uuid'), "hubmap_id": entity.get('hubmap_id')},
task_func=reindex_entity_queued_wrapper,
entity_id=entity_id,
args=[entity_id, self.token],
Expand Down Expand Up @@ -789,11 +794,23 @@ def enqueue_reindex(self, entity_id, reindex_queue, priority):
upload_associations +
collection_associations
)

logger.info(f"Enqueueing {len(target_ids)} related entities for {entity_id}")


url = f"{self.entity_api_url}/entities/batch-ids"
associated_metadata = {}
try:
response = requests.post(url, headers=self.request_headers, json=list(target_ids))
if response.status_code == 200:
associated_metadata = response.json()
else:
self.logger.error(f"Failed to fetch batch metadata: {response.status_code}")
associated_metadata = {}
except Exception as e:
logger.error(f"Unable to retrieve uuid and hubmap_id from entity-api. Proceed with enqueuing but this info will be missing from logging and status. {e}")
for related_entity_id in target_ids:
reindex_queue.enqueue(
job_metadata = associated_metadata.get(related_entity_id),
task_func=reindex_entity_queued_wrapper,
entity_id=related_entity_id,
args=[related_entity_id, self.token],
Expand Down
2 changes: 1 addition & 1 deletion src/search-adaptor
Submodule search-adaptor updated 2 files
+1 −1 VERSION
+3 −1 src/app.py