Insitu queries: add explicit timeouts and request logging.

Every HTTP request made by query_insitu_schema() and query_insitu() now
passes timeout=TIMEOUTS, a (connect, read) tuple. A connect or read timeout
is re-raised as NexusProcessingException with code 504. query_insitu() logs
the start of each page request, then the final URL, status code and elapsed
time, and finally the number of points returned.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dee3dede..dff95e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - SDAP-461: Added 4 remaining Saildrone insitu datasets.
 - SDAP-473: Added support for matchup job prioritization
 - SDAP-483: Added `.asf.yaml` to configure Jira auto-linking.
+- Added logging message for start of insitu query + added status code & elapsed time to post query log message.
+- Added explicit timeouts for all insitu related queries to prevent hanging issue.
 ### Changed
 - SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
   - **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`
diff --git a/analysis/webservice/algorithms/doms/insitu.py b/analysis/webservice/algorithms/doms/insitu.py
index ae35b4a5..07b741ac 100644
--- a/analysis/webservice/algorithms/doms/insitu.py
+++ b/analysis/webservice/algorithms/doms/insitu.py
@@ -20,6 +20,15 @@
 import requests
 from datetime import datetime
 from webservice.algorithms.doms import config as insitu_endpoints
+from urllib.parse import urlencode
+from webservice.webmodel import NexusProcessingException
+
+
+CONNECT_TIMEOUT = 9.05  # Recommended to be just above a multiple of 3 seconds
+READ_TIMEOUT = 303  # Just above current gateway timeout
+TIMEOUTS = (CONNECT_TIMEOUT, READ_TIMEOUT)
+
+logger = logging.getLogger(__name__)
 
 
 def query_insitu_schema():
@@ -29,8 +38,12 @@ def query_insitu_schema():
     metadata
     """
     schema_endpoint = insitu_endpoints.getSchemaEndpoint()
-    logging.info("Querying schema")
-    response = requests.get(schema_endpoint)
+    logger.info("Querying schema")
+    try:
+        response = requests.get(schema_endpoint, timeout=TIMEOUTS)
+    except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
+        raise NexusProcessingException(code=504, reason='Insitu schema request timed out')
+
     response.raise_for_status()
     return response.json()
 
@@ -75,12 +88,21 @@ def query_insitu(dataset, variable, start_time, end_time, bbox, platform, depth_
     # Page through all insitu results
     next_page_url = insitu_endpoints.getEndpoint(provider, dataset)
     while next_page_url is not None and next_page_url != 'NA':
-        if session is not None:
-            response = session.get(next_page_url, params=params)
+        thetime = datetime.now()
+        if not params:
+            logger.info(f"Starting insitu request: {next_page_url}")
         else:
-            response = requests.get(next_page_url, params=params)
+            logger.info(f"Starting insitu request: {next_page_url}?{urlencode(params)}")
 
-        logging.info(f'Insitu request {response.url}')
+        try:
+            if session is not None:
+                response = session.get(next_page_url, params=params, timeout=TIMEOUTS)
+            else:
+                response = requests.get(next_page_url, params=params, timeout=TIMEOUTS)
+        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
+            raise NexusProcessingException(code=504, reason=f'Insitu request timed out after {(datetime.now() - thetime).total_seconds():.2f} seconds')
+
+        logger.info(f'Insitu request {response.url} finished. Code: {response.status_code} Time: {str(datetime.now() - thetime)}')
         response.raise_for_status()
         insitu_page_response = response.json()
 
@@ -93,4 +115,6 @@ def query_insitu(dataset, variable, start_time, end_time, bbox, platform, depth_
         next_page_url = insitu_page_response.get('next', None)
         params = {}  # Remove params, they are already included in above URL
 
+    logger.info(f"Insitu query completed, returning {len(insitu_response['results']):,} points")
+
     return insitu_response