Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0aafb34
Added time & status to insitu request logging
Oct 6, 2022
ba003d0
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Oct 14, 2022
5accf13
Attempt at logging IS query URL before request is made
RKuttruff Oct 14, 2022
1071995
Handle logging for next page
Oct 14, 2022
b3de561
Changelog
Oct 14, 2022
a3a0ef4
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Oct 15, 2022
b475f17
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Oct 18, 2022
5e35ca6
Added explicit timeouts plus NPEs on timout
Oct 24, 2022
8841861
Updated CHANGELOG.md
Oct 24, 2022
c595569
Schema timeout
Oct 24, 2022
ececbd0
Made timeout variables
Oct 25, 2022
8c95f33
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Oct 27, 2022
2fb6962
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Nov 21, 2022
89429fc
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Nov 30, 2022
f6c43b9
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Nov 30, 2022
5c4b421
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Dec 7, 2022
469d789
Moved changelog entry
Dec 14, 2022
3b1d916
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Jan 23, 2023
8b6225b
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Jan 23, 2023
170bf45
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Feb 14, 2023
ec62e69
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Feb 21, 2023
90e086d
Logging: non-root + log count of returned points
Mar 2, 2023
44b1b18
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Mar 13, 2023
ae7db38
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Mar 20, 2023
d7aa528
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Mar 23, 2023
0251e5b
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Mar 30, 2023
ef272e8
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Mar 31, 2023
e7b8251
Merge branch 'apache:master' into insitu-query-logging
RKuttruff May 4, 2023
9d998f5
Merge remote-tracking branch 'origin/master' into insitu-query-logging
May 16, 2023
6e38477
Merge branch 'apache:master' into insitu-query-logging
RKuttruff May 18, 2023
6be503b
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Jun 22, 2023
7a74bae
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Jul 5, 2023
373aef9
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Jul 10, 2023
8e8f5d6
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Aug 1, 2023
68d8227
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Aug 21, 2023
34a23ef
Merge remote-tracking branch 'origin/master' into insitu-query-logging
Aug 22, 2023
8985b7d
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Aug 23, 2023
69907b4
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Sep 6, 2023
42b71ec
Merge branch 'apache:master' into insitu-query-logging
RKuttruff Sep 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- SDAP-461: Added 4 remaining Saildrone insitu datasets.
- SDAP-473: Added support for matchup job prioritization
- SDAP-483: Added `.asf.yaml` to configure Jira auto-linking.
- Added logging message for start of insitu query + added status code & elapsed time to post query log message.
- Added explicit timeouts for all insitu related queries to prevent hanging issue.
### Changed
- SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
- **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`
Expand Down
37 changes: 31 additions & 6 deletions analysis/webservice/algorithms/doms/insitu.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@
import requests
from datetime import datetime
from webservice.algorithms.doms import config as insitu_endpoints
from urllib.parse import urlencode
from webservice.webmodel import NexusProcessingException


CONNECT_TIMEOUT = 9.05 # Recommended to be just above a multiple of 3 seconds
READ_TIMEOUT = 303 # Just above current gateway timeout
TIMEOUTS = (CONNECT_TIMEOUT, READ_TIMEOUT)

logger = logging.getLogger(__name__)


def query_insitu_schema():
Expand All @@ -29,8 +38,12 @@ def query_insitu_schema():
metadata
"""
schema_endpoint = insitu_endpoints.getSchemaEndpoint()
logging.info("Querying schema")
response = requests.get(schema_endpoint)
logger.info("Querying schema")
try:
response = requests.get(schema_endpoint, timeout=TIMEOUTS)
except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
raise NexusProcessingException(code=504, reason=f'Insitu schema request timed out')

response.raise_for_status()
return response.json()

Expand Down Expand Up @@ -75,12 +88,22 @@ def query_insitu(dataset, variable, start_time, end_time, bbox, platform, depth_
# Page through all insitu results
next_page_url = insitu_endpoints.getEndpoint(provider, dataset)
while next_page_url is not None and next_page_url != 'NA':
if session is not None:
response = session.get(next_page_url, params=params)
thetime = datetime.now()
if params == {}:
logger.info(f"Starting insitu request: {next_page_url}")
else:
response = requests.get(next_page_url, params=params)
logger.info(f"Starting insitu request: {next_page_url}?{urlencode(params)}")


logging.info(f'Insitu request {response.url}')
try:
if session is not None:
response = session.get(next_page_url, params=params, timeout=TIMEOUTS)
else:
response = requests.get(next_page_url, params=params, timeout=TIMEOUTS)
except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectTimeout):
raise NexusProcessingException(code=504, reason=f'Insitu request timed out after {str(datetime.now() - thetime)} seconds')

logger.info(f'Insitu request {response.url} finished. Code: {response.status_code} Time: {str(datetime.now() - thetime)}')

response.raise_for_status()
insitu_page_response = response.json()
Expand All @@ -93,4 +116,6 @@ def query_insitu(dataset, variable, start_time, end_time, bbox, platform, depth_
next_page_url = insitu_page_response.get('next', None)
params = {} # Remove params, they are already included in above URL

logger.info(f"Insitu query completed, returning {len(insitu_response['results']):,} points")

return insitu_response