Skip to content

Commit f72d429

Browse files
tloubrieu-jpltloubrieu-jplthomas loubrieu
authored
Add metadata to remote coll (#173)
* fix bugs in helm chart after update for proxy feature * update CHANGELOG for ticket 388 (proxy) * makes --collection-path optional, add remote collection cache for list metadata * add unit tests for remote collection cache * add versions to avoid build failure in docker * make code more robust Co-authored-by: tloubrieu-jpl <loubrieu@jpl.nasa.gov> Co-authored-by: thomas loubrieu <thomas.loubrieu@jpl.nasa.gov>
1 parent a9932a9 commit f72d429

File tree

9 files changed

+206
-29
lines changed

9 files changed

+206
-29
lines changed
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import unittest
2+
from unittest import mock
3+
import requests
4+
from datetime import datetime
5+
from datetime import timedelta
6+
7+
from webservice.redirect import RemoteSDAPCache
8+
from webservice.redirect import CollectionNotFound
9+
from webservice.redirect.RemoteSDAPCache import RemoteSDAPList
10+
11+
class MockResponse:
12+
def __init__(self, json_data, status_code):
13+
self.json_data = json_data
14+
self.status_code = status_code
15+
16+
def json(self):
17+
return self.json_data
18+
19+
LIST_CONTENT = [
20+
{
21+
"shortName": "PM25",
22+
"title": "PM25",
23+
"tileCount": 21515,
24+
"start": 1514818800.0,
25+
"end": 1640991600.0,
26+
"iso_start": "2018-01-01T15:00:00+0000",
27+
"iso_end": "2021-12-31T23:00:00+0000"
28+
}
29+
]
30+
31+
LIST_CONTENT_FORMER = [LIST_CONTENT[0].copy()]
32+
LIST_CONTENT_FORMER[0]['start'] = 0
33+
34+
def mocked_requests_get(*asgs, **kwargs):
35+
return MockResponse(LIST_CONTENT, 200)
36+
37+
38+
def mocked_requests_get_timeout(*asgs, **kwargs):
39+
raise requests.exceptions.ConnectTimeout()
40+
41+
42+
def mocked_requests_get_not_found(*asgs, **kwargs):
43+
return MockResponse({}, 404)
44+
45+
46+
47+
class MyTestCase(unittest.TestCase):
48+
49+
@mock.patch('requests.get', side_effect=mocked_requests_get)
50+
def test_get(self, mock_get):
51+
remote_sdap_cache = RemoteSDAPCache()
52+
53+
collection = remote_sdap_cache.get('https://aq-sdap.stcenter.net/nexus/', 'PM25')
54+
self.assertEqual(collection["start"], 1514818800.0)
55+
56+
@mock.patch('requests.get', side_effect=mocked_requests_get_timeout)
57+
def test_get_timeout(self, mock_get):
58+
remote_sdap_cache = RemoteSDAPCache()
59+
with self.assertRaises(CollectionNotFound):
60+
remote_sdap_cache.get('https://aq-sdap.stcenter.net/nexus/', 'PM25')
61+
62+
63+
@mock.patch('requests.get', side_effect=mocked_requests_get_not_found)
64+
def test_get_not_found(self, mock_get):
65+
remote_sdap_cache = RemoteSDAPCache()
66+
with self.assertRaises(CollectionNotFound):
67+
remote_sdap_cache.get('https://aq-sdap.stcenter.net/nexus/', 'PM25')
68+
69+
@mock.patch('requests.get', side_effect=mocked_requests_get)
70+
def test_get_expired(self, mock_get):
71+
remote_sdap_cache = RemoteSDAPCache()
72+
73+
remote_sdap_cache.sdap_lists['https://aq-sdap.stcenter.net/nexus/'] = RemoteSDAPList(
74+
list=LIST_CONTENT_FORMER,
75+
outdated_at=datetime.now() - timedelta(seconds=3600*25)
76+
)
77+
78+
collection = remote_sdap_cache.get('https://aq-sdap.stcenter.net/nexus/', 'PM25')
79+
80+
# check requests.get is called once
81+
self.assertEqual(mock_get.call_count, 1)
82+
self.assertEqual(collection["start"], 1514818800.0)
83+
84+
@mock.patch('requests.get', side_effect=mocked_requests_get)
85+
def test_get_cached_valid(self, mock_get):
86+
remote_sdap_cache = RemoteSDAPCache()
87+
88+
remote_sdap_cache.sdap_lists['https://aq-sdap.stcenter.net/nexus'] = RemoteSDAPList(
89+
list=LIST_CONTENT_FORMER,
90+
outdated_at=datetime.now() - timedelta(seconds=3600 * 23)
91+
)
92+
93+
collection = remote_sdap_cache.get('https://aq-sdap.stcenter.net/nexus/', 'PM25')
94+
95+
# check requests.get is called once
96+
self.assertEqual(mock_get.call_count, 0)
97+
self.assertEqual(collection["start"], 0)
98+
99+
100+
101+
if __name__ == '__main__':
102+
unittest.main()

analysis/webservice/algorithms/DataSeriesList.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import json
1818

1919
from webservice.algorithms.NexusCalcHandler import NexusCalcHandler
20+
from webservice.redirect import RemoteSDAPCache
21+
from webservice.redirect import CollectionNotFound
2022
from webservice.NexusHandler import nexus_handler
2123
from webservice.webmodel import cached
2224

@@ -31,8 +33,9 @@ class DataSeriesListCalcHandlerImpl(NexusCalcHandler):
3133
path = "/list"
3234
description = "Lists datasets currently available for analysis"
3335
params = {}
36+
remote_sdaps = RemoteSDAPCache()
3437

35-
def __init__(self, tile_service_factory, remote_collections, **kwargs):
38+
def __init__(self, tile_service_factory, remote_collections=None, **kwargs):
3639
super().__init__(tile_service_factory, **kwargs)
3740
self._remote_collections = remote_collections
3841

@@ -49,13 +52,26 @@ def toJson(self):
4952
collection_list = self._get_tile_service().get_dataseries_list()
5053

5154
# add remote collections
52-
for collection in self._remote_collections.values():
53-
collection_list.append(
54-
{
55-
"shortName": collection["id"],
56-
"remoteUrl": collection["path"],
57-
"remoteShortName": collection["remote_id"] if 'remote_id' in collection else collection["id"]
58-
}
59-
)
55+
if self._remote_collections:
56+
for collection in self._remote_collections.values():
57+
58+
current_collection = {
59+
"shortName": collection["id"],
60+
"remoteUrl": collection["path"],
61+
"remoteShortName": collection["remote_id"] if 'remote_id' in collection else collection["id"]
62+
}
63+
64+
try:
65+
remote_collection = self.remote_sdaps.get(
66+
collection["path"],
67+
current_collection["remoteShortName"]
68+
)
69+
del remote_collection['shortName']
70+
current_collection.update(remote_collection)
71+
72+
except CollectionNotFound as e:
73+
logger.warning(e)
74+
finally:
75+
collection_list.append(current_collection)
6076

6177
return SimpleResult(collection_list)

analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@
44

55

66
class HandlerArgsBuilder:
7-
def __init__(self, max_request_threads, tile_service_factory, algorithm_config, remote_collections):
7+
def __init__(
8+
self,
9+
max_request_threads,
10+
tile_service_factory,
11+
algorithm_config,
12+
remote_collections=None
13+
):
814
self.request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads)
915
self.tile_service_factory = tile_service_factory
1016
self.algorithm_config = algorithm_config
@@ -22,7 +28,7 @@ def handler_needs_algorithm_config(class_wrapper):
2228

2329
@staticmethod
2430
def handler_needs_remote_collections(class_wrapper):
25-
return class_wrapper == webservice.algorithms.DataSeriesList.D
31+
return class_wrapper == webservice.algorithms.DataSeriesList.DataSeriesListCalcHandlerImpl
2632

2733
def get_args(self, clazz_wrapper):
2834
args = dict(
@@ -37,7 +43,7 @@ def get_args(self, clazz_wrapper):
3743
if self.handler_needs_algorithm_config(clazz_wrapper):
3844
args['config'] = self.algorithm_config
3945

40-
if clazz_wrapper == webservice.algorithms.DataSeriesList.DataSeriesListCalcHandlerImpl:
46+
if self.handler_needs_remote_collections(clazz_wrapper):
4147
args['remote_collections'] = self.remote_collections
4248

4349
return args

analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def get(self):
3030
r'/apidocs/(.*)', tornado.web.StaticFileHandler,
3131
{'path': str(apidocs_path), "default_filename": "index.html"}))
3232

33-
def set_modules(self, module_dir, algorithm_config, remote_collections, max_request_threads=4):
33+
def set_modules(self, module_dir, algorithm_config, remote_collections=None, max_request_threads=4):
3434
for moduleDir in module_dir:
3535
self.log.info("Loading modules from %s" % moduleDir)
3636
importlib.import_module(moduleDir)
@@ -44,7 +44,7 @@ def set_modules(self, module_dir, algorithm_config, remote_collections, max_requ
4444
max_request_threads,
4545
tile_service_factory,
4646
algorithm_config,
47-
remote_collections
47+
remote_collections=remote_collections
4848
)
4949

5050
for clazzWrapper in NexusHandler.AVAILABLE_HANDLERS:

analysis/webservice/redirect/RedirectHandler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
logger = logging.getLogger(__name__)
66

7+
78
class RedirectHandler(tornado.web.RequestHandler):
89

910
def initialize(self, redirected_collections=None):
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import requests
2+
import logging
3+
from datetime import datetime
4+
from datetime import timedelta
5+
from dataclasses import dataclass
6+
7+
logger = logging.getLogger(__name__)
8+
9+
@dataclass
10+
class RemoteSDAPList:
11+
list: dict
12+
outdated_at: datetime
13+
14+
15+
class CollectionNotFound(Exception):
16+
pass
17+
18+
19+
class RemoteSDAPCache:
20+
def __init__(self):
21+
self.sdap_lists = {}
22+
23+
def _add(self, url, timeout=2, max_age=3600*24):
24+
list_url = f"{url}/list"
25+
try:
26+
r = requests.get(list_url, timeout=timeout)
27+
if r.status_code == 200:
28+
logger.info("Caching list for sdap %s: %s", list_url, r.text)
29+
self.sdap_lists[url] = RemoteSDAPList(
30+
list=r.json(),
31+
outdated_at=datetime.now()+timedelta(seconds=max_age)
32+
)
33+
else:
34+
raise CollectionNotFound("url %s was not reachable, responded with status %s", list_url, r.status_code)
35+
except requests.exceptions.ConnectTimeout as e:
36+
raise CollectionNotFound("url %s was not reachable in %i s", list_url, timeout)
37+
38+
def get(self, url, short_name):
39+
stripped_url = url.strip('/')
40+
if stripped_url not in self.sdap_lists or self.sdap_lists[stripped_url].outdated_at>datetime.now():
41+
self._add(stripped_url)
42+
43+
for collection in self.sdap_lists[stripped_url].list:
44+
if 'shortName' in collection and collection['shortName'] == short_name:
45+
return collection
46+
47+
raise CollectionNotFound("collection %s has not been found in url %s", short_name, stripped_url)
48+
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
from .RedirectHandler import RedirectHandler
2-
from .RemoteCollectionMatcher import RemoteCollectionMatcher
2+
from .RemoteCollectionMatcher import RemoteCollectionMatcher
3+
from .RemoteSDAPCache import RemoteSDAPCache
4+
from .RemoteSDAPCache import CollectionNotFound

analysis/webservice/webapp.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -74,18 +74,27 @@ def main():
7474
define('cassandra_host', help='cassandra host')
7575
define('cassandra_username', help='cassandra username')
7676
define('cassandra_password', help='cassandra password')
77-
define('collections_path', help='collection config path')
77+
define('collections_path', default=None, help='collection config path')
7878

7979
parse_command_line()
8080
algorithm_config = inject_args_in_config(options, algorithm_config)
8181

82-
remote_collection_matcher = RemoteCollectionMatcher(options.collections_path)
82+
remote_collections = None
83+
router_rules = []
84+
if options.collections_path:
85+
# build retirect app
86+
remote_collection_matcher = RemoteCollectionMatcher(options.collections_path)
87+
remote_collections = remote_collection_matcher.get_remote_collections()
88+
remote_sdap_app = RedirectAppBuilder(remote_collection_matcher).build(
89+
host=options.address,
90+
debug=options.debug)
91+
router_rules.append(Rule(remote_collection_matcher, remote_sdap_app))
8392

8493
# build nexus app
8594
nexus_app_builder = NexusAppBuilder().set_modules(
8695
web_config.get("modules", "module_dirs").split(","),
8796
algorithm_config,
88-
remote_collection_matcher.get_remote_collections()
97+
remote_collections=remote_collections
8998
)
9099

91100
if web_config.get("static", "static_enabled") == "true":
@@ -96,17 +105,9 @@ def main():
96105
log.info("Static resources disabled")
97106

98107
local_sdap_app = nexus_app_builder.build(host=options.address, debug=options.debug)
108+
router_rules.append(Rule(AnyMatches(), local_sdap_app))
99109

100-
# build redirect app
101-
remote_sdap_app = RedirectAppBuilder(remote_collection_matcher).build(
102-
host=options.address,
103-
debug=options.debug)
104-
105-
router = RuleRouter([
106-
Rule(remote_collection_matcher, remote_sdap_app),
107-
Rule(AnyMatches(), local_sdap_app)
108-
]
109-
)
110+
router = RuleRouter(router_rules)
110111

111112
log.info("Initializing on host address '%s'" % options.address)
112113
log.info("Initializing on port '%s'" % options.port)

data-access/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
cassandra-driver==3.24.0
22
pysolr==3.9.0
3-
elasticsearch
3+
elasticsearch==8.3.1
4+
urllib3==1.26.2
45
requests
56
nexusproto
67
Shapely

0 commit comments

Comments
 (0)