From 6b1b7cc431165e78c0a91a9c41ddf398ff7da75d Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 29 Jun 2023 13:51:38 -0700 Subject: [PATCH 01/70] Separated NTS backends --- data-access/nexustiles/AbstractTileService.py | 329 +++++ data-access/nexustiles/backends/__init__.py | 0 .../backends/nexusproto/__init__.py | 0 .../nexustiles/backends/nexusproto/backend.py | 566 ++++++++ .../backends/nexusproto/config/datastores.ini | 36 + .../nexusproto/config/datastores.ini.default | 39 + .../backends/nexusproto/dao/CassandraProxy.py | 317 +++++ .../backends/nexusproto/dao/DynamoProxy.py | 146 ++ .../nexusproto/dao/ElasticsearchProxy.py | 1235 +++++++++++++++++ .../backends/nexusproto/dao/S3Proxy.py | 141 ++ .../backends/nexusproto/dao/SolrProxy.py | 731 ++++++++++ .../backends/nexusproto/dao/__init__.py | 14 + .../nexustiles/backends/zarr/__init__.py | 0 data-access/nexustiles/nexustiles.py | 97 +- 14 files changed, 3564 insertions(+), 87 deletions(-) create mode 100644 data-access/nexustiles/AbstractTileService.py create mode 100644 data-access/nexustiles/backends/__init__.py create mode 100644 data-access/nexustiles/backends/nexusproto/__init__.py create mode 100644 data-access/nexustiles/backends/nexusproto/backend.py create mode 100644 data-access/nexustiles/backends/nexusproto/config/datastores.ini create mode 100644 data-access/nexustiles/backends/nexusproto/config/datastores.ini.default create mode 100644 data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py create mode 100644 data-access/nexustiles/backends/nexusproto/dao/__init__.py create mode 100644 data-access/nexustiles/backends/zarr/__init__.py diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py new file mode 100644 index 00000000..f4f4449c --- /dev/null +++ b/data-access/nexustiles/AbstractTileService.py @@ -0,0 +1,329 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import configparser +import logging +import sys +import json +from abc import ABC, abstractmethod +from datetime import datetime +from functools import reduce + +import numpy as np +import numpy.ma as ma +import pkg_resources +from pytz import timezone, UTC +from shapely.geometry import MultiPolygon, box + +from .dao import CassandraProxy +from .dao import DynamoProxy +from .dao import S3Proxy +from .dao import SolrProxy +from .dao import ElasticsearchProxy + +from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.nexustiles import NexusTileServiceException + +class AbstractTileService(ABC): + @abstractmethod + def get_dataseries_list(self, simple=False): + raise NotImplementedError() + + @abstractmethod + def find_tile_by_id(self, tile_id, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. + + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + raise NotImplementedError() + + @abstractmethod + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + raise NotImplementedError() + + @abstractmethod + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + raise NotImplementedError() + + @abstractmethod + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. 
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + @abstractmethod + def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles that matches the specified metadata, start_time, end_time with tile data outside of time + range properly masked out. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + @abstractmethod + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. + + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + raise NotImplementedError() + + @abstractmethod + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, + **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_min_max_time_by_granule(self, ds, granule_name): + raise NotImplementedError() + + @abstractmethod + def get_dataset_overall_stats(self, ds): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + @abstractmethod + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + raise NotImplementedError() + + @abstractmethod + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. 
Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + @abstractmethod + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + @abstractmethod + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + raise NotImplementedError() + + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) + + def mask_tiles_to_time_range(self, start_time, end_time, tiles): + """ + Masks data in tiles to specified time range. 
+ :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param tiles: List of tiles + :return: A list tiles with data masked to specified time range + """ + if 0 <= start_time <= end_time: + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + @abstractmethod + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. + :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + raise NotImplementedError() + + @abstractmethod + def fetch_data_for_tiles(self, *tiles): + raise NotImplementedError() + + @abstractmethod + def open_dataset(self, dataset): + raise NotImplementedError() + + @abstractmethod + def _metadata_store_docs_to_tiles(self, *store_docs): + raise NotImplementedError() + diff --git a/data-access/nexustiles/backends/__init__.py b/data-access/nexustiles/backends/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data-access/nexustiles/backends/nexusproto/__init__.py b/data-access/nexustiles/backends/nexusproto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py new file mode 100644 index 00000000..86d5ca6a --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -0,0 +1,566 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import configparser +import logging +import sys +import json +from datetime import datetime +from functools import reduce + +import numpy as np +import numpy.ma as ma +import pkg_resources +from pytz import timezone, UTC +from shapely.geometry import MultiPolygon, box + +from .dao import CassandraProxy +from .dao import DynamoProxy +from .dao import S3Proxy +from .dao import SolrProxy +from .dao import ElasticsearchProxy + +from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.nexustiles import NexusTileServiceException + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger("testing") + + +class NexusprotoTileService(object): + def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): + self._datastore = None + self._metadatastore = None + + self._config = configparser.RawConfigParser() + self._config.read(NexusprotoTileService._get_config_files('config/datastores.ini')) + + if config: + self.override_config(config) + + if not skipDatastore: + datastore = self._config.get("datastore", "store") + if datastore == "cassandra": + self._datastore = CassandraProxy.CassandraProxy(self._config) + elif datastore == "s3": + self._datastore = S3Proxy.S3Proxy(self._config) + elif datastore == "dynamo": + self._datastore = DynamoProxy.DynamoProxy(self._config) + else: + raise ValueError("Error reading datastore from config file") + + if not skipMetadatastore: + metadatastore = self._config.get("metadatastore", "store", fallback='solr') + if metadatastore == "solr": + self._metadatastore = SolrProxy.SolrProxy(self._config) + elif metadatastore == "elasticsearch": + self._metadatastore = ElasticsearchProxy.ElasticsearchProxy(self._config) + + def override_config(self, config): + for section in config.sections(): + if self._config.has_section(section): # only override preexisting section, ignores the other + for option in config.options(section): + if config.get(section, option) is not None: + self._config.set(section, option, config.get(section, option)) + + def get_dataseries_list(self, simple=False): + if simple: + return self._metadatastore.get_data_series_list_simple() + else: + return self._metadatastore.get_data_series_list() + + def find_tile_by_id(self, tile_id, **kwargs): + return self._metadatastore.find_tile_by_id(tile_id) + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + return self._metadatastore.find_tiles_by_id(tile_ids, ds=ds, **kwargs) + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + start = datetime.now() + result = self._metadatastore.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, dataset, start_time, + end_time, + **kwargs) + duration = (datetime.now() - start).total_seconds() + if metrics_callback: + metrics_callback(solr=duration) + return result + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. 
+ + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + try: + tile = self._metadatastore.find_tile_by_polygon_and_most_recent_day_of_year(bounding_polygon, ds, + day_of_year) + except IndexError: + raise NexusTileServiceException("No tile found.").with_traceback(sys.exc_info()[2]) + + return tile + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self._metadatastore.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + rows=5000, + **kwargs) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + return self._metadatastore.find_all_tiles_in_polygon_at_time(bounding_polygon, dataset, time, rows=5000, + **kwargs) + + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + if type(start_time) is datetime: + start_time = (start_time - EPOCH).total_seconds() + if type(end_time) is datetime: + end_time = (end_time - EPOCH).total_seconds() + return self._metadatastore.find_all_tiles_in_box_sorttimeasc(min_lat, max_lat, min_lon, max_lon, ds, start_time, + end_time, **kwargs) + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + if 'sort' in list(kwargs.keys()): + tiles = self._metadatastore.find_all_tiles_in_polygon(bounding_polygon, ds, start_time, end_time, **kwargs) + else: + tiles = self._metadatastore.find_all_tiles_in_polygon_sorttimeasc(bounding_polygon, ds, start_time, + end_time, + **kwargs) + return tiles + + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + tiles = self._metadatastore.find_all_tiles_by_metadata(metadata, ds, start_time, end_time, **kwargs) + + return tiles + + def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles that matches the specified metadata, start_time, end_time with tile data outside of time + range properly masked out. 
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + tiles = self.find_tiles_by_metadata(metadata, ds, start_time, end_time, **kwargs) + tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles) + + return tiles + + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. + + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + tiles = self._metadatastore.find_tiles_by_exact_bounds(bounds[0], bounds[1], bounds[2], bounds[3], ds, + start_time, + end_time) + return tiles + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + return self._metadatastore.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + rows=5000, + **kwargs) + + def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, + **kwargs): + tiles = self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) + tiles = self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + if 0 <= start_time <= end_time: + tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles) + + return tiles + + def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): + tiles = self.find_tiles_in_polygon(polygon, ds, start_time, end_time, + **kwargs) + tiles = self.mask_tiles_to_polygon(polygon, tiles) + if 0 <= start_time <= end_time: + tiles = self.mask_tiles_to_time_range(start_time, end_time, tiles) + + return tiles + + def get_min_max_time_by_granule(self, ds, granule_name): + start_time, end_time = self._metadatastore.find_min_max_date_from_granule(ds, granule_name) + + return start_time, end_time + + def get_dataset_overall_stats(self, ds): + return self._metadatastore.get_data_series_stats(ds) + + def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, time, time, tiles) + + return tiles + + def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): + tiles = self.find_all_tiles_in_polygon_at_time(polygon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_polygon_and_time(polygon, time, time, tiles) + + return tiles + + def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) + tiles = self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, time, time, tiles) + + return tiles + + def get_stats_within_box_at_time(self, 
min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + tiles = self._metadatastore.find_all_tiles_within_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, + **kwargs) + + return tiles + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'], + fetch_data=False, rows=len(tile_ids)) + polys = [] + for tile in tiles: + polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat)) + return box(*MultiPolygon(polys).bounds) + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + min_time = self._metadatastore.find_min_date_from_tiles(tile_ids, ds=ds) + return int((min_time - EPOCH).total_seconds()) + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + max_time = self._metadatastore.find_max_date_from_tiles(tile_ids, ds=ds) + return int((max_time - EPOCH).total_seconds()) + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + bounds = self._metadatastore.find_distinct_bounding_boxes_in_polygon(bounding_polygon, ds, start_time, end_time) + return [box(*b) for b in bounds] + + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. 
+ if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + + def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) + + def mask_tiles_to_time_range(self, start_time, end_time, tiles): + """ + Masks data in tiles to specified time range. + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param tiles: List of tiles + :return: A list tiles with data masked to specified time range + """ + if 0 <= start_time <= end_time: + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + return self._metadatastore.get_tile_count(ds, bounding_polygon, start_time, end_time, metadata, **kwargs) + + def fetch_data_for_tiles(self, *tiles): + + nexus_tile_ids = set([tile.tile_id for tile in tiles]) + matched_tile_data = self._datastore.fetch_nexus_tiles(*nexus_tile_ids) + + tile_data_by_id = {str(a_tile_data.tile_id): a_tile_data for a_tile_data in matched_tile_data} + + missing_data = nexus_tile_ids.difference(list(tile_data_by_id.keys())) + if len(missing_data) > 0: + raise Exception("Missing data for tile_id(s) %s." % missing_data) + + for a_tile in tiles: + lats, lons, times, data, meta, is_multi_var = tile_data_by_id[a_tile.tile_id].get_lat_lon_time_data_meta() + + a_tile.latitudes = lats + a_tile.longitudes = lons + a_tile.times = times + a_tile.data = data + a_tile.meta_data = meta + a_tile.is_multi = is_multi_var + + del (tile_data_by_id[a_tile.tile_id]) + + return tiles + + def _metadata_store_docs_to_tiles(self, *store_docs): + + tiles = [] + for store_doc in store_docs: + tile = Tile() + try: + tile.tile_id = store_doc['id'] + except KeyError: + pass + + try: + min_lat = store_doc['tile_min_lat'] + min_lon = store_doc['tile_min_lon'] + max_lat = store_doc['tile_max_lat'] + max_lon = store_doc['tile_max_lon'] + + if isinstance(min_lat, list): + min_lat = min_lat[0] + if isinstance(min_lon, list): + min_lon = min_lon[0] + if isinstance(max_lat, list): + max_lat = max_lat[0] + if isinstance(max_lon, list): + max_lon = max_lon[0] + + tile.bbox = BBox(min_lat, max_lat, min_lon, max_lon) + except KeyError: + pass + + try: + tile.dataset = store_doc['dataset_s'] + except KeyError: + pass + + try: + tile.dataset_id = store_doc['dataset_id_s'] + except KeyError: + pass + + try: + tile.granule = store_doc['granule_s'] + except KeyError: + pass + + try: + tile.min_time = datetime.strptime(store_doc['tile_min_time_dt'], "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=UTC) + except KeyError: + pass + + try: + tile.max_time = datetime.strptime(store_doc['tile_max_time_dt'], "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=UTC) + except KeyError: + pass + + try: + tile.section_spec = store_doc['sectionSpec_s'] + except KeyError: + pass + + try: + tile.tile_stats = TileStats( + store_doc['tile_min_val_d'], store_doc['tile_max_val_d'], + store_doc['tile_avg_val_d'], store_doc['tile_count_i'] + ) + except KeyError: + pass + + try: + # Ensure backwards compatibility by working with old + # tile_var_name_s and tile_standard_name_s fields to + + # will be overwritten if tile_var_name_ss is present + # as well. 
+ if '[' in store_doc['tile_var_name_s']: + var_names = json.loads(store_doc['tile_var_name_s']) + else: + var_names = [store_doc['tile_var_name_s']] + + standard_name = store_doc.get( + 'tile_standard_name_s', + json.dumps([None] * len(var_names)) + ) + if '[' in standard_name: + standard_names = json.loads(standard_name) + else: + standard_names = [standard_name] + + tile.variables = [] + for var_name, standard_name in zip(var_names, standard_names): + tile.variables.append(TileVariable( + variable_name=var_name, + standard_name=standard_name + )) + except KeyError: + pass + + if 'tile_var_name_ss' in store_doc: + tile.variables = [] + for var_name in store_doc['tile_var_name_ss']: + standard_name_key = f'{var_name}.tile_standard_name_s' + standard_name = store_doc.get(standard_name_key) + tile.variables.append(TileVariable( + variable_name=var_name, + standard_name=standard_name + )) + + tiles.append(tile) + + return tiles + + def pingSolr(self): + status = self._metadatastore.ping() + if status and status["status"] == "OK": + return True + else: + return False + + @staticmethod + def _get_config_files(filename): + log = logging.getLogger(__name__) + candidates = [] + extensions = ['.default', ''] + for extension in extensions: + try: + candidate = pkg_resources.resource_filename(__name__, filename + extension) + log.info('use config file {}'.format(filename + extension)) + candidates.append(candidate) + except KeyError as ke: + log.warning('configuration file {} not found'.format(filename + extension)) + + return candidates diff --git a/data-access/nexustiles/backends/nexusproto/config/datastores.ini b/data-access/nexustiles/backends/nexusproto/config/datastores.ini new file mode 100644 index 00000000..f3facb95 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/config/datastores.ini @@ -0,0 +1,36 @@ +[cassandra] +host=localhost +port=9042 +keyspace=nexustiles +local_datacenter=datacenter1 +protocol_version=3 +dc_policy=WhiteListRoundRobinPolicy +username=cassandra +password=cassandra + +[dynamo] +table=nexus-jpl-table +region=us-west-2 + +[solr] +host=http://localhost:8983 +core=nexustiles + +[s3] +bucket=cdms-dev-zarr +#key=MUR_aggregate/ +#key=MUR_1wk_7_100_100/ +#key=MUR_1wk_7_1500_2500/ +#key=MUR_2017_9dy_7_1500_2500/ +#key=MUR_2017_9dy_7_120_240/ +key=MUR_2017_2yr_30_120_240/ +#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_7_120_240.zarr/ +#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_1_240_240.zarr/ +#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_90_120_240.zarr/ +public=false +region=us-west-2 +profile=saml-pub + +[datastore] +store=cassandra +#store=zarrS3 diff --git a/data-access/nexustiles/backends/nexusproto/config/datastores.ini.default b/data-access/nexustiles/backends/nexusproto/config/datastores.ini.default new file mode 100644 index 00000000..d8db1902 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/config/datastores.ini.default @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[cassandra] +host=localhost +port=9042 +keyspace=nexustiles +local_datacenter=datacenter1 +protocol_version=3 +dc_policy=DCAwareRoundRobinPolicy +username= +password= + +[s3] +bucket=nexus-jpl +region=us-west-2 + +[dynamo] +table=nexus-jpl-table +region=us-west-2 + +[solr] +host=http://localhost:8983 +core=nexustiles + +[datastore] +store=cassandra diff --git a/data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py b/data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py new file mode 100644 index 00000000..96f7c4c6 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/CassandraProxy.py @@ -0,0 +1,317 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import uuid +from configparser import NoOptionError + +import nexusproto.DataTile_pb2 as nexusproto +import numpy as np +from cassandra.auth import PlainTextAuthProvider +from cassandra.cqlengine import columns, connection, CQLEngineException +from cassandra.cluster import NoHostAvailable +from cassandra.cqlengine.models import Model +from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, WhiteListRoundRobinPolicy +from multiprocessing.synchronize import Lock +from nexusproto.serialization import from_shaped_array + +INIT_LOCK = Lock(ctx=None) + +logger = logging.getLogger(__name__) + +class NexusTileData(Model): + __table_name__ = 'sea_surface_temp' + tile_id = columns.UUID(primary_key=True) + tile_blob = columns.Blob() + + __nexus_tile = None + + def _get_nexus_tile(self): + if self.__nexus_tile is None: + self.__nexus_tile = nexusproto.TileData.FromString(self.tile_blob) + + return self.__nexus_tile + + def get_raw_data_array(self): + + nexus_tile = self._get_nexus_tile() + the_tile_type = nexus_tile.tile.WhichOneof("tile_type") + + the_tile_data = getattr(nexus_tile.tile, the_tile_type) + + return from_shaped_array(the_tile_data.variable_data) + + def get_lat_lon_time_data_meta(self): + """ + Retrieve data from data store and metadata from metadata store + for this tile. For gridded tiles, the tile shape of the data + will match the input shape. For example, if the input was a + 30x30 tile, all variables will also be 30x30. However, if the + tile is a swath tile, the data will be transformed along the + diagonal of the data matrix. For example, a 30x30 tile would + become 900x900 where the 900 points are along the diagonal. 
+ + Multi-variable tile will also include an extra dimension in the + data array. For example, a 30 x 30 x 30 array would be + transformed to N x 30 x 30 x 30 where N is the number of + variables in this tile. + + latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var + + :return: latitude data + :return: longitude data + :return: time data + :return: data + :return: meta data dictionary + :return: boolean flag, True if this tile has more than one variable + """ + is_multi_var = False + + if self._get_nexus_tile().HasField('grid_tile'): + grid_tile = self._get_nexus_tile().grid_tile + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) + + if len(grid_tile_data.shape) == 2: + grid_tile_data = grid_tile_data[np.newaxis, :] + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('swath_tile'): + swath_tile = self._get_nexus_tile().swath_tile + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) + longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + tile_data = self._to_standard_index(swath_tile_data, + (len(time_data), len(latitude_data), len(longitude_data))) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('time_series_tile'): + time_series_tile = self._get_nexus_tile().time_series_tile + + time_series_tile_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.variable_data)) + time_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.time)).reshape(-1) + latitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.longitude)) + + reshaped_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) + idx = np.arange(len(latitude_data)) + reshaped_array[:, idx, idx] = time_series_tile_data + tile_data = reshaped_array + # Extract the meta data + meta_data = {} + for meta_data_obj in time_series_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + + reshaped_meta_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) + idx = 
np.arange(len(latitude_data)) + reshaped_meta_array[:, idx, idx] = meta_array + + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('swath_multi_variable_tile'): + swath_tile = self._get_nexus_tile().swath_multi_variable_tile + is_multi_var = True + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) + longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + desired_shape = ( + len(time_data), + len(latitude_data), + len(longitude_data), + ) + tile_data = self._to_standard_index(swath_tile_data, desired_shape, is_multi_var=True) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var + elif self._get_nexus_tile().HasField('grid_multi_variable_tile'): + grid_multi_variable_tile = self._get_nexus_tile().grid_multi_variable_tile + is_multi_var = True + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.longitude)) + + # If there are 3 dimensions, that means the time dimension + # was squeezed. Add back in + if len(grid_tile_data.shape) == 3: + grid_tile_data = np.expand_dims(grid_tile_data, axis=1) + # If there are 4 dimensions, that means the time dimension + # is present. Move the multivar dimension. 
+ if len(grid_tile_data.shape) == 4: + grid_tile_data = np.moveaxis(grid_tile_data, -1, 0) + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_multi_variable_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_multi_variable_tile.time]), grid_tile_data, meta_data, is_multi_var + else: + raise NotImplementedError("Only supports grid_tile, swath_tile, swath_multi_variable_tile, and time_series_tile") + + @staticmethod + def _to_standard_index(data_array, desired_shape, is_multi_var=False): + """ + Transform swath data to a standard format where data runs along + diagonal of ND matrix and the non-diagonal data points are + masked + + :param data_array: The data array to be transformed + :param desired_shape: The desired shape of the resulting array + :param is_multi_var: True if this is a multi-variable tile + :type data_array: np.array + :type desired_shape: tuple + :type is_multi_var: bool + :return: Reshaped array + :rtype: np.array + """ + + reshaped_array = [] + if is_multi_var: + reshaped_data_array = np.moveaxis(data_array, -1, 0) + else: + reshaped_data_array = [data_array] + + for variable_data_array in reshaped_data_array: + if desired_shape[0] == 1: + variable_reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) + else: + variable_reshaped_array = np.ma.masked_all(desired_shape) + + row, col = np.indices(variable_data_array.shape) + + variable_reshaped_array[ + np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ + variable_data_array[ + row.flat, col.flat] + variable_reshaped_array.mask[ + np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ + variable_data_array.mask[ + row.flat, col.flat] + + if desired_shape[0] == 1: + reshaped_array.append(variable_reshaped_array[np.newaxis, :]) + else: + reshaped_array.append(variable_reshaped_array) + + if not is_multi_var: + # If single var, squeeze extra dim out of array + reshaped_array = reshaped_array[0] + + return reshaped_array + + +class CassandraProxy(object): + def __init__(self, config): + self.config = config + self.__cass_url = config.get("cassandra", "host") + self.__cass_username = config.get("cassandra", "username") + self.__cass_password = config.get("cassandra", "password") + self.__cass_keyspace = config.get("cassandra", "keyspace") + self.__cass_local_DC = config.get("cassandra", "local_datacenter") + self.__cass_protocol_version = config.getint("cassandra", "protocol_version") + self.__cass_dc_policy = config.get("cassandra", "dc_policy") + + try: + self.__cass_port = config.getint("cassandra", "port") + except NoOptionError: + self.__cass_port = 9042 + + with INIT_LOCK: + try: + connection.get_cluster() + except CQLEngineException: + self.__open() + + def __open(self): + if self.__cass_dc_policy == 'DCAwareRoundRobinPolicy': + dc_policy = DCAwareRoundRobinPolicy(self.__cass_local_DC) + token_policy = TokenAwarePolicy(dc_policy) + elif self.__cass_dc_policy == 'WhiteListRoundRobinPolicy': + token_policy = WhiteListRoundRobinPolicy([self.__cass_url]) + + if self.__cass_username and self.__cass_password: + auth_provider = PlainTextAuthProvider(username=self.__cass_username, password=self.__cass_password) + else: + auth_provider = None + try: + connection.setup( + [host for host in self.__cass_url.split(',')], 
self.__cass_keyspace, + protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy, + port=self.__cass_port, + auth_provider=auth_provider + ) + except NoHostAvailable as e: + logger.error("Cassandra is not accessible, SDAP will not server local datasets", e) + + def fetch_nexus_tiles(self, *tile_ids): + tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if + (isinstance(tile_id, str) or isinstance(tile_id, str))] + + res = [] + for tile_id in tile_ids: + filterResults = NexusTileData.objects.filter(tile_id=tile_id) + if len(filterResults) > 0: + res.append(filterResults[0]) + + return res diff --git a/data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py b/data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py new file mode 100644 index 00000000..1ee70ac1 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/DynamoProxy.py @@ -0,0 +1,146 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import uuid +import nexusproto.DataTile_pb2 as nexusproto +from nexusproto.serialization import from_shaped_array +import numpy as np +import boto3 + +class NexusTileData(object): + __nexus_tile = None + __data = None + tile_id = None + + def __init__(self, data, _tile_id): + if self.__data is None: + self.__data = data + if self.tile_id is None: + self.tile_id = _tile_id + + def _get_nexus_tile(self): + if self.__nexus_tile is None: + self.__nexus_tile = nexusproto.TileData.FromString(self.__data) + + return self.__nexus_tile + + def get_raw_data_array(self): + + nexus_tile = self._get_nexus_tile() + the_tile_type = nexus_tile.tile.WhichOneof("tile_type") + + the_tile_data = getattr(nexus_tile.tile, the_tile_type) + + return from_shaped_array(the_tile_data.variable_data) + + def get_lat_lon_time_data_meta(self): + if self._get_nexus_tile().HasField('grid_tile'): + grid_tile = self._get_nexus_tile().grid_tile + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) + + if len(grid_tile_data.shape) == 2: + grid_tile_data = grid_tile_data[np.newaxis, :] + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data + elif self._get_nexus_tile().HasField('swath_tile'): + swath_tile = self._get_nexus_tile().swath_tile + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) 
+ longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + tile_data = self._to_standard_index(swath_tile_data, + (len(time_data), len(latitude_data), len(longitude_data))) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data + else: + raise NotImplementedError("Only supports grid_tile and swath_tile") + + @staticmethod + def _to_standard_index(data_array, desired_shape): + + if desired_shape[0] == 1: + reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + reshaped_array = reshaped_array[np.newaxis, :] + else: + reshaped_array = np.ma.masked_all(desired_shape) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + + return reshaped_array + + +class DynamoProxy(object): + def __init__(self, config): + self.config = config + self.__dynamo_tablename = config.get("dynamo", "table") + self.__dynamo_region = config.get("dynamo", "region") + self.__dynamo = boto3.resource('dynamodb', region_name=self.__dynamo_region) + self.__dynamo_table = self.__dynamo.Table(self.__dynamo_tablename) + self.__nexus_tile = None + + def fetch_nexus_tiles(self, *tile_ids): + + tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if + (isinstance(tile_id, str) or isinstance(tile_id, str))] + res = [] + for tile_id in tile_ids: + response = self.__dynamo_table.get_item( + Key = { + 'tile_id': str(tile_id) + } + ) + item = response['Item'] + data = item['data'].__str__() + nexus_tile = NexusTileData(data, str(tile_id)) + res.append(nexus_tile) + + return res \ No newline at end of file diff --git a/data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py b/data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py new file mode 100644 index 00000000..157630f6 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/ElasticsearchProxy.py @@ -0,0 +1,1235 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +import threading +import time +import re +from datetime import datetime +from pytz import timezone, UTC + +import requests +import pysolr +from shapely import wkt +from elasticsearch import Elasticsearch + +ELASTICSEARCH_CON_LOCK = threading.Lock() +thread_local = threading.local() + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ELASTICSEARCH_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + + +class ElasticsearchProxy(object): + def __init__(self, config): + self.elasticsearchHosts = config.get("elasticsearch", "host").split(',') + self.elasticsearchIndex = config.get("elasticsearch", "index") + self.elasticsearchUsername = config.get("elasticsearch", "username") + self.elasticsearchPassword = config.get("elasticsearch", "password") + self.logger = logging.getLogger(__name__) + + with ELASTICSEARCH_CON_LOCK: + elasticsearchcon = getattr(thread_local, 'elasticsearchcon', None) + if elasticsearchcon is None: + elasticsearchcon = Elasticsearch(hosts=self.elasticsearchHosts, http_auth=(self.elasticsearchUsername, self.elasticsearchPassword)) + thread_local.elasticsearchcon = elasticsearchcon + + self.elasticsearchcon = elasticsearchcon + + def find_tile_by_id(self, tile_id): + + params = { + "size": 1, + "query": { + "term": { + "id": { + "value": tile_id + } + } + } + } + + results, _, hits = self.do_query(*(None, None, None, True, None), **params) + assert hits == 1, f"Found {hits} results, expected exactly 1" + return [results[0]["_source"]] + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + + params = { + "query": { + "bool": { + "filter": [], + "should": [], + "minimum_should_match": 1 + } + } + } + + for tile_id in tile_ids: + params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) + + if ds is not None: + params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) + + self._merge_kwargs(params, **kwargs) + + results = self.do_query_all(*(None, None, None, False, None), **params) + assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) + return results + + def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): + params = { + "size": 0, + "query": { + "bool": { + "filter": [], + "should": [] + } + }, + "aggs": { + "min_date_agg": { + "min": { + "field": "tile_min_time_dt" + } + } + } + } + + for tile_id in tile_ids: + params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) + if ds is not None: + params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) + + aggregations = self.do_aggregation(*(None, None, None, True, None), **params) + return self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) + + def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): + + params = { + "size": 0, + "query": { + "bool": { + "filter": [], + "should": [] + } + }, + "aggs": { + "max_date_agg": { + "max": { + "field": "tile_max_time_dt" + } + } + } + } + + for tile_id in tile_ids: + params['query']['bool']['should'].append({"term": {"id": {"value": 
tile_id}}}) + if ds is not None: + params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) + + aggregations = self.do_aggregation(*(None, None, None, True, None), **params) + return self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) + + + def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): + + params = { + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "term": { + "granule_s": { + "value": granule_name + } + } + } + ] + } + }, + "aggs": { + "min_date_agg": { + "max": { + "field": "tile_min_time_dt" + } + }, + "max_date_agg": { + "max": { + "field": "tile_max_time_dt" + } + } + } + } + + self._merge_kwargs(params, **kwargs) + + aggregations = self.do_aggregation(*(None, None, None, False, None), **params) + start_time = self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) + end_time = self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) + + return start_time, end_time + + def get_data_series_list(self): + + datasets = self.get_data_series_list_simple() + + for dataset in datasets: + min_date = self.find_min_date_from_tiles([], ds=dataset['title']) + max_date = self.find_max_date_from_tiles([], ds=dataset['title']) + dataset['start'] = (min_date - EPOCH).total_seconds() + dataset['end'] = (max_date - EPOCH).total_seconds() + dataset['iso_start'] = min_date.strftime(ISO_8601) + dataset['iso_end'] = max_date.strftime(ISO_8601) + + return datasets + + def get_data_series_list_simple(self): + + params = { + 'size': 0, + "aggs": { + "dataset_list_agg": { + "composite": { + "size":100, + "sources": [ + { + "dataset_s": { + "terms": { + "field": "dataset_s" + } + } + } + ] + } + } + } + } + + aggregations = self.do_aggregation_all(params, 'dataset_list_agg') + l = [] + + for dataset in aggregations: + l.append({ + "shortName": dataset['key']['dataset_s'], + "title": dataset['key']['dataset_s'], + "tileCount": dataset["doc_count"] + }) + + l = sorted(l, key=lambda entry: entry["title"]) + return l + + def get_data_series_stats(self, ds): + + params = { + "size": 0, + "query": { + "term":{ + "dataset_s": { + "value": ds + } + } + }, + "aggs": { + "available_dates": { + "composite": { + "size": 100, + "sources": [ + {"terms_tile_max_time_dt": {"terms": {"field": "tile_max_time_dt"}}} + ] + } + } + } + } + + aggregations = self.do_aggregation_all(params, 'available_dates') + stats = {} + stats['available_dates'] = [] + + for dt in aggregations: + stats['available_dates'].append(dt['key']['terms_tile_max_time_dt'] / 1000) + + stats['available_dates'] = sorted(stats['available_dates']) + + params = { + "size": 0, + "query": { + "term":{ + "dataset_s": { + "value": ds + } + } + }, + "aggs": { + "min_tile_min_val_d": { + "min": { + "field": "tile_min_val_d" + } + }, + "min_tile_max_time_dt": { + "min": { + "field": "tile_max_time_dt" + } + }, + "max_tile_max_time_dt": { + "max": { + "field": "tile_max_time_dt" + } + }, + "max_tile_max_val_d": { + "max": { + "field": "tile_max_val_d" + } + } + } + } + + aggregations = self.do_aggregation(*(None, None, None, False, None), **params) + stats["start"] = int(aggregations["min_tile_max_time_dt"]["value"]) / 1000 + stats["end"] = int(aggregations["max_tile_max_time_dt"]["value"]) / 1000 + stats["minValue"] = aggregations["min_tile_min_val_d"]["value"] + stats["maxValue"] = aggregations["max_tile_max_val_d"]["value"] + + return stats + + # day_of_year_i added (SDAP-347) + def 
find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "size": "1", + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + }, + { + "range": { + "day_of_year_i": { + "lte": day_of_year + } + } + } + ] + } + } + } + result, _, _ = self.do_query(*(None, None, None, True, 'day_of_year_i desc'), **params) + + return [result[0]] + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): + + search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": "0", + "_source": "tile_min_time_dt", + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "range": { + "tile_min_time_dt": { + "gte": search_start_s, + "lte": search_end_s + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + } + ] + } + }, + "aggs": { + "days_range_agg": { + "composite": { + "size":100, + "sources": [ + { + "tile_min_time_dt": { + "terms": { + "field": "tile_min_time_dt" + } + } + } + ] + } + } + } + } + + aggregations = self.do_aggregation_all(params, 'days_range_agg') + results = [res['key']['tile_min_time_dt'] for res in aggregations] + daysinrangeasc = sorted([(res / 1000) for res in results]) + return daysinrangeasc + + def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, + end_time=-1, **kwargs): + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + } + ] + } + } + } + + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) + + def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) + polygon_coordinates = list(zip(*[iter(nums)] * 2)) + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "intersects" + } + } + } + ] + } + } + } + + try: + if 'fl' in list(kwargs.keys()): + params["_source"] = 
kwargs["fl"].split(',') + except KeyError: + pass + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) + + def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) + polygon_coordinates = list(zip(*[iter(nums)] * 2)) + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + } + ] + } + } + } + + try: + if 'fl' in list(kwargs.keys()): + params["_source"] = kwargs["fl"].split(',') + except KeyError: + pass + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + tile_max_lat = bounding_polygon.bounds[3] + tile_min_lon = bounding_polygon.bounds[0] + tile_min_lat = bounding_polygon.bounds[1] + tile_max_lon = bounding_polygon.bounds[2] + + params = { + "size": 0, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[tile_min_lon, tile_max_lat], [tile_max_lon, tile_min_lat]] + }, + "relation": "intersects" + } + } + } + ] + } + }, + "aggs": { + "distinct_bounding_boxes": { + "composite": { + "size": 100, + "sources": [ + { + "bounding_box": { + "terms": { + "script": { + "source": "String.valueOf(doc['tile_min_lon'].value) + ', ' + String.valueOf(doc['tile_max_lon'].value) + ', ' + String.valueOf(doc['tile_min_lat'].value) + ', ' + String.valueOf(doc['tile_max_lat'].value)", + "lang": "painless" + } + } + } + } + ] + } + } + } + } + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + aggregations = self.do_aggregation_all(params, 'distinct_bounding_boxes') + distinct_bounds = [] + for agg in aggregations: + coords = agg['key']['bounding_box'].split(',') + min_lon = round(float(coords[0]), 2) + max_lon = round(float(coords[1]), 2) + min_lat = round(float(coords[2]), 2) + max_lat = round(float(coords[3]), 2) + polygon = 'POLYGON((%s %s, %s %s, %s %s, %s %s, %s %s))' % (min_lon, max_lat, min_lon, min_lat, max_lon, min_lat, max_lon, max_lat, min_lon, max_lat) + distinct_bounds.append(wkt.loads(polygon).bounds) + + return distinct_bounds + + def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): + + params = { + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + 
{ + "term": { + "tile_min_lon": { + "value": minx + } + } + }, + { + "term": { + "tile_min_lat": { + "value": miny + } + } + }, + { + "term": { + "tile_max_lon": { + "value": maxx + } + } + }, + { + "term": { + "tile_max_lat": { + "value": maxy + } + } + } + ] + } + }} + + if 0 < start_time <= end_time: + params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): + + the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_min_time_dt": { + "lte": the_time + } + } + }, + { + "range": { + "tile_max_time_dt": { + "gte": the_time + } + } + } + ] + } + } + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): + + the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) + + max_lat = bounding_polygon.bounds[3] + min_lon = bounding_polygon.bounds[0] + min_lat = bounding_polygon.bounds[1] + max_lon = bounding_polygon.bounds[2] + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "intersects" + } + } + }, + { "range": { + "tile_min_time_dt": { + "lte": the_time + } + } }, + { "range": { + "tile_max_time_dt": { + "gte": the_time + } + } } + ] + } + } + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + + def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + + the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] + }, + "relation": "within" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + }, + { + "range": { + "tile_min_time_dt": { + "lte": the_time + } + } + }, + { + "range": { + "tile_max_time_dt": { + "gte": the_time + } + } + } + ] + } + } + } + + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, "product(tile_avg_val_d, tile_count_i),*", None, False, None), **params) + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + + the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) + + params = { + "size": 1000, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "geo_shape": { + "geo": { + "shape": { + "type": "multilinestring", + "coordinates": [[[min_lon, max_lat], [max_lon, max_lat], [min_lon, max_lat], 
[min_lon, min_lat], [max_lon, max_lat], [max_lon, min_lat], [min_lon, min_lat], [max_lon, min_lat]]] + }, + "relation": "intersects" + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + }, + { + "range": { + "tile_min_time_dt": { + "lte": the_time + } + } + }, + { + "range": { + "tile_max_time_dt": { + "gte": the_time + } + } + } + ], + "must_not" : { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + }, + "relation": "within" + } + } + } + } + } + } + + self._merge_kwargs(params, **kwargs) + + return self.do_query_all(*(None, None, None, False, None), **params) + + def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): + """ + Get a list of tile metadata that matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tile metadata + """ + + params = { + "query": { + "bool": { + "must": [ + { + "term": { + "dataset_s": {"value": ds} + } + } + ] + } + } + } + + if len(metadata) > 0: + for key_value in metadata: + key = key_value.split(':')[0] + value = key_value.split(':')[1] + params['query']['bool']['must'].append({"match": {key: value}}) + + if 0 < start_time <= end_time: + params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + self._merge_kwargs(params, **kwargs) + return self.do_query_all(*(None, None, None, False, None), **params) + + def get_formatted_time_clause(self, start_time, end_time): + search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) + + time_clause = [ + { + "range": { + "tile_min_time_dt": { + "lte": search_end_s, + "gte": search_start_s + } + } + }, + { + "range": { + "tile_max_time_dt": { + "lte": search_end_s, + "gte": search_start_s + } + } + }, + { + "bool": { + "must": [ + { + "range": { + "tile_min_time_dt": { + "gte": search_start_s + } + } + }, + { + "range": { + "tile_max_time_dt": { + "lte": search_end_s + } + } + } + ] + } + } + ] + + return time_clause + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + + params = { + "size": 0, + "query": { + "bool": { + "filter": [ + { + "term": { + "dataset_s": { + "value": ds + } + } + }, + { + "range": { + "tile_count_i": { + "gte": 1 + } + } + } + ] + } + } + } + + if bounding_polygon: + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + geo_clause = { + "geo_shape": { + "geo": { + "shape": { + "type": "envelope", + "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] + } + } + } + } + + params['query']['bool']['filter'].append(geo_clause) + + if 0 < start_time <= end_time: + params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) + params["query"]["bool"]["minimum_should_match"] = 1 + + if len(metadata) > 0: + for key_value in metadata: + key = key_value.split(':')[0] + value = key_value.split(':')[1] + params['query']['bool']['filter'].append({"term": {key: {"value": value}}}) + + self._merge_kwargs(params, **kwargs) + _, _, found = self.do_query(*(None, None, None, True, None), **params) + + return found + + def do_aggregation(self, *args, **params): + # Gets raw aggregations + + response = self.do_query_raw(*args, **params) + aggregations = response.get('aggregations', None) + return aggregations + + def do_aggregation_all(self, params, agg_name): + # Used for pagination when results can exceed ES max size (use of after_key) + + with ELASTICSEARCH_CON_LOCK: + response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) + all_buckets = [] + + try: + aggregations = response.get('aggregations', None) + current_buckets = aggregations.get(agg_name, None) + buckets = current_buckets.get('buckets', None) + all_buckets += buckets + after_bucket = current_buckets.get('after_key', None) + + while after_bucket is not None: + for agg in params['aggs']: + params['aggs'][agg]['composite']['after'] = {} + for source in params['aggs'][agg]['composite']['sources']: + key_name = next(iter(source)) + params['aggs'][agg]['composite']['after'][key_name] = after_bucket[key_name] + with ELASTICSEARCH_CON_LOCK: + response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) + + aggregations = response.get('aggregations', None) + current_buckets = aggregations.get(agg_name, None) + buckets = current_buckets.get('buckets', None) + all_buckets += buckets + after_bucket = current_buckets.get('after_key', None) + + except AttributeError as e: + self.logger.error('Error when accessing aggregation buckets - ' + str(e)) + + return all_buckets + + def do_query(self, *args, **params): + response = self.do_query_raw(*args, **params) + return response['hits']['hits'], None, response['hits']['total']['value'] + + def do_query_raw(self, *args, **params): + + if args[4]: + + sort_fields = args[4].split(",") + + if 'sort' not in list(params.keys()): + params["sort"] = [] + + for field in sort_fields: + field_order = field.split(' ') + sort_instruction = {field_order[0]: field_order[1]} + if sort_instruction not in params['sort']: + params["sort"].append(sort_instruction) + with ELASTICSEARCH_CON_LOCK: + response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) + + return response + + def 
do_query_all(self, *args, **params): + # Used to paginate with search_after. + # The method calling this might already have a sort clause, + # so we merge both sort clauses inside do_query_raw + + results = [] + + search = None + + # Add track option to not be blocked at 10000 hits per worker + if 'track_total_hits' not in params.keys(): + params['track_total_hits'] = True + + # Add sort instruction order to paginate the results : + params["sort"] = [ + { "tile_min_time_dt": "asc"}, + { "_id": "asc" } + ] + + response = self.do_query_raw(*args, **params) + results.extend([r["_source"] for r in response["hits"]["hits"]]) + + total_hits = response["hits"]["total"]["value"] + + try: + search_after = [] + for sort_param in response["hits"]["hits"][-1]["sort"]: + search_after.append(str(sort_param)) + except (KeyError, IndexError): + search_after = [] + + try: + while len(results) < total_hits: + params["search_after"] = search_after + response = self.do_query_raw(*args, **params) + results.extend([r["_source"] for r in response["hits"]["hits"]]) + + search_after = [] + for sort_param in response["hits"]["hits"][-1]["sort"]: + search_after.append(str(sort_param)) + + except (KeyError, IndexError): + pass + + return results + + def convert_iso_to_datetime(self, date): + return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC) + + def convert_iso_to_timestamp(self, date): + return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() + + @staticmethod + def _merge_kwargs(params, **kwargs): + # Only Solr-specific kwargs are parsed + # And the special 'limit' + try: + params['limit'] = kwargs['limit'] + except KeyError: + pass + + try: + params['_route_'] = kwargs['_route_'] + except KeyError: + pass + + try: + params['size'] = kwargs['size'] + except KeyError: + pass + + try: + params['start'] = kwargs['start'] + except KeyError: + pass + + try: + s = kwargs['sort'] if isinstance(kwargs['sort'], list) else [kwargs['sort']] + except KeyError: + s = None + + try: + params['sort'].extend(s) + except KeyError: + if s is not None: + params['sort'] = s diff --git a/data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py b/data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py new file mode 100644 index 00000000..c8d3adfe --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/S3Proxy.py @@ -0,0 +1,141 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
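+
+# Example usage (illustrative sketch only; the bucket name and tile id below are
+# hypothetical). S3Proxy expects a ConfigParser with an [s3] section providing
+# "bucket" and "region", and fetches protobuf-serialized tiles keyed by UUID:
+#
+#   import configparser
+#   config = configparser.ConfigParser()
+#   config.read_string("[s3]\nbucket=my-nexus-tiles\nregion=us-west-2")
+#   proxy = S3Proxy(config)
+#   tile_data = proxy.fetch_nexus_tiles("<tile-uuid>")[0]
+#   lats, lons, times, data, meta = tile_data.get_lat_lon_time_data_meta()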
+ +import uuid + +import boto3 +import nexusproto.DataTile_pb2 as nexusproto +import numpy as np +from nexusproto.serialization import from_shaped_array + + +class NexusTileData(object): + __nexus_tile = None + __data = None + tile_id = None + + def __init__(self, data, _tile_id): + if self.__data is None: + self.__data = data + if self.tile_id is None: + self.tile_id = _tile_id + + def _get_nexus_tile(self): + if self.__nexus_tile is None: + self.__nexus_tile = nexusproto.TileData.FromString(self.__data) + + return self.__nexus_tile + + def get_raw_data_array(self): + + nexus_tile = self._get_nexus_tile() + the_tile_type = nexus_tile.tile.WhichOneof("tile_type") + + the_tile_data = getattr(nexus_tile.tile, the_tile_type) + + return from_shaped_array(the_tile_data.variable_data) + + def get_lat_lon_time_data_meta(self): + if self._get_nexus_tile().HasField('grid_tile'): + grid_tile = self._get_nexus_tile().grid_tile + + grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) + latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) + longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) + + if len(grid_tile_data.shape) == 2: + grid_tile_data = grid_tile_data[np.newaxis, :] + + # Extract the meta data + meta_data = {} + for meta_data_obj in grid_tile.meta_data: + name = meta_data_obj.name + meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + if len(meta_array.shape) == 2: + meta_array = meta_array[np.newaxis, :] + meta_data[name] = meta_array + + return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data + elif self._get_nexus_tile().HasField('swath_tile'): + swath_tile = self._get_nexus_tile().swath_tile + + latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) + longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) + time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) + + # Simplify the tile if the time dimension is the same value repeated + if np.all(time_data == np.min(time_data)): + time_data = np.array([np.min(time_data)]) + + swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) + + tile_data = self._to_standard_index(swath_tile_data, + (len(time_data), len(latitude_data), len(longitude_data))) + + # Extract the meta data + meta_data = {} + for meta_data_obj in swath_tile.meta_data: + name = meta_data_obj.name + actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) + reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) + meta_data[name] = reshaped_meta_array + + return latitude_data, longitude_data, time_data, tile_data, meta_data + else: + raise NotImplementedError("Only supports grid_tile and swath_tile") + + @staticmethod + def _to_standard_index(data_array, desired_shape): + + if desired_shape[0] == 1: + reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + reshaped_array = reshaped_array[np.newaxis, :] + else: + reshaped_array = np.ma.masked_all(desired_shape) + row, col = np.indices(data_array.shape) + + reshaped_array[np.diag_indices(desired_shape[1], 
len(reshaped_array.shape))] = data_array[ + row.flat, col.flat] + reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ + row.flat, col.flat] + + return reshaped_array + + +class S3Proxy(object): + def __init__(self, config): + self.config = config + self.__s3_bucketname = config.get("s3", "bucket") + self.__s3_region = config.get("s3", "region") + self.__s3 = boto3.resource('s3') + self.__nexus_tile = None + + def fetch_nexus_tiles(self, *tile_ids): + tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if + (isinstance(tile_id, str) or isinstance(tile_id, str))] + res = [] + for tile_id in tile_ids: + obj = self.__s3.Object(self.__s3_bucketname, str(tile_id)) + data = obj.get()['Body'].read() + nexus_tile = NexusTileData(data, str(tile_id)) + res.append(nexus_tile) + + return res diff --git a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py new file mode 100644 index 00000000..9b16533d --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py @@ -0,0 +1,731 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
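+
+# Example usage (illustrative sketch only; the dataset name, bounds, and times are
+# hypothetical). SolrProxy expects a ConfigParser with a [solr] section providing
+# "host" and "core", and translates the calls below into Solr queries against the
+# tile metadata index:
+#
+#   proxy = SolrProxy(config)
+#   datasets = proxy.get_data_series_list_simple()
+#   tiles = proxy.find_all_tiles_in_box_at_time(-10.0, 10.0, -120.0, -100.0,
+#                                               'MY_DATASET', 1388534400)
+#   n = proxy.get_tile_count('MY_DATASET', start_time=1388534400, end_time=1391212800)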
+ +import json +import logging +import threading +import time +from datetime import datetime +from pytz import timezone, UTC + +import requests +import pysolr +from shapely import wkt + +SOLR_CON_LOCK = threading.Lock() +thread_local = threading.local() + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +SOLR_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + + +class SolrProxy(object): + def __init__(self, config): + self.solrUrl = config.get("solr", "host") + self.solrCore = config.get("solr", "core") + solr_kargs = {} + if config.has_option("solr", "time_out"): + solr_kargs["timeout"] = config.get("solr", "time_out") + self.logger = logging.getLogger('nexus') + + with SOLR_CON_LOCK: + solrcon = getattr(thread_local, 'solrcon', None) + if solrcon is None: + solr_url = '%s/solr/%s' % (self.solrUrl, self.solrCore) + self.logger.info("connect to solr, url {} with option(s) = {}".format(solr_url, solr_kargs)) + solrcon = pysolr.Solr(solr_url, **solr_kargs) + thread_local.solrcon = solrcon + + self.solrcon = solrcon + + def find_tile_by_id(self, tile_id): + + search = 'id:%s' % tile_id + + params = { + 'rows': 1 + } + + results, start, found = self.do_query(*(search, None, None, True, None), **params) + + assert len(results) == 1, "Found %s results, expected exactly 1" % len(results) + return [results[0]] + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + + if ds is not None: + search = 'dataset_s:%s' % ds + else: + search = '*:*' + + additionalparams = { + 'fq': [ + "{!terms f=id}%s" % ','.join(tile_ids) + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results = self.do_query_all(*(search, None, None, False, None), **additionalparams) + + assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) + return results + + def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): + + if ds is not None: + search = 'dataset_s:%s' % ds + else: + search = '*:*' + + kwargs['rows'] = 1 + kwargs['fl'] = 'tile_min_time_dt' + kwargs['sort'] = ['tile_min_time_dt asc'] + additionalparams = { + 'fq': [ + "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) + + def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): + + if ds is not None: + search = 'dataset_s:%s' % ds + else: + search = '*:*' + + kwargs['rows'] = 1 + kwargs['fl'] = 'tile_max_time_dt' + kwargs['sort'] = ['tile_max_time_dt desc'] + additionalparams = { + 'fq': [ + "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) + + def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): + search = 'dataset_s:%s' % ds + + kwargs['rows'] = 1 + kwargs['fl'] = 'tile_min_time_dt' + kwargs['sort'] = ['tile_min_time_dt asc'] + additionalparams = { + 'fq': [ + "granule_s:%s" % granule_name + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) + start_time = self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) + + kwargs['fl'] = 'tile_max_time_dt' + 
kwargs['sort'] = ['tile_max_time_dt desc'] + additionalparams = { + 'fq': [ + "granule_s:%s" % granule_name + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) + end_time = self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) + + return start_time, end_time + + def get_data_series_list(self): + + datasets = self.get_data_series_list_simple() + + for dataset in datasets: + min_date = self.find_min_date_from_tiles([], ds=dataset['title']) + max_date = self.find_max_date_from_tiles([], ds=dataset['title']) + dataset['start'] = (min_date - EPOCH).total_seconds() + dataset['end'] = (max_date - EPOCH).total_seconds() + dataset['iso_start'] = min_date.strftime(ISO_8601) + dataset['iso_end'] = max_date.strftime(ISO_8601) + + return datasets + + def get_data_series_list_simple(self): + search = "*:*" + params = { + 'rows': 0, + "facet": "true", + "facet.field": "dataset_s", + "facet.mincount": "1", + "facet.limit": "-1" + } + + + response = self.do_query_raw(*(search, None, None, False, None), **params) + l = [] + for g, v in zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2): + l.append({ + "shortName": g, + "title": g, + "tileCount": v + }) + l = sorted(l, key=lambda entry: entry["title"]) + return l + + def get_data_series_stats(self, ds): + search = "dataset_s:%s" % ds + params = { + "facet": "true", + "facet.field": ["dataset_s", "tile_max_time_dt"], + "facet.limit": "-1", + "facet.mincount": "1", + "facet.pivot": "{!stats=piv1}dataset_s", + "stats": "on", + "stats.field": ["{!tag=piv1 min=true max=true sum=false}tile_max_time_dt","{!tag=piv1 min=true max=false sum=false}tile_min_val_d","{!tag=piv1 min=false max=true sum=false}tile_max_val_d"] + } + + response = self.do_query_raw(*(search, None, None, False, None), **params) + + stats = {} + + for g in response.facets["facet_pivot"]["dataset_s"]: + if g["value"] == ds: + stats["start"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["min"]) + stats["end"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["max"]) + stats["minValue"] = g["stats"]["stats_fields"]["tile_min_val_d"]["min"] + stats["maxValue"] = g["stats"]["stats_fields"]["tile_max_val_d"]["max"] + + + stats["availableDates"] = [] + for dt in response.facets["facet_fields"]["tile_max_time_dt"][::2]: + stats["availableDates"].append(self.convert_iso_to_timestamp(dt)) + + stats["availableDates"] = sorted(stats["availableDates"]) + + return stats + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): + + search = 'dataset_s:%s' % ds + + params = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]", + "day_of_year_i:[* TO %s]" % day_of_year + ], + 'rows': 1 + } + + results, start, found = self.do_query( + *(search, None, None, True, ('day_of_year_i desc',)), **params) + + return [results[0]] + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): + + search = 'dataset_s:%s' % ds + + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "{!frange l=0 u=0}ms(tile_min_time_dt,tile_max_time_dt)", + "tile_count_i:[1 TO *]", + "tile_min_time_dt:[%s TO %s] " % (search_start_s, 
search_end_s) + ], + 'rows': 0, + 'facet': 'true', + 'facet.field': 'tile_min_time_dt', + 'facet.mincount': '1', + 'facet.limit': '-1' + } + + self._merge_kwargs(additionalparams, **kwargs) + + response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) + + daysinrangeasc = sorted( + [(datetime.strptime(a_date, SOLR_FORMAT) - datetime.utcfromtimestamp(0)).total_seconds() for a_date + in response.facets['facet_fields']['tile_min_time_dt'][::2]]) + + return daysinrangeasc + + def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, + end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), + **additionalparams) + + def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), + **additionalparams) + + def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **additionalparams) + + def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "{!field 
f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]" + ], + 'rows': 0, + 'facet': 'true', + 'facet.field': 'geo_s', + 'facet.limit': -1, + 'facet.mincount': 1 + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) + + distinct_bounds = [wkt.loads(key).bounds for key in response.facets["facet_fields"]["geo_s"][::2]] + + return distinct_bounds + + def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): + + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "tile_min_lon:\"%s\"" % minx, + "tile_min_lat:\"%s\"" % miny, + "tile_max_lon:\"%s\"" % maxx, + "tile_max_lat:\"%s\"" % maxy, + "tile_count_i:[1 TO *]" + ] + } + + if 0 <= start_time <= end_time: + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + additionalparams['fq'].append(time_clause) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **additionalparams) + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ + "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, None, None, False, None), **additionalparams) + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ + "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, None, None, False, None), **additionalparams) + + def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ 
+ "geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, "product(tile_avg_val_d, tile_count_i),*", None, False, None), + **additionalparams) + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): + search = 'dataset_s:%s' % ds + + the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) + time_clause = "(" \ + "tile_min_time_dt:[* TO %s] " \ + "AND tile_max_time_dt:[%s TO *] " \ + ")" % ( + the_time, the_time + ) + + additionalparams = { + 'fq': [ + "geo:\"Intersects(MultiLineString((%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s)))\"" % ( + min_lon, max_lat, max_lon, max_lat, min_lon, max_lat, min_lon, min_lat, max_lon, max_lat, max_lon, + min_lat, min_lon, min_lat, max_lon, min_lat), + "-geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), + "tile_count_i:[1 TO *]", + time_clause + ] + } + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all(*(search, None, None, False, None), **additionalparams) + + def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): + """ + Get a list of tile metadata that matches the specified metadata, start_time, end_time. + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tile metadata + """ + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': metadata + } + + if 0 <= start_time <= end_time: + additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) + + self._merge_kwargs(additionalparams, **kwargs) + + return self.do_query_all( + *(search, None, None, False, None), + **additionalparams) + + def get_formatted_time_clause(self, start_time, end_time): + search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) + search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) + + time_clause = "(" \ + "tile_min_time_dt:[%s TO %s] " \ + "OR tile_max_time_dt:[%s TO %s] " \ + "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ + ")" % ( + search_start_s, search_end_s, + search_start_s, search_end_s, + search_start_s, search_end_s + ) + return time_clause + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + search = 'dataset_s:%s' % ds + + additionalparams = { + 'fq': [ + "tile_count_i:[1 TO *]" + ], + 'rows': 0 + } + + if bounding_polygon: + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + additionalparams['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) + + if 0 <= start_time <= end_time: + additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) + + if metadata: + additionalparams['fq'].extend(metadata) + + self._merge_kwargs(additionalparams, **kwargs) + + results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) + + return found + + def do_query(self, *args, **params): + + response = self.do_query_raw(*args, **params) + + return response.docs, response.raw_response['response']['start'], response.hits + + def do_query_raw(self, *args, **params): + + if 'fl' not in list(params.keys()) and args[1]: + params['fl'] = args[1] + + if 'sort' not in list(params.keys()) and args[4]: + params['sort'] = args[4] + + # If dataset_s is specified as the search term, + # add the _route_ parameter to limit the search to the correct shard + if 'dataset_s:' in args[0]: + ds = args[0].split(':')[-1] + params['shard_keys'] = ds + '!' + + with SOLR_CON_LOCK: + response = self.solrcon.search(args[0], **params) + + return response + + + def do_query_all(self, *args, **params): + + results = [] + + response = self.do_query_raw(*args, **params) + results.extend(response.docs) + + limit = min(params.get('limit', float('inf')), response.hits) + + while len(results) < limit: + params['start'] = len(results) + response = self.do_query_raw(*args, **params) + results.extend(response.docs) + + assert len(results) == limit + + return results + + def convert_iso_to_datetime(self, date): + return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC) + + def convert_iso_to_timestamp(self, date): + return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() + + def ping(self): + solrAdminPing = '%s/solr/%s/admin/ping' % (self.solrUrl, self.solrCore) + try: + r = requests.get(solrAdminPing, params={'wt': 'json'}) + results = json.loads(r.text) + return results + except: + return None + + @staticmethod + def _merge_kwargs(additionalparams, **kwargs): + # Only Solr-specific kwargs are parsed + # And the special 'limit' + try: + additionalparams['limit'] = kwargs['limit'] + except KeyError: + pass + + try: + additionalparams['_route_'] = kwargs['_route_'] + except KeyError: + pass + + try: + additionalparams['rows'] = kwargs['rows'] + except KeyError: + pass + + try: + additionalparams['start'] = kwargs['start'] + except KeyError: + pass + + try: + kwfq = kwargs['fq'] if isinstance(kwargs['fq'], list) else list(kwargs['fq']) + except KeyError: + kwfq = [] + + try: + additionalparams['fq'].extend(kwfq) + except KeyError: + additionalparams['fq'] = kwfq + + try: + kwfl = kwargs['fl'] if isinstance(kwargs['fl'], list) else [kwargs['fl']] + except KeyError: + kwfl = [] + + try: + additionalparams['fl'].extend(kwfl) + except KeyError: + additionalparams['fl'] = kwfl + + try: + s = kwargs['sort'] if isinstance(kwargs['sort'], list) else 
[kwargs['sort']] + except KeyError: + s = None + + try: + additionalparams['sort'].extend(s) + except KeyError: + if s is not None: + additionalparams['sort'] = s diff --git a/data-access/nexustiles/backends/nexusproto/dao/__init__.py b/data-access/nexustiles/backends/nexusproto/dao/__init__.py new file mode 100644 index 00000000..6acb5d12 --- /dev/null +++ b/data-access/nexustiles/backends/nexusproto/dao/__init__.py @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/data-access/nexustiles/backends/zarr/__init__.py b/data-access/nexustiles/backends/zarr/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index a3aa61e9..333b0c55 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -32,6 +32,13 @@ from .dao import SolrProxy from .dao import ElasticsearchProxy +from .backends.nexusproto.backend import NexusprotoTileService + + +from abc import ABC, abstractmethod + +from .AbstractTileService import AbstractTileService + from .model.nexusmodel import Tile, BBox, TileStats, TileVariable EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -78,7 +85,9 @@ class NexusTileServiceException(Exception): pass -class NexusTileService(object): +class NexusTileService(AbstractTileService): + backends = {} + def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): self._datastore = None self._metadatastore = None @@ -352,92 +361,6 @@ def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_tim bounds = self._metadatastore.find_distinct_bounding_boxes_in_polygon(bounding_polygon, ds, start_time, end_time) return [box(*b) for b in bounds] - def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): - - for tile in tiles: - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. 
- if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_polygon(self, bounding_polygon, tiles): - - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) - - def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) - - def mask_tiles_to_time_range(self, start_time, end_time, tiles): - """ - Masks data in tiles to specified time range. - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param tiles: List of tiles - :return: A list tiles with data masked to specified time range - """ - if 0 <= start_time <= end_time: - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. - if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): """ Return number of tiles that match search criteria. 
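Taken together, patches 01 and 02 turn NexusTileService into a dispatcher over a class-level backends registry of AbstractTileService implementations, with the nexusproto backend registered under the None key as the default. A minimal sketch of how a lookup against that registry might work (illustrative only: _backend_for is a hypothetical helper, and per-dataset routing is still a stub at this point in the series):

    def _backend_for(dataset=None):
        # Datasets without a dedicated backend fall back to the default
        # nexusproto backend registered under the None key.
        return NexusTileService.backends.get(dataset, NexusTileService.backends[None])

    # e.g. _backend_for('MY_GRIDDED_DATASET').get_dataseries_list()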
From 4f3f6112f0156f5d928f8549ed0a58d6d8f64e9e Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 5 Jul 2023 13:09:11 -0700 Subject: [PATCH 02/70] n/a --- data-access/nexustiles/AbstractTileService.py | 5 ++ data-access/nexustiles/config/datasets.ini | 18 +++++ .../nexustiles/config/datasets.ini.default | 18 +++++ data-access/nexustiles/nexustiles.py | 78 +++++++++++++------ 4 files changed, 96 insertions(+), 23 deletions(-) create mode 100644 data-access/nexustiles/config/datasets.ini create mode 100644 data-access/nexustiles/config/datasets.ini.default diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index f4f4449c..307a2c15 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -37,6 +37,11 @@ from nexustiles.nexustiles import NexusTileServiceException class AbstractTileService(ABC): + @staticmethod + @abstractmethod + def open_dataset(dataset_s, **kwargs): + pass + @abstractmethod def get_dataseries_list(self, simple=False): raise NotImplementedError() diff --git a/data-access/nexustiles/config/datasets.ini b/data-access/nexustiles/config/datasets.ini new file mode 100644 index 00000000..9f586cf2 --- /dev/null +++ b/data-access/nexustiles/config/datasets.ini @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[solr] +host=http://localhost:8983 +core=nexusdatasets diff --git a/data-access/nexustiles/config/datasets.ini.default b/data-access/nexustiles/config/datasets.ini.default new file mode 100644 index 00000000..9f586cf2 --- /dev/null +++ b/data-access/nexustiles/config/datasets.ini.default @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[solr] +host=http://localhost:8983 +core=nexusdatasets diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 333b0c55..622792eb 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -18,14 +18,16 @@ import sys import json from datetime import datetime -from functools import wraps, reduce +from functools import wraps, reduce, partial import numpy as np import numpy.ma as ma import pkg_resources from pytz import timezone, UTC from shapely.geometry import MultiPolygon, box +import pysolr +import threading from .dao import CassandraProxy from .dao import DynamoProxy from .dao import S3Proxy @@ -41,6 +43,8 @@ from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from webservice.webmodel import DatasetNotFoundException + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) logging.basicConfig( @@ -85,36 +89,64 @@ class NexusTileServiceException(Exception): pass -class NexusTileService(AbstractTileService): - backends = {} +SOLR_LOCK = threading.Lock() +thread_local = threading.local() + - def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): - self._datastore = None - self._metadatastore = None +class NexusTileService(AbstractTileService): + backends = {} # relate ds names to factory func objects + + def __init__(self, config=None): self._config = configparser.RawConfigParser() - self._config.read(NexusTileService._get_config_files('config/datastores.ini')) + self._config.read(NexusTileService._get_config_files('config/datasets.ini')) + + self._alg_config = config if config: self.override_config(config) - if not skipDatastore: - datastore = self._config.get("datastore", "store") - if datastore == "cassandra": - self._datastore = CassandraProxy.CassandraProxy(self._config) - elif datastore == "s3": - self._datastore = S3Proxy.S3Proxy(self._config) - elif datastore == "dynamo": - self._datastore = DynamoProxy.DynamoProxy(self._config) + NexusTileService.backends[None] = NexusprotoTileService(False, False, config) + NexusTileService.backends['__nexusproto__'] = NexusTileService.backends[None] + + + + def _get_ingested_datasets(self): + solr_url = self._config.get("solr", "host") + solr_core = self._config.get("solr", "core") + solr_kwargs = {} + + if self._config.has_option("solr", "time_out"): + solr_kwargs["timeout"] = self._config.get("solr", "time_out") + + with SOLR_LOCK: + solrcon = getattr(thread_local, 'solrcon', None) + if solrcon is None: + solr_url = '%s/solr/%s' % (solr_url, solr_core) + solrcon = pysolr.Solr(solr_url, **solr_kwargs) + thread_local.solrcon = solrcon + + solrcon = solrcon + + response = solrcon.search('*:*') + + for dataset in response.docs: + d_id = dataset['dataset_s'] + store_type = dataset.get('store_type_s', 'nexusproto') + + if store_type == 'nexus_proto': + NexusTileService.backends[d_id] = NexusTileService.backends[None] else: - raise ValueError("Error reading datastore from config file") - - if not skipMetadatastore: - metadatastore = self._config.get("metadatastore", "store", fallback='solr') - if metadatastore == "solr": - self._metadatastore = SolrProxy.SolrProxy(self._config) - elif metadatastore == "elasticsearch": - self._metadatastore = ElasticsearchProxy.ElasticsearchProxy(self._config) + ds_config = dataset['config'] + # NexusTileService.backends[d_id] = + + + + + def get_tileservice_factory(self, dataset=None): + pass + + def override_config(self, config): for section in config.sections(): From 
e32d5addd4488ced41bc895a744c7c3de70f4301 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 6 Jul 2023 14:30:58 -0700 Subject: [PATCH 03/70] More nts backend stuff --- data-access/nexustiles/AbstractTileService.py | 45 +---- .../nexustiles/backends/nexusproto/backend.py | 4 +- .../nexustiles/backends/zarr/backend.py | 45 +++++ data-access/nexustiles/nexustiles.py | 181 +++++++++--------- 4 files changed, 143 insertions(+), 132 deletions(-) create mode 100644 data-access/nexustiles/backends/zarr/backend.py diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 307a2c15..6426295b 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -36,11 +36,16 @@ from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable from nexustiles.nexustiles import NexusTileServiceException + class AbstractTileService(ABC): - @staticmethod + # @staticmethod + # @abstractmethod + # def open_dataset(dataset_s, **kwargs): + # pass + @abstractmethod - def open_dataset(dataset_s, **kwargs): - pass + def try_connect(self) -> bool: + raise NotImplementedError() @abstractmethod def get_dataseries_list(self, simple=False): @@ -115,19 +120,6 @@ def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, * """ raise NotImplementedError() - @abstractmethod - def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): - """ - Return list of tiles that matches the specified metadata, start_time, end_time with tile data outside of time - range properly masked out. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tiles - """ - raise NotImplementedError() - @abstractmethod def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): """ @@ -148,15 +140,6 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): raise NotImplementedError() - @abstractmethod - def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, - **kwargs): - raise NotImplementedError() - - @abstractmethod - def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time=-1, **kwargs): - raise NotImplementedError() - @abstractmethod def get_min_max_time_by_granule(self, ds, granule_name): raise NotImplementedError() @@ -165,18 +148,6 @@ def get_min_max_time_by_granule(self, ds, granule_name): def get_dataset_overall_stats(self, ds): raise NotImplementedError() - @abstractmethod - def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() - - @abstractmethod - def get_tiles_bounded_by_polygon_at_time(self, polygon, dataset, time, **kwargs): - raise NotImplementedError() - - @abstractmethod - def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() - @abstractmethod def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): raise NotImplementedError() diff --git a/data-access/nexustiles/backends/nexusproto/backend.py 
b/data-access/nexustiles/backends/nexusproto/backend.py index 86d5ca6a..aa0ab290 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -34,6 +34,7 @@ from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.AbstractTileService import AbstractTileService EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -44,8 +45,9 @@ logger = logging.getLogger("testing") -class NexusprotoTileService(object): +class NexusprotoTileService(AbstractTileService): def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): + AbstractTileService.__init__(self) self._datastore = None self._metadatastore = None diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py new file mode 100644 index 00000000..019cd753 --- /dev/null +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import configparser +import logging +import sys +import json +from datetime import datetime +from functools import reduce + +import numpy as np +import numpy.ma as ma +import pkg_resources +from pytz import timezone, UTC +from shapely.geometry import MultiPolygon, box + +from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable +from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.AbstractTileService import AbstractTileService + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger("testing") + + +class ZarrBackend(AbstractTileService): + def __init__(self, config): + AbstractTileService.__init__(self) + self.__config = config diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 622792eb..fde0a5f3 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -28,13 +28,10 @@ import pysolr import threading -from .dao import CassandraProxy -from .dao import DynamoProxy -from .dao import S3Proxy -from .dao import SolrProxy -from .dao import ElasticsearchProxy +from time import sleep from .backends.nexusproto.backend import NexusprotoTileService +from .backends.zarr.backend import ZarrBackend from abc import ABC, abstractmethod @@ -42,8 +39,9 @@ from .AbstractTileService import AbstractTileService from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from typing import Dict, Union -from webservice.webmodel import DatasetNotFoundException +from webservice.webmodel import DatasetNotFoundException, NexusProcessingException EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -90,12 +88,13 @@ class NexusTileServiceException(Exception): SOLR_LOCK = threading.Lock() +DS_LOCK = threading.Lock() thread_local = threading.local() class NexusTileService(AbstractTileService): - backends = {} # relate ds names to factory func objects + backends: Dict[Union[None, str], Dict[str, Union[AbstractTileService, bool]]] = {} def __init__(self, config=None): self._config = configparser.RawConfigParser() @@ -106,12 +105,37 @@ def __init__(self, config=None): if config: self.override_config(config) - NexusTileService.backends[None] = NexusprotoTileService(False, False, config) + NexusTileService.backends[None] = {"backend": NexusprotoTileService(False, False, config), 'up': True} NexusTileService.backends['__nexusproto__'] = NexusTileService.backends[None] + def __update_datasets(): + while True: + with DS_LOCK: + self._update_datasets() + sleep(3600) + threading.Thread(target=__update_datasets, name='dataset_update', daemon=False).start() - def _get_ingested_datasets(self): + + + @staticmethod + def __get_backend(dataset_s) -> AbstractTileService: + if dataset_s not in NexusTileService.backends: + raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') + + b = NexusTileService.backends[dataset_s] + + if not b['up']: + success = b['backend'].try_connect() + + if not success: + raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') + else: + NexusTileService.backends[dataset_s]['up'] = True + + return b['backend'] + + def _update_datasets(self): solr_url = self._config.get("solr", "host") solr_core = self._config.get("solr", "core") solr_kwargs = {} @@ -130,23 +154,34 @@ def _get_ingested_datasets(self): response = 
solrcon.search('*:*') + present_datasets = set() + for dataset in response.docs: d_id = dataset['dataset_s'] store_type = dataset.get('store_type_s', 'nexusproto') - if store_type == 'nexus_proto': - NexusTileService.backends[d_id] = NexusTileService.backends[None] - else: - ds_config = dataset['config'] - # NexusTileService.backends[d_id] = - + present_datasets.add(d_id) + if d_id in NexusTileService.backends: + continue + # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + if store_type == 'nexus_proto' or store_type == 'nexusproto': + NexusTileService.backends[d_id] = NexusTileService.backends[None] + elif store_type == 'zarr': + ds_config = json.loads(dataset['config'][0]) + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(ds_config), + 'up': True + } + else: + logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') - def get_tileservice_factory(self, dataset=None): - pass - + removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) + for dataset in removed_datasets: + logger.info(f"Removing dataset {dataset}") + del NexusTileService.backends[dataset] def override_config(self, config): for section in config.sections(): @@ -163,65 +198,35 @@ def get_dataseries_list(self, simple=False): @tile_data() def find_tile_by_id(self, tile_id, **kwargs): - return self._metadatastore.find_tile_by_id(tile_id) + return NexusTileService.__get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - return self._metadatastore.find_tiles_by_id(tile_ids, ds=ds, **kwargs) + return NexusTileService.__get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - start = datetime.now() - result = self._metadatastore.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, dataset, start_time, - end_time, - **kwargs) - duration = (datetime.now() - start).total_seconds() - if metrics_callback: - metrics_callback(solr=duration) - return result + return NexusTileService.__get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, + dataset, start_time, end_time, + metrics_callback, **kwargs) @tile_data() def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): - """ - Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding - polygon and the closest day of year. 
- - For example: - given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 - search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) - - Valid matches: - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 - - Invalid matches: - minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 - minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists - - :param bounding_polygon: The exact bounding polygon of tiles to search for - :param ds: The dataset name being searched - :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned - :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found - """ - try: - tile = self._metadatastore.find_tile_by_polygon_and_most_recent_day_of_year(bounding_polygon, ds, - day_of_year) - except IndexError: - raise NexusTileServiceException("No tile found.").with_traceback(sys.exc_info()[2]) - - return tile + return NexusTileService.__get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( + bounding_polygon, ds, day_of_year, **kwargs + ) @tile_data() def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return self._metadatastore.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, - rows=5000, - **kwargs) + return NexusTileService.__get_backend(dataset).find_all_tiles_in_box_at_time( + min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs + ) @tile_data() def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): - return self._metadatastore.find_all_tiles_in_polygon_at_time(bounding_polygon, dataset, time, rows=5000, - **kwargs) + return NexusTileService.__get_backend(dataset).find_all_tiles_in_polygon_at_time( + bounding_polygon, dataset, time, **kwargs + ) @tile_data() def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): @@ -230,33 +235,22 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t start_time = (start_time - EPOCH).total_seconds() if type(end_time) is datetime: end_time = (end_time - EPOCH).total_seconds() - return self._metadatastore.find_all_tiles_in_box_sorttimeasc(min_lat, max_lat, min_lon, max_lon, ds, start_time, - end_time, **kwargs) + + return NexusTileService.__get_backend(ds).find_tiles_in_box( + min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs + ) @tile_data() def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): - # Find tiles that fall within the polygon in the Solr index - if 'sort' in list(kwargs.keys()): - tiles = self._metadatastore.find_all_tiles_in_polygon(bounding_polygon, ds, start_time, end_time, **kwargs) - else: - tiles = self._metadatastore.find_all_tiles_in_polygon_sorttimeasc(bounding_polygon, ds, start_time, - end_time, - **kwargs) - return tiles + return NexusTileService.__get_backend(ds).find_tiles_in_polygon( + bounding_polygon, ds, start_time, end_time, **kwargs + ) @tile_data() def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): - """ - Return list of tiles whose metadata 
matches the specified metadata, start_time, end_time. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tiles - """ - tiles = self._metadatastore.find_all_tiles_by_metadata(metadata, ds, start_time, end_time, **kwargs) - - return tiles + return NexusTileService.__get_backend(ds).find_tiles_by_metadata( + metadata, ds, start_time, end_time, **kwargs + ) def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): """ @@ -287,16 +281,15 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - tiles = self._metadatastore.find_tiles_by_exact_bounds(bounds[0], bounds[1], bounds[2], bounds[3], ds, - start_time, - end_time) - return tiles + return NexusTileService.__get_backend(ds).find_tiles_by_exact_bounds( + bounds, ds, start_time, end_time, **kwargs + ) @tile_data() def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return self._metadatastore.find_all_boundary_tiles_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, - rows=5000, - **kwargs) + return NexusTileService.__get_backend(dataset).find_all_boundary_tiles_at_time( + min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs + ) def get_tiles_bounded_by_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): @@ -317,12 +310,12 @@ def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time= return tiles def get_min_max_time_by_granule(self, ds, granule_name): - start_time, end_time = self._metadatastore.find_min_max_date_from_granule(ds, granule_name) - - return start_time, end_time + return NexusTileService.__get_backend(ds).get_min_max_time_by_granule( + ds, granule_name + ) def get_dataset_overall_stats(self, ds): - return self._metadatastore.get_data_series_stats(ds) + return NexusTileService.__get_backend(ds).get_dataset_overall_stats(ds) def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) From ccc0de4e56122e570a1acc8dbbf6f9443dfebc23 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 15:26:22 -0700 Subject: [PATCH 04/70] Working(?) 
np backend --- .../algorithms/DailyDifferenceAverage.py | 3 +- .../algorithms/StandardDeviationSearch.py | 2 +- .../app_builders/NexusAppBuilder.py | 2 +- data-access/nexustiles/AbstractTileService.py | 112 +- .../nexustiles/backends/nexusproto/backend.py | 2 +- .../nexustiles/backends/zarr/backend.py | 2 +- data-access/nexustiles/dao/CassandraProxy.py | 317 ----- data-access/nexustiles/dao/DynamoProxy.py | 146 -- .../nexustiles/dao/ElasticsearchProxy.py | 1235 ----------------- data-access/nexustiles/dao/S3Proxy.py | 141 -- data-access/nexustiles/dao/SolrProxy.py | 731 ---------- data-access/nexustiles/dao/__init__.py | 14 - data-access/nexustiles/exception.py | 2 + data-access/nexustiles/nexustiles.py | 286 ++-- data-access/setup.py | 18 +- 15 files changed, 205 insertions(+), 2808 deletions(-) delete mode 100644 data-access/nexustiles/dao/CassandraProxy.py delete mode 100644 data-access/nexustiles/dao/DynamoProxy.py delete mode 100644 data-access/nexustiles/dao/ElasticsearchProxy.py delete mode 100644 data-access/nexustiles/dao/S3Proxy.py delete mode 100644 data-access/nexustiles/dao/SolrProxy.py delete mode 100644 data-access/nexustiles/dao/__init__.py create mode 100644 data-access/nexustiles/exception.py diff --git a/analysis/webservice/algorithms/DailyDifferenceAverage.py b/analysis/webservice/algorithms/DailyDifferenceAverage.py index 05274fc2..c6c84951 100644 --- a/analysis/webservice/algorithms/DailyDifferenceAverage.py +++ b/analysis/webservice/algorithms/DailyDifferenceAverage.py @@ -21,7 +21,8 @@ import numpy as np import pytz -from nexustiles.nexustiles import NexusTileService, NexusTileServiceException +from nexustiles.nexustiles import NexusTileService +from nexustiles.exception import NexusTileServiceException from shapely.geometry import box from webservice.NexusHandler import nexus_handler diff --git a/analysis/webservice/algorithms/StandardDeviationSearch.py b/analysis/webservice/algorithms/StandardDeviationSearch.py index ae0566f1..26451cb1 100644 --- a/analysis/webservice/algorithms/StandardDeviationSearch.py +++ b/analysis/webservice/algorithms/StandardDeviationSearch.py @@ -19,7 +19,7 @@ from datetime import datetime from functools import partial -from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.exception import NexusTileServiceException from pytz import timezone from webservice.NexusHandler import nexus_handler diff --git a/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py index afe7d690..01798583 100644 --- a/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py @@ -53,7 +53,7 @@ def set_modules(self, module_dir, algorithm_config, remote_collections=None, max NexusHandler.executeInitializers(algorithm_config) self.log.info("Initializing request ThreadPool to %s" % max_request_threads) - tile_service_factory = partial(NexusTileService, False, False, algorithm_config) + tile_service_factory = partial(NexusTileService, algorithm_config) handler_args_builder = HandlerArgsBuilder( max_request_threads, tile_service_factory, diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 6426295b..6e5b4640 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -13,28 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the 
License. -import configparser -import logging -import sys -import json from abc import ABC, abstractmethod -from datetime import datetime from functools import reduce import numpy as np import numpy.ma as ma -import pkg_resources -from pytz import timezone, UTC -from shapely.geometry import MultiPolygon, box - -from .dao import CassandraProxy -from .dao import DynamoProxy -from .dao import S3Proxy -from .dao import SolrProxy -from .dao import ElasticsearchProxy - -from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.nexustiles import NexusTileServiceException class AbstractTileService(ABC): @@ -43,9 +26,9 @@ class AbstractTileService(ABC): # def open_dataset(dataset_s, **kwargs): # pass - @abstractmethod - def try_connect(self) -> bool: - raise NotImplementedError() + # @abstractmethod + # def try_connect(self) -> bool: + # raise NotImplementedError() @abstractmethod def get_dataseries_list(self, simple=False): @@ -193,91 +176,6 @@ def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_tim """ raise NotImplementedError() - def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): - for tile in tiles: - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. - if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) - tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - - def mask_tiles_to_polygon(self, bounding_polygon, tiles): - - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) - - def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - - return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) - - def mask_tiles_to_time_range(self, start_time, end_time, tiles): - """ - Masks data in tiles to specified time range. 
- :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param tiles: List of tiles - :return: A list tiles with data masked to specified time range - """ - if 0 <= start_time <= end_time: - for tile in tiles: - tile.times = ma.masked_outside(tile.times, start_time, end_time) - - # Or together the masks of the individual arrays to create the new mask - data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ - | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ - | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - - # If this is multi-var, need to mask each variable separately. - if tile.is_multi: - # Combine space/time mask with existing mask on data - data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) - - num_vars = len(tile.data) - multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) - tile.data = ma.masked_where(multi_data_mask, tile.data) - else: - tile.data = ma.masked_where(data_mask, tile.data) - - tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - - return tiles - @abstractmethod def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): """ @@ -295,10 +193,6 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m def fetch_data_for_tiles(self, *tiles): raise NotImplementedError() - @abstractmethod - def open_dataset(self, dataset): - raise NotImplementedError() - @abstractmethod def _metadata_store_docs_to_tiles(self, *store_docs): raise NotImplementedError() diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index aa0ab290..6aa63644 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -33,7 +33,7 @@ from .dao import ElasticsearchProxy from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.exception import NexusTileServiceException from nexustiles.AbstractTileService import AbstractTileService EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 019cd753..93963166 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -27,7 +27,7 @@ from shapely.geometry import MultiPolygon, box from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.nexustiles import NexusTileServiceException +from nexustiles.exception import NexusTileServiceException from nexustiles.AbstractTileService import AbstractTileService EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py deleted file mode 100644 index 96f7c4c6..00000000 --- a/data-access/nexustiles/dao/CassandraProxy.py +++ /dev/null @@ -1,317 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import uuid -from configparser import NoOptionError - -import nexusproto.DataTile_pb2 as nexusproto -import numpy as np -from cassandra.auth import PlainTextAuthProvider -from cassandra.cqlengine import columns, connection, CQLEngineException -from cassandra.cluster import NoHostAvailable -from cassandra.cqlengine.models import Model -from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, WhiteListRoundRobinPolicy -from multiprocessing.synchronize import Lock -from nexusproto.serialization import from_shaped_array - -INIT_LOCK = Lock(ctx=None) - -logger = logging.getLogger(__name__) - -class NexusTileData(Model): - __table_name__ = 'sea_surface_temp' - tile_id = columns.UUID(primary_key=True) - tile_blob = columns.Blob() - - __nexus_tile = None - - def _get_nexus_tile(self): - if self.__nexus_tile is None: - self.__nexus_tile = nexusproto.TileData.FromString(self.tile_blob) - - return self.__nexus_tile - - def get_raw_data_array(self): - - nexus_tile = self._get_nexus_tile() - the_tile_type = nexus_tile.tile.WhichOneof("tile_type") - - the_tile_data = getattr(nexus_tile.tile, the_tile_type) - - return from_shaped_array(the_tile_data.variable_data) - - def get_lat_lon_time_data_meta(self): - """ - Retrieve data from data store and metadata from metadata store - for this tile. For gridded tiles, the tile shape of the data - will match the input shape. For example, if the input was a - 30x30 tile, all variables will also be 30x30. However, if the - tile is a swath tile, the data will be transformed along the - diagonal of the data matrix. For example, a 30x30 tile would - become 900x900 where the 900 points are along the diagonal. - - Multi-variable tile will also include an extra dimension in the - data array. For example, a 30 x 30 x 30 array would be - transformed to N x 30 x 30 x 30 where N is the number of - variables in this tile. 
- - latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var - - :return: latitude data - :return: longitude data - :return: time data - :return: data - :return: meta data dictionary - :return: boolean flag, True if this tile has more than one variable - """ - is_multi_var = False - - if self._get_nexus_tile().HasField('grid_tile'): - grid_tile = self._get_nexus_tile().grid_tile - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) - - if len(grid_tile_data.shape) == 2: - grid_tile_data = grid_tile_data[np.newaxis, :] - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data, is_multi_var - elif self._get_nexus_tile().HasField('swath_tile'): - swath_tile = self._get_nexus_tile().swath_tile - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - tile_data = self._to_standard_index(swath_tile_data, - (len(time_data), len(latitude_data), len(longitude_data))) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var - elif self._get_nexus_tile().HasField('time_series_tile'): - time_series_tile = self._get_nexus_tile().time_series_tile - - time_series_tile_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.variable_data)) - time_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.time)).reshape(-1) - latitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(time_series_tile.longitude)) - - reshaped_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) - idx = np.arange(len(latitude_data)) - reshaped_array[:, idx, idx] = time_series_tile_data - tile_data = reshaped_array - # Extract the meta data - meta_data = {} - for meta_data_obj in time_series_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - - reshaped_meta_array = np.ma.masked_all((len(time_data), len(latitude_data), len(longitude_data))) - idx = np.arange(len(latitude_data)) - reshaped_meta_array[:, idx, idx] = meta_array - - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var - elif 
self._get_nexus_tile().HasField('swath_multi_variable_tile'): - swath_tile = self._get_nexus_tile().swath_multi_variable_tile - is_multi_var = True - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - desired_shape = ( - len(time_data), - len(latitude_data), - len(longitude_data), - ) - tile_data = self._to_standard_index(swath_tile_data, desired_shape, is_multi_var=True) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data, is_multi_var - elif self._get_nexus_tile().HasField('grid_multi_variable_tile'): - grid_multi_variable_tile = self._get_nexus_tile().grid_multi_variable_tile - is_multi_var = True - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_multi_variable_tile.longitude)) - - # If there are 3 dimensions, that means the time dimension - # was squeezed. Add back in - if len(grid_tile_data.shape) == 3: - grid_tile_data = np.expand_dims(grid_tile_data, axis=1) - # If there are 4 dimensions, that means the time dimension - # is present. Move the multivar dimension. 
- if len(grid_tile_data.shape) == 4: - grid_tile_data = np.moveaxis(grid_tile_data, -1, 0) - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_multi_variable_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_multi_variable_tile.time]), grid_tile_data, meta_data, is_multi_var - else: - raise NotImplementedError("Only supports grid_tile, swath_tile, swath_multi_variable_tile, and time_series_tile") - - @staticmethod - def _to_standard_index(data_array, desired_shape, is_multi_var=False): - """ - Transform swath data to a standard format where data runs along - diagonal of ND matrix and the non-diagonal data points are - masked - - :param data_array: The data array to be transformed - :param desired_shape: The desired shape of the resulting array - :param is_multi_var: True if this is a multi-variable tile - :type data_array: np.array - :type desired_shape: tuple - :type is_multi_var: bool - :return: Reshaped array - :rtype: np.array - """ - - reshaped_array = [] - if is_multi_var: - reshaped_data_array = np.moveaxis(data_array, -1, 0) - else: - reshaped_data_array = [data_array] - - for variable_data_array in reshaped_data_array: - if desired_shape[0] == 1: - variable_reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) - else: - variable_reshaped_array = np.ma.masked_all(desired_shape) - - row, col = np.indices(variable_data_array.shape) - - variable_reshaped_array[ - np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ - variable_data_array[ - row.flat, col.flat] - variable_reshaped_array.mask[ - np.diag_indices(desired_shape[1], len(variable_reshaped_array.shape))] = \ - variable_data_array.mask[ - row.flat, col.flat] - - if desired_shape[0] == 1: - reshaped_array.append(variable_reshaped_array[np.newaxis, :]) - else: - reshaped_array.append(variable_reshaped_array) - - if not is_multi_var: - # If single var, squeeze extra dim out of array - reshaped_array = reshaped_array[0] - - return reshaped_array - - -class CassandraProxy(object): - def __init__(self, config): - self.config = config - self.__cass_url = config.get("cassandra", "host") - self.__cass_username = config.get("cassandra", "username") - self.__cass_password = config.get("cassandra", "password") - self.__cass_keyspace = config.get("cassandra", "keyspace") - self.__cass_local_DC = config.get("cassandra", "local_datacenter") - self.__cass_protocol_version = config.getint("cassandra", "protocol_version") - self.__cass_dc_policy = config.get("cassandra", "dc_policy") - - try: - self.__cass_port = config.getint("cassandra", "port") - except NoOptionError: - self.__cass_port = 9042 - - with INIT_LOCK: - try: - connection.get_cluster() - except CQLEngineException: - self.__open() - - def __open(self): - if self.__cass_dc_policy == 'DCAwareRoundRobinPolicy': - dc_policy = DCAwareRoundRobinPolicy(self.__cass_local_DC) - token_policy = TokenAwarePolicy(dc_policy) - elif self.__cass_dc_policy == 'WhiteListRoundRobinPolicy': - token_policy = WhiteListRoundRobinPolicy([self.__cass_url]) - - if self.__cass_username and self.__cass_password: - auth_provider = PlainTextAuthProvider(username=self.__cass_username, password=self.__cass_password) - else: - auth_provider = None - try: - connection.setup( - [host for host in self.__cass_url.split(',')], 
self.__cass_keyspace, - protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy, - port=self.__cass_port, - auth_provider=auth_provider - ) - except NoHostAvailable as e: - logger.error("Cassandra is not accessible, SDAP will not server local datasets", e) - - def fetch_nexus_tiles(self, *tile_ids): - tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if - (isinstance(tile_id, str) or isinstance(tile_id, str))] - - res = [] - for tile_id in tile_ids: - filterResults = NexusTileData.objects.filter(tile_id=tile_id) - if len(filterResults) > 0: - res.append(filterResults[0]) - - return res diff --git a/data-access/nexustiles/dao/DynamoProxy.py b/data-access/nexustiles/dao/DynamoProxy.py deleted file mode 100644 index 1ee70ac1..00000000 --- a/data-access/nexustiles/dao/DynamoProxy.py +++ /dev/null @@ -1,146 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import uuid -import nexusproto.DataTile_pb2 as nexusproto -from nexusproto.serialization import from_shaped_array -import numpy as np -import boto3 - -class NexusTileData(object): - __nexus_tile = None - __data = None - tile_id = None - - def __init__(self, data, _tile_id): - if self.__data is None: - self.__data = data - if self.tile_id is None: - self.tile_id = _tile_id - - def _get_nexus_tile(self): - if self.__nexus_tile is None: - self.__nexus_tile = nexusproto.TileData.FromString(self.__data) - - return self.__nexus_tile - - def get_raw_data_array(self): - - nexus_tile = self._get_nexus_tile() - the_tile_type = nexus_tile.tile.WhichOneof("tile_type") - - the_tile_data = getattr(nexus_tile.tile, the_tile_type) - - return from_shaped_array(the_tile_data.variable_data) - - def get_lat_lon_time_data_meta(self): - if self._get_nexus_tile().HasField('grid_tile'): - grid_tile = self._get_nexus_tile().grid_tile - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) - - if len(grid_tile_data.shape) == 2: - grid_tile_data = grid_tile_data[np.newaxis, :] - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data - elif self._get_nexus_tile().HasField('swath_tile'): - swath_tile = self._get_nexus_tile().swath_tile - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = 
np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - tile_data = self._to_standard_index(swath_tile_data, - (len(time_data), len(latitude_data), len(longitude_data))) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data - else: - raise NotImplementedError("Only supports grid_tile and swath_tile") - - @staticmethod - def _to_standard_index(data_array, desired_shape): - - if desired_shape[0] == 1: - reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - reshaped_array = reshaped_array[np.newaxis, :] - else: - reshaped_array = np.ma.masked_all(desired_shape) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - - return reshaped_array - - -class DynamoProxy(object): - def __init__(self, config): - self.config = config - self.__dynamo_tablename = config.get("dynamo", "table") - self.__dynamo_region = config.get("dynamo", "region") - self.__dynamo = boto3.resource('dynamodb', region_name=self.__dynamo_region) - self.__dynamo_table = self.__dynamo.Table(self.__dynamo_tablename) - self.__nexus_tile = None - - def fetch_nexus_tiles(self, *tile_ids): - - tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if - (isinstance(tile_id, str) or isinstance(tile_id, str))] - res = [] - for tile_id in tile_ids: - response = self.__dynamo_table.get_item( - Key = { - 'tile_id': str(tile_id) - } - ) - item = response['Item'] - data = item['data'].__str__() - nexus_tile = NexusTileData(data, str(tile_id)) - res.append(nexus_tile) - - return res \ No newline at end of file diff --git a/data-access/nexustiles/dao/ElasticsearchProxy.py b/data-access/nexustiles/dao/ElasticsearchProxy.py deleted file mode 100644 index 157630f6..00000000 --- a/data-access/nexustiles/dao/ElasticsearchProxy.py +++ /dev/null @@ -1,1235 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import logging -import threading -import time -import re -from datetime import datetime -from pytz import timezone, UTC - -import requests -import pysolr -from shapely import wkt -from elasticsearch import Elasticsearch - -ELASTICSEARCH_CON_LOCK = threading.Lock() -thread_local = threading.local() - -EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -ELASTICSEARCH_FORMAT = '%Y-%m-%dT%H:%M:%SZ' -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - - -class ElasticsearchProxy(object): - def __init__(self, config): - self.elasticsearchHosts = config.get("elasticsearch", "host").split(',') - self.elasticsearchIndex = config.get("elasticsearch", "index") - self.elasticsearchUsername = config.get("elasticsearch", "username") - self.elasticsearchPassword = config.get("elasticsearch", "password") - self.logger = logging.getLogger(__name__) - - with ELASTICSEARCH_CON_LOCK: - elasticsearchcon = getattr(thread_local, 'elasticsearchcon', None) - if elasticsearchcon is None: - elasticsearchcon = Elasticsearch(hosts=self.elasticsearchHosts, http_auth=(self.elasticsearchUsername, self.elasticsearchPassword)) - thread_local.elasticsearchcon = elasticsearchcon - - self.elasticsearchcon = elasticsearchcon - - def find_tile_by_id(self, tile_id): - - params = { - "size": 1, - "query": { - "term": { - "id": { - "value": tile_id - } - } - } - } - - results, _, hits = self.do_query(*(None, None, None, True, None), **params) - assert hits == 1, f"Found {hits} results, expected exactly 1" - return [results[0]["_source"]] - - def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - - params = { - "query": { - "bool": { - "filter": [], - "should": [], - "minimum_should_match": 1 - } - } - } - - for tile_id in tile_ids: - params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) - - if ds is not None: - params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) - - self._merge_kwargs(params, **kwargs) - - results = self.do_query_all(*(None, None, None, False, None), **params) - assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) - return results - - def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): - params = { - "size": 0, - "query": { - "bool": { - "filter": [], - "should": [] - } - }, - "aggs": { - "min_date_agg": { - "min": { - "field": "tile_min_time_dt" - } - } - } - } - - for tile_id in tile_ids: - params['query']['bool']['should'].append({"term": {"id": {"value": tile_id}}}) - if ds is not None: - params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) - - aggregations = self.do_aggregation(*(None, None, None, True, None), **params) - return self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) - - def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): - - params = { - "size": 0, - "query": { - "bool": { - "filter": [], - "should": [] - } - }, - "aggs": { - "max_date_agg": { - "max": { - "field": "tile_max_time_dt" - } - } - } - } - - for tile_id in tile_ids: - params['query']['bool']['should'].append({"term": {"id": {"value": 
tile_id}}}) - if ds is not None: - params['query']['bool']['filter'].append({"term": {"dataset_s": {"value": ds}}}) - - aggregations = self.do_aggregation(*(None, None, None, True, None), **params) - return self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) - - - def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): - - params = { - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "term": { - "granule_s": { - "value": granule_name - } - } - } - ] - } - }, - "aggs": { - "min_date_agg": { - "max": { - "field": "tile_min_time_dt" - } - }, - "max_date_agg": { - "max": { - "field": "tile_max_time_dt" - } - } - } - } - - self._merge_kwargs(params, **kwargs) - - aggregations = self.do_aggregation(*(None, None, None, False, None), **params) - start_time = self.convert_iso_to_datetime(aggregations['min_date_agg']["value_as_string"]) - end_time = self.convert_iso_to_datetime(aggregations['max_date_agg']["value_as_string"]) - - return start_time, end_time - - def get_data_series_list(self): - - datasets = self.get_data_series_list_simple() - - for dataset in datasets: - min_date = self.find_min_date_from_tiles([], ds=dataset['title']) - max_date = self.find_max_date_from_tiles([], ds=dataset['title']) - dataset['start'] = (min_date - EPOCH).total_seconds() - dataset['end'] = (max_date - EPOCH).total_seconds() - dataset['iso_start'] = min_date.strftime(ISO_8601) - dataset['iso_end'] = max_date.strftime(ISO_8601) - - return datasets - - def get_data_series_list_simple(self): - - params = { - 'size': 0, - "aggs": { - "dataset_list_agg": { - "composite": { - "size":100, - "sources": [ - { - "dataset_s": { - "terms": { - "field": "dataset_s" - } - } - } - ] - } - } - } - } - - aggregations = self.do_aggregation_all(params, 'dataset_list_agg') - l = [] - - for dataset in aggregations: - l.append({ - "shortName": dataset['key']['dataset_s'], - "title": dataset['key']['dataset_s'], - "tileCount": dataset["doc_count"] - }) - - l = sorted(l, key=lambda entry: entry["title"]) - return l - - def get_data_series_stats(self, ds): - - params = { - "size": 0, - "query": { - "term":{ - "dataset_s": { - "value": ds - } - } - }, - "aggs": { - "available_dates": { - "composite": { - "size": 100, - "sources": [ - {"terms_tile_max_time_dt": {"terms": {"field": "tile_max_time_dt"}}} - ] - } - } - } - } - - aggregations = self.do_aggregation_all(params, 'available_dates') - stats = {} - stats['available_dates'] = [] - - for dt in aggregations: - stats['available_dates'].append(dt['key']['terms_tile_max_time_dt'] / 1000) - - stats['available_dates'] = sorted(stats['available_dates']) - - params = { - "size": 0, - "query": { - "term":{ - "dataset_s": { - "value": ds - } - } - }, - "aggs": { - "min_tile_min_val_d": { - "min": { - "field": "tile_min_val_d" - } - }, - "min_tile_max_time_dt": { - "min": { - "field": "tile_max_time_dt" - } - }, - "max_tile_max_time_dt": { - "max": { - "field": "tile_max_time_dt" - } - }, - "max_tile_max_val_d": { - "max": { - "field": "tile_max_val_d" - } - } - } - } - - aggregations = self.do_aggregation(*(None, None, None, False, None), **params) - stats["start"] = int(aggregations["min_tile_max_time_dt"]["value"]) / 1000 - stats["end"] = int(aggregations["max_tile_max_time_dt"]["value"]) / 1000 - stats["minValue"] = aggregations["min_tile_min_val_d"]["value"] - stats["maxValue"] = aggregations["max_tile_max_val_d"]["value"] - - return stats - - # day_of_year_i added (SDAP-347) - def 
find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "size": "1", - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - }, - { - "range": { - "day_of_year_i": { - "lte": day_of_year - } - } - } - ] - } - } - } - result, _, _ = self.do_query(*(None, None, None, True, 'day_of_year_i desc'), **params) - - return [result[0]] - - def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): - - search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": "0", - "_source": "tile_min_time_dt", - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "range": { - "tile_min_time_dt": { - "gte": search_start_s, - "lte": search_end_s - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - } - ] - } - }, - "aggs": { - "days_range_agg": { - "composite": { - "size":100, - "sources": [ - { - "tile_min_time_dt": { - "terms": { - "field": "tile_min_time_dt" - } - } - } - ] - } - } - } - } - - aggregations = self.do_aggregation_all(params, 'days_range_agg') - results = [res['key']['tile_min_time_dt'] for res in aggregations] - daysinrangeasc = sorted([(res / 1000) for res in results]) - return daysinrangeasc - - def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, - end_time=-1, **kwargs): - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - } - ] - } - } - } - - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) - - def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) - polygon_coordinates = list(zip(*[iter(nums)] * 2)) - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "intersects" - } - } - } - ] - } - } - } - - try: - if 'fl' in list(kwargs.keys()): - params["_source"] = 
kwargs["fl"].split(',') - except KeyError: - pass - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - return self.do_query_all(*(None, None, None, False, 'tile_min_time_dt asc,tile_max_time_dt asc'), **params) - - def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - nums = re.findall(r'\d+(?:\.\d*)?', bounding_polygon.wkt.rpartition(',')[0]) - polygon_coordinates = list(zip(*[iter(nums)] * 2)) - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - } - ] - } - } - } - - try: - if 'fl' in list(kwargs.keys()): - params["_source"] = kwargs["fl"].split(',') - except KeyError: - pass - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - tile_max_lat = bounding_polygon.bounds[3] - tile_min_lon = bounding_polygon.bounds[0] - tile_min_lat = bounding_polygon.bounds[1] - tile_max_lon = bounding_polygon.bounds[2] - - params = { - "size": 0, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[tile_min_lon, tile_max_lat], [tile_max_lon, tile_min_lat]] - }, - "relation": "intersects" - } - } - } - ] - } - }, - "aggs": { - "distinct_bounding_boxes": { - "composite": { - "size": 100, - "sources": [ - { - "bounding_box": { - "terms": { - "script": { - "source": "String.valueOf(doc['tile_min_lon'].value) + ', ' + String.valueOf(doc['tile_max_lon'].value) + ', ' + String.valueOf(doc['tile_min_lat'].value) + ', ' + String.valueOf(doc['tile_max_lat'].value)", - "lang": "painless" - } - } - } - } - ] - } - } - } - } - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - aggregations = self.do_aggregation_all(params, 'distinct_bounding_boxes') - distinct_bounds = [] - for agg in aggregations: - coords = agg['key']['bounding_box'].split(',') - min_lon = round(float(coords[0]), 2) - max_lon = round(float(coords[1]), 2) - min_lat = round(float(coords[2]), 2) - max_lat = round(float(coords[3]), 2) - polygon = 'POLYGON((%s %s, %s %s, %s %s, %s %s, %s %s))' % (min_lon, max_lat, min_lon, min_lat, max_lon, min_lat, max_lon, max_lat, min_lon, max_lat) - distinct_bounds.append(wkt.loads(polygon).bounds) - - return distinct_bounds - - def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): - - params = { - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - 
{ - "term": { - "tile_min_lon": { - "value": minx - } - } - }, - { - "term": { - "tile_min_lat": { - "value": miny - } - } - }, - { - "term": { - "tile_max_lon": { - "value": maxx - } - } - }, - { - "term": { - "tile_max_lat": { - "value": maxy - } - } - } - ] - } - }} - - if 0 < start_time <= end_time: - params["query"]["bool"]["should"] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): - - the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_min_time_dt": { - "lte": the_time - } - } - }, - { - "range": { - "tile_max_time_dt": { - "gte": the_time - } - } - } - ] - } - } - } - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): - - the_time = datetime.utcfromtimestamp(search_time).strftime(ELASTICSEARCH_FORMAT) - - max_lat = bounding_polygon.bounds[3] - min_lon = bounding_polygon.bounds[0] - min_lat = bounding_polygon.bounds[1] - max_lon = bounding_polygon.bounds[2] - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "intersects" - } - } - }, - { "range": { - "tile_min_time_dt": { - "lte": the_time - } - } }, - { "range": { - "tile_max_time_dt": { - "gte": the_time - } - } } - ] - } - } - } - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - - def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - - the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat],[max_lon, min_lat]] - }, - "relation": "within" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - }, - { - "range": { - "tile_min_time_dt": { - "lte": the_time - } - } - }, - { - "range": { - "tile_max_time_dt": { - "gte": the_time - } - } - } - ] - } - } - } - - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, "product(tile_avg_val_d, tile_count_i),*", None, False, None), **params) - - def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - - the_time = datetime.utcfromtimestamp(time).strftime(ELASTICSEARCH_FORMAT) - - params = { - "size": 1000, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "geo_shape": { - "geo": { - "shape": { - "type": "multilinestring", - "coordinates": [[[min_lon, max_lat], [max_lon, max_lat], [min_lon, max_lat], 
[min_lon, min_lat], [max_lon, max_lat], [max_lon, min_lat], [min_lon, min_lat], [max_lon, min_lat]]] - }, - "relation": "intersects" - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - }, - { - "range": { - "tile_min_time_dt": { - "lte": the_time - } - } - }, - { - "range": { - "tile_max_time_dt": { - "gte": the_time - } - } - } - ], - "must_not" : { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - }, - "relation": "within" - } - } - } - } - } - } - - self._merge_kwargs(params, **kwargs) - - return self.do_query_all(*(None, None, None, False, None), **params) - - def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): - """ - Get a list of tile metadata that matches the specified metadata, start_time, end_time. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tile metadata - """ - - params = { - "query": { - "bool": { - "must": [ - { - "term": { - "dataset_s": {"value": ds} - } - } - ] - } - } - } - - if len(metadata) > 0: - for key_value in metadata: - key = key_value.split(':')[0] - value = key_value.split(':')[1] - params['query']['bool']['must'].append({"match": {key: value}}) - - if 0 < start_time <= end_time: - params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - self._merge_kwargs(params, **kwargs) - return self.do_query_all(*(None, None, None, False, None), **params) - - def get_formatted_time_clause(self, start_time, end_time): - search_start_s = datetime.utcfromtimestamp(start_time).strftime(ELASTICSEARCH_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(ELASTICSEARCH_FORMAT) - - time_clause = [ - { - "range": { - "tile_min_time_dt": { - "lte": search_end_s, - "gte": search_start_s - } - } - }, - { - "range": { - "tile_max_time_dt": { - "lte": search_end_s, - "gte": search_start_s - } - } - }, - { - "bool": { - "must": [ - { - "range": { - "tile_min_time_dt": { - "gte": search_start_s - } - } - }, - { - "range": { - "tile_max_time_dt": { - "lte": search_end_s - } - } - } - ] - } - } - ] - - return time_clause - - def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): - """ - Return number of tiles that match search criteria. 
- :param ds: The dataset name to search - :param bounding_polygon: The polygon to search for tiles - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :return: number of tiles that match search criteria - """ - - params = { - "size": 0, - "query": { - "bool": { - "filter": [ - { - "term": { - "dataset_s": { - "value": ds - } - } - }, - { - "range": { - "tile_count_i": { - "gte": 1 - } - } - } - ] - } - } - } - - if bounding_polygon: - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - geo_clause = { - "geo_shape": { - "geo": { - "shape": { - "type": "envelope", - "coordinates": [[min_lon, max_lat], [max_lon, min_lat]] - } - } - } - } - - params['query']['bool']['filter'].append(geo_clause) - - if 0 < start_time <= end_time: - params['query']['bool']['should'] = self.get_formatted_time_clause(start_time, end_time) - params["query"]["bool"]["minimum_should_match"] = 1 - - if len(metadata) > 0: - for key_value in metadata: - key = key_value.split(':')[0] - value = key_value.split(':')[1] - params['query']['bool']['filter'].append({"term": {key: {"value": value}}}) - - self._merge_kwargs(params, **kwargs) - _, _, found = self.do_query(*(None, None, None, True, None), **params) - - return found - - def do_aggregation(self, *args, **params): - # Gets raw aggregations - - response = self.do_query_raw(*args, **params) - aggregations = response.get('aggregations', None) - return aggregations - - def do_aggregation_all(self, params, agg_name): - # Used for pagination when results can exceed ES max size (use of after_key) - - with ELASTICSEARCH_CON_LOCK: - response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) - all_buckets = [] - - try: - aggregations = response.get('aggregations', None) - current_buckets = aggregations.get(agg_name, None) - buckets = current_buckets.get('buckets', None) - all_buckets += buckets - after_bucket = current_buckets.get('after_key', None) - - while after_bucket is not None: - for agg in params['aggs']: - params['aggs'][agg]['composite']['after'] = {} - for source in params['aggs'][agg]['composite']['sources']: - key_name = next(iter(source)) - params['aggs'][agg]['composite']['after'][key_name] = after_bucket[key_name] - with ELASTICSEARCH_CON_LOCK: - response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) - - aggregations = response.get('aggregations', None) - current_buckets = aggregations.get(agg_name, None) - buckets = current_buckets.get('buckets', None) - all_buckets += buckets - after_bucket = current_buckets.get('after_key', None) - - except AttributeError as e: - self.logger.error('Error when accessing aggregation buckets - ' + str(e)) - - return all_buckets - - def do_query(self, *args, **params): - response = self.do_query_raw(*args, **params) - return response['hits']['hits'], None, response['hits']['total']['value'] - - def do_query_raw(self, *args, **params): - - if args[4]: - - sort_fields = args[4].split(",") - - if 'sort' not in list(params.keys()): - params["sort"] = [] - - for field in sort_fields: - field_order = field.split(' ') - sort_instruction = {field_order[0]: field_order[1]} - if sort_instruction not in params['sort']: - params["sort"].append(sort_instruction) - with ELASTICSEARCH_CON_LOCK: - response = self.elasticsearchcon.search(index=self.elasticsearchIndex, body=params) - - return response - - def 
do_query_all(self, *args, **params): - # Used to paginate with search_after. - # The method calling this might already have a sort clause, - # so we merge both sort clauses inside do_query_raw - - results = [] - - search = None - - # Add track option to not be blocked at 10000 hits per worker - if 'track_total_hits' not in params.keys(): - params['track_total_hits'] = True - - # Add sort instruction order to paginate the results : - params["sort"] = [ - { "tile_min_time_dt": "asc"}, - { "_id": "asc" } - ] - - response = self.do_query_raw(*args, **params) - results.extend([r["_source"] for r in response["hits"]["hits"]]) - - total_hits = response["hits"]["total"]["value"] - - try: - search_after = [] - for sort_param in response["hits"]["hits"][-1]["sort"]: - search_after.append(str(sort_param)) - except (KeyError, IndexError): - search_after = [] - - try: - while len(results) < total_hits: - params["search_after"] = search_after - response = self.do_query_raw(*args, **params) - results.extend([r["_source"] for r in response["hits"]["hits"]]) - - search_after = [] - for sort_param in response["hits"]["hits"][-1]["sort"]: - search_after.append(str(sort_param)) - - except (KeyError, IndexError): - pass - - return results - - def convert_iso_to_datetime(self, date): - return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC) - - def convert_iso_to_timestamp(self, date): - return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() - - @staticmethod - def _merge_kwargs(params, **kwargs): - # Only Solr-specific kwargs are parsed - # And the special 'limit' - try: - params['limit'] = kwargs['limit'] - except KeyError: - pass - - try: - params['_route_'] = kwargs['_route_'] - except KeyError: - pass - - try: - params['size'] = kwargs['size'] - except KeyError: - pass - - try: - params['start'] = kwargs['start'] - except KeyError: - pass - - try: - s = kwargs['sort'] if isinstance(kwargs['sort'], list) else [kwargs['sort']] - except KeyError: - s = None - - try: - params['sort'].extend(s) - except KeyError: - if s is not None: - params['sort'] = s diff --git a/data-access/nexustiles/dao/S3Proxy.py b/data-access/nexustiles/dao/S3Proxy.py deleted file mode 100644 index c8d3adfe..00000000 --- a/data-access/nexustiles/dao/S3Proxy.py +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
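The do_query_all helper in the removed ElasticsearchProxy above pages through large result sets with search_after: it forces a deterministic sort on tile_min_time_dt and _id, then feeds the last hit's sort values into the next request until every hit has been collected. A minimal, self-contained sketch of that pattern follows; the host, index name, and query here are placeholders, not the proxy's actual configuration.

    # Sketch of search_after pagination (placeholder host/index/query).
    from elasticsearch import Elasticsearch

    es = Elasticsearch(hosts=['http://localhost:9200'])  # placeholder host

    def fetch_all_hits(index, query, page_size=1000):
        body = {
            'query': query,
            'size': page_size,
            'track_total_hits': True,  # keep counting past the 10,000-hit default
            'sort': [{'tile_min_time_dt': 'asc'}, {'_id': 'asc'}],  # deterministic order
        }
        docs = []
        while True:
            response = es.search(index=index, body=body)
            hits = response['hits']['hits']
            if not hits:
                break
            docs.extend(hit['_source'] for hit in hits)
            # Resume the next page after the last hit's sort values.
            body['search_after'] = hits[-1]['sort']
        return docs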
- -import uuid - -import boto3 -import nexusproto.DataTile_pb2 as nexusproto -import numpy as np -from nexusproto.serialization import from_shaped_array - - -class NexusTileData(object): - __nexus_tile = None - __data = None - tile_id = None - - def __init__(self, data, _tile_id): - if self.__data is None: - self.__data = data - if self.tile_id is None: - self.tile_id = _tile_id - - def _get_nexus_tile(self): - if self.__nexus_tile is None: - self.__nexus_tile = nexusproto.TileData.FromString(self.__data) - - return self.__nexus_tile - - def get_raw_data_array(self): - - nexus_tile = self._get_nexus_tile() - the_tile_type = nexus_tile.tile.WhichOneof("tile_type") - - the_tile_data = getattr(nexus_tile.tile, the_tile_type) - - return from_shaped_array(the_tile_data.variable_data) - - def get_lat_lon_time_data_meta(self): - if self._get_nexus_tile().HasField('grid_tile'): - grid_tile = self._get_nexus_tile().grid_tile - - grid_tile_data = np.ma.masked_invalid(from_shaped_array(grid_tile.variable_data)) - latitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.latitude)) - longitude_data = np.ma.masked_invalid(from_shaped_array(grid_tile.longitude)) - - if len(grid_tile_data.shape) == 2: - grid_tile_data = grid_tile_data[np.newaxis, :] - - # Extract the meta data - meta_data = {} - for meta_data_obj in grid_tile.meta_data: - name = meta_data_obj.name - meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - if len(meta_array.shape) == 2: - meta_array = meta_array[np.newaxis, :] - meta_data[name] = meta_array - - return latitude_data, longitude_data, np.array([grid_tile.time]), grid_tile_data, meta_data - elif self._get_nexus_tile().HasField('swath_tile'): - swath_tile = self._get_nexus_tile().swath_tile - - latitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.latitude)).reshape(-1) - longitude_data = np.ma.masked_invalid(from_shaped_array(swath_tile.longitude)).reshape(-1) - time_data = np.ma.masked_invalid(from_shaped_array(swath_tile.time)).reshape(-1) - - # Simplify the tile if the time dimension is the same value repeated - if np.all(time_data == np.min(time_data)): - time_data = np.array([np.min(time_data)]) - - swath_tile_data = np.ma.masked_invalid(from_shaped_array(swath_tile.variable_data)) - - tile_data = self._to_standard_index(swath_tile_data, - (len(time_data), len(latitude_data), len(longitude_data))) - - # Extract the meta data - meta_data = {} - for meta_data_obj in swath_tile.meta_data: - name = meta_data_obj.name - actual_meta_array = np.ma.masked_invalid(from_shaped_array(meta_data_obj.meta_data)) - reshaped_meta_array = self._to_standard_index(actual_meta_array, tile_data.shape) - meta_data[name] = reshaped_meta_array - - return latitude_data, longitude_data, time_data, tile_data, meta_data - else: - raise NotImplementedError("Only supports grid_tile and swath_tile") - - @staticmethod - def _to_standard_index(data_array, desired_shape): - - if desired_shape[0] == 1: - reshaped_array = np.ma.masked_all((desired_shape[1], desired_shape[2])) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - reshaped_array = reshaped_array[np.newaxis, :] - else: - reshaped_array = np.ma.masked_all(desired_shape) - row, col = np.indices(data_array.shape) - - reshaped_array[np.diag_indices(desired_shape[1], 
len(reshaped_array.shape))] = data_array[ - row.flat, col.flat] - reshaped_array.mask[np.diag_indices(desired_shape[1], len(reshaped_array.shape))] = data_array.mask[ - row.flat, col.flat] - - return reshaped_array - - -class S3Proxy(object): - def __init__(self, config): - self.config = config - self.__s3_bucketname = config.get("s3", "bucket") - self.__s3_region = config.get("s3", "region") - self.__s3 = boto3.resource('s3') - self.__nexus_tile = None - - def fetch_nexus_tiles(self, *tile_ids): - tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if - (isinstance(tile_id, str) or isinstance(tile_id, str))] - res = [] - for tile_id in tile_ids: - obj = self.__s3.Object(self.__s3_bucketname, str(tile_id)) - data = obj.get()['Body'].read() - nexus_tile = NexusTileData(data, str(tile_id)) - res.append(nexus_tile) - - return res diff --git a/data-access/nexustiles/dao/SolrProxy.py b/data-access/nexustiles/dao/SolrProxy.py deleted file mode 100644 index 9b16533d..00000000 --- a/data-access/nexustiles/dao/SolrProxy.py +++ /dev/null @@ -1,731 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
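The removed S3Proxy above stores each tile as an S3 object whose key is the tile's UUID; fetch_nexus_tiles reads the object body back and hands the raw bytes to NexusTileData for protobuf decoding. A short sketch of that fetch step with boto3 follows; the bucket name is a placeholder (the real proxy reads it from its configuration) and error handling is omitted.

    # Sketch of fetching one serialized tile from S3 by its UUID key (placeholder bucket).
    import uuid
    import boto3

    s3 = boto3.resource('s3')
    bucket_name = 'my-nexus-tile-bucket'  # placeholder

    def fetch_tile_bytes(tile_id):
        key = str(uuid.UUID(str(tile_id)))  # normalize to the canonical UUID string
        obj = s3.Object(bucket_name, key)
        return obj.get()['Body'].read()     # raw bytes, decoded elsewhere by NexusTileData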
- -import json -import logging -import threading -import time -from datetime import datetime -from pytz import timezone, UTC - -import requests -import pysolr -from shapely import wkt - -SOLR_CON_LOCK = threading.Lock() -thread_local = threading.local() - -EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -SOLR_FORMAT = '%Y-%m-%dT%H:%M:%SZ' -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - - -class SolrProxy(object): - def __init__(self, config): - self.solrUrl = config.get("solr", "host") - self.solrCore = config.get("solr", "core") - solr_kargs = {} - if config.has_option("solr", "time_out"): - solr_kargs["timeout"] = config.get("solr", "time_out") - self.logger = logging.getLogger('nexus') - - with SOLR_CON_LOCK: - solrcon = getattr(thread_local, 'solrcon', None) - if solrcon is None: - solr_url = '%s/solr/%s' % (self.solrUrl, self.solrCore) - self.logger.info("connect to solr, url {} with option(s) = {}".format(solr_url, solr_kargs)) - solrcon = pysolr.Solr(solr_url, **solr_kargs) - thread_local.solrcon = solrcon - - self.solrcon = solrcon - - def find_tile_by_id(self, tile_id): - - search = 'id:%s' % tile_id - - params = { - 'rows': 1 - } - - results, start, found = self.do_query(*(search, None, None, True, None), **params) - - assert len(results) == 1, "Found %s results, expected exactly 1" % len(results) - return [results[0]] - - def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - - if ds is not None: - search = 'dataset_s:%s' % ds - else: - search = '*:*' - - additionalparams = { - 'fq': [ - "{!terms f=id}%s" % ','.join(tile_ids) - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - results = self.do_query_all(*(search, None, None, False, None), **additionalparams) - - assert len(results) == len(tile_ids), "Found %s results, expected exactly %s" % (len(results), len(tile_ids)) - return results - - def find_min_date_from_tiles(self, tile_ids, ds=None, **kwargs): - - if ds is not None: - search = 'dataset_s:%s' % ds - else: - search = '*:*' - - kwargs['rows'] = 1 - kwargs['fl'] = 'tile_min_time_dt' - kwargs['sort'] = ['tile_min_time_dt asc'] - additionalparams = { - 'fq': [ - "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) - - return self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) - - def find_max_date_from_tiles(self, tile_ids, ds=None, **kwargs): - - if ds is not None: - search = 'dataset_s:%s' % ds - else: - search = '*:*' - - kwargs['rows'] = 1 - kwargs['fl'] = 'tile_max_time_dt' - kwargs['sort'] = ['tile_max_time_dt desc'] - additionalparams = { - 'fq': [ - "{!terms f=id}%s" % ','.join(tile_ids) if len(tile_ids) > 0 else '' - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) - - return self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) - - def find_min_max_date_from_granule(self, ds, granule_name, **kwargs): - search = 'dataset_s:%s' % ds - - kwargs['rows'] = 1 - kwargs['fl'] = 'tile_min_time_dt' - kwargs['sort'] = ['tile_min_time_dt asc'] - additionalparams = { - 'fq': [ - "granule_s:%s" % granule_name - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) - start_time = self.convert_iso_to_datetime(results[0]['tile_min_time_dt']) - - kwargs['fl'] = 'tile_max_time_dt' - 
kwargs['sort'] = ['tile_max_time_dt desc'] - additionalparams = { - 'fq': [ - "granule_s:%s" % granule_name - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - results, start, found = self.do_query(*(search, None, None, False, None), **additionalparams) - end_time = self.convert_iso_to_datetime(results[0]['tile_max_time_dt']) - - return start_time, end_time - - def get_data_series_list(self): - - datasets = self.get_data_series_list_simple() - - for dataset in datasets: - min_date = self.find_min_date_from_tiles([], ds=dataset['title']) - max_date = self.find_max_date_from_tiles([], ds=dataset['title']) - dataset['start'] = (min_date - EPOCH).total_seconds() - dataset['end'] = (max_date - EPOCH).total_seconds() - dataset['iso_start'] = min_date.strftime(ISO_8601) - dataset['iso_end'] = max_date.strftime(ISO_8601) - - return datasets - - def get_data_series_list_simple(self): - search = "*:*" - params = { - 'rows': 0, - "facet": "true", - "facet.field": "dataset_s", - "facet.mincount": "1", - "facet.limit": "-1" - } - - - response = self.do_query_raw(*(search, None, None, False, None), **params) - l = [] - for g, v in zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2): - l.append({ - "shortName": g, - "title": g, - "tileCount": v - }) - l = sorted(l, key=lambda entry: entry["title"]) - return l - - def get_data_series_stats(self, ds): - search = "dataset_s:%s" % ds - params = { - "facet": "true", - "facet.field": ["dataset_s", "tile_max_time_dt"], - "facet.limit": "-1", - "facet.mincount": "1", - "facet.pivot": "{!stats=piv1}dataset_s", - "stats": "on", - "stats.field": ["{!tag=piv1 min=true max=true sum=false}tile_max_time_dt","{!tag=piv1 min=true max=false sum=false}tile_min_val_d","{!tag=piv1 min=false max=true sum=false}tile_max_val_d"] - } - - response = self.do_query_raw(*(search, None, None, False, None), **params) - - stats = {} - - for g in response.facets["facet_pivot"]["dataset_s"]: - if g["value"] == ds: - stats["start"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["min"]) - stats["end"] = self.convert_iso_to_timestamp(g["stats"]["stats_fields"]["tile_max_time_dt"]["max"]) - stats["minValue"] = g["stats"]["stats_fields"]["tile_min_val_d"]["min"] - stats["maxValue"] = g["stats"]["stats_fields"]["tile_max_val_d"]["max"] - - - stats["availableDates"] = [] - for dt in response.facets["facet_fields"]["tile_max_time_dt"][::2]: - stats["availableDates"].append(self.convert_iso_to_timestamp(dt)) - - stats["availableDates"] = sorted(stats["availableDates"]) - - return stats - - def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year): - - search = 'dataset_s:%s' % ds - - params = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]", - "day_of_year_i:[* TO %s]" % day_of_year - ], - 'rows': 1 - } - - results, start, found = self.do_query( - *(search, None, None, True, ('day_of_year_i desc',)), **params) - - return [results[0]] - - def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs): - - search = 'dataset_s:%s' % ds - - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - additionalparams = { - 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), - "{!frange l=0 u=0}ms(tile_min_time_dt,tile_max_time_dt)", - "tile_count_i:[1 TO *]", - "tile_min_time_dt:[%s TO %s] " % (search_start_s, 
search_end_s) - ], - 'rows': 0, - 'facet': 'true', - 'facet.field': 'tile_min_time_dt', - 'facet.mincount': '1', - 'facet.limit': '-1' - } - - self._merge_kwargs(additionalparams, **kwargs) - - response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) - - daysinrangeasc = sorted( - [(datetime.strptime(a_date, SOLR_FORMAT) - datetime.utcfromtimestamp(0)).total_seconds() for a_date - in response.facets['facet_fields']['tile_min_time_dt'][::2]]) - - return daysinrangeasc - - def find_all_tiles_in_box_sorttimeasc(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, - end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), - **additionalparams) - - def find_all_tiles_in_polygon_sorttimeasc(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, 'tile_min_time_dt asc, tile_max_time_dt asc'), - **additionalparams) - - def find_all_tiles_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, None), - **additionalparams) - - def find_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "{!field 
f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]" - ], - 'rows': 0, - 'facet': 'true', - 'facet.field': 'geo_s', - 'facet.limit': -1, - 'facet.mincount': 1 - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - response = self.do_query_raw(*(search, None, None, False, None), **additionalparams) - - distinct_bounds = [wkt.loads(key).bounds for key in response.facets["facet_fields"]["geo_s"][::2]] - - return distinct_bounds - - def find_tiles_by_exact_bounds(self, minx, miny, maxx, maxy, ds, start_time=0, end_time=-1, **kwargs): - - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "tile_min_lon:\"%s\"" % minx, - "tile_min_lat:\"%s\"" % miny, - "tile_max_lon:\"%s\"" % maxx, - "tile_max_lat:\"%s\"" % maxy, - "tile_count_i:[1 TO *]" - ] - } - - if 0 <= start_time <= end_time: - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - additionalparams['fq'].append(time_clause) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, None), - **additionalparams) - - def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, search_time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ - "geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon), - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, None, None, False, None), **additionalparams) - - def find_all_tiles_in_polygon_at_time(self, bounding_polygon, ds, search_time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(search_time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ - "{!field f=geo}Intersects(%s)" % bounding_polygon.wkt, - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, None, None, False, None), **additionalparams) - - def find_all_tiles_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ 
- "geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, "product(tile_avg_val_d, tile_count_i),*", None, False, None), - **additionalparams) - - def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, ds, time, **kwargs): - search = 'dataset_s:%s' % ds - - the_time = datetime.utcfromtimestamp(time).strftime(SOLR_FORMAT) - time_clause = "(" \ - "tile_min_time_dt:[* TO %s] " \ - "AND tile_max_time_dt:[%s TO *] " \ - ")" % ( - the_time, the_time - ) - - additionalparams = { - 'fq': [ - "geo:\"Intersects(MultiLineString((%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s),(%s %s, %s %s)))\"" % ( - min_lon, max_lat, max_lon, max_lat, min_lon, max_lat, min_lon, min_lat, max_lon, max_lat, max_lon, - min_lat, min_lon, min_lat, max_lon, min_lat), - "-geo:\"Within(ENVELOPE(%s,%s,%s,%s))\"" % (min_lon, max_lon, max_lat, min_lat), - "tile_count_i:[1 TO *]", - time_clause - ] - } - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all(*(search, None, None, False, None), **additionalparams) - - def find_all_tiles_by_metadata(self, metadata, ds, start_time=0, end_time=-1, **kwargs): - """ - Get a list of tile metadata that matches the specified metadata, start_time, end_time. - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :param ds: The dataset name to search - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :return: A list of tile metadata - """ - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': metadata - } - - if 0 <= start_time <= end_time: - additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) - - self._merge_kwargs(additionalparams, **kwargs) - - return self.do_query_all( - *(search, None, None, False, None), - **additionalparams) - - def get_formatted_time_clause(self, start_time, end_time): - search_start_s = datetime.utcfromtimestamp(start_time).strftime(SOLR_FORMAT) - search_end_s = datetime.utcfromtimestamp(end_time).strftime(SOLR_FORMAT) - - time_clause = "(" \ - "tile_min_time_dt:[%s TO %s] " \ - "OR tile_max_time_dt:[%s TO %s] " \ - "OR (tile_min_time_dt:[* TO %s] AND tile_max_time_dt:[%s TO *])" \ - ")" % ( - search_start_s, search_end_s, - search_start_s, search_end_s, - search_start_s, search_end_s - ) - return time_clause - - def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): - """ - Return number of tiles that match search criteria. 
- :param ds: The dataset name to search - :param bounding_polygon: The polygon to search for tiles - :param start_time: The start time to search for tiles - :param end_time: The end time to search for tiles - :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] - :return: number of tiles that match search criteria - """ - search = 'dataset_s:%s' % ds - - additionalparams = { - 'fq': [ - "tile_count_i:[1 TO *]" - ], - 'rows': 0 - } - - if bounding_polygon: - min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds - additionalparams['fq'].append("geo:[%s,%s TO %s,%s]" % (min_lat, min_lon, max_lat, max_lon)) - - if 0 <= start_time <= end_time: - additionalparams['fq'].append(self.get_formatted_time_clause(start_time, end_time)) - - if metadata: - additionalparams['fq'].extend(metadata) - - self._merge_kwargs(additionalparams, **kwargs) - - results, start, found = self.do_query(*(search, None, None, True, None), **additionalparams) - - return found - - def do_query(self, *args, **params): - - response = self.do_query_raw(*args, **params) - - return response.docs, response.raw_response['response']['start'], response.hits - - def do_query_raw(self, *args, **params): - - if 'fl' not in list(params.keys()) and args[1]: - params['fl'] = args[1] - - if 'sort' not in list(params.keys()) and args[4]: - params['sort'] = args[4] - - # If dataset_s is specified as the search term, - # add the _route_ parameter to limit the search to the correct shard - if 'dataset_s:' in args[0]: - ds = args[0].split(':')[-1] - params['shard_keys'] = ds + '!' - - with SOLR_CON_LOCK: - response = self.solrcon.search(args[0], **params) - - return response - - - def do_query_all(self, *args, **params): - - results = [] - - response = self.do_query_raw(*args, **params) - results.extend(response.docs) - - limit = min(params.get('limit', float('inf')), response.hits) - - while len(results) < limit: - params['start'] = len(results) - response = self.do_query_raw(*args, **params) - results.extend(response.docs) - - assert len(results) == limit - - return results - - def convert_iso_to_datetime(self, date): - return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC) - - def convert_iso_to_timestamp(self, date): - return (self.convert_iso_to_datetime(date) - EPOCH).total_seconds() - - def ping(self): - solrAdminPing = '%s/solr/%s/admin/ping' % (self.solrUrl, self.solrCore) - try: - r = requests.get(solrAdminPing, params={'wt': 'json'}) - results = json.loads(r.text) - return results - except: - return None - - @staticmethod - def _merge_kwargs(additionalparams, **kwargs): - # Only Solr-specific kwargs are parsed - # And the special 'limit' - try: - additionalparams['limit'] = kwargs['limit'] - except KeyError: - pass - - try: - additionalparams['_route_'] = kwargs['_route_'] - except KeyError: - pass - - try: - additionalparams['rows'] = kwargs['rows'] - except KeyError: - pass - - try: - additionalparams['start'] = kwargs['start'] - except KeyError: - pass - - try: - kwfq = kwargs['fq'] if isinstance(kwargs['fq'], list) else list(kwargs['fq']) - except KeyError: - kwfq = [] - - try: - additionalparams['fq'].extend(kwfq) - except KeyError: - additionalparams['fq'] = kwfq - - try: - kwfl = kwargs['fl'] if isinstance(kwargs['fl'], list) else [kwargs['fl']] - except KeyError: - kwfl = [] - - try: - additionalparams['fl'].extend(kwfl) - except KeyError: - additionalparams['fl'] = kwfl - - try: - s = kwargs['sort'] if isinstance(kwargs['sort'], list) else 
[kwargs['sort']] - except KeyError: - s = None - - try: - additionalparams['sort'].extend(s) - except KeyError: - if s is not None: - additionalparams['sort'] = s diff --git a/data-access/nexustiles/dao/__init__.py b/data-access/nexustiles/dao/__init__.py deleted file mode 100644 index 6acb5d12..00000000 --- a/data-access/nexustiles/dao/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py new file mode 100644 index 00000000..33ab5296 --- /dev/null +++ b/data-access/nexustiles/exception.py @@ -0,0 +1,2 @@ +class NexusTileServiceException(Exception): + pass diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index fde0a5f3..d09c3aa6 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -14,34 +14,27 @@ # limitations under the License. import configparser +import json import logging import sys -import json +import threading from datetime import datetime -from functools import wraps, reduce, partial +from functools import reduce, wraps +from time import sleep +from typing import Dict, Union import numpy as np import numpy.ma as ma import pkg_resources -from pytz import timezone, UTC -from shapely.geometry import MultiPolygon, box import pysolr +from pytz import timezone, UTC +from shapely.geometry import box +from webservice.webmodel import DatasetNotFoundException, NexusProcessingException -import threading -from time import sleep - +from .AbstractTileService import AbstractTileService from .backends.nexusproto.backend import NexusprotoTileService from .backends.zarr.backend import ZarrBackend - - -from abc import ABC, abstractmethod - -from .AbstractTileService import AbstractTileService - from .model.nexusmodel import Tile, BBox, TileStats, TileVariable -from typing import Dict, Union - -from webservice.webmodel import DatasetNotFoundException, NexusProcessingException EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -49,7 +42,7 @@ level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger("testing") +logger = logging.getLogger("nexus-tile-svc") def tile_data(default_fetch=True): @@ -83,19 +76,25 @@ def fetch_data_for_func(*args, **kwargs): return tile_data_decorator -class NexusTileServiceException(Exception): - pass - - SOLR_LOCK = threading.Lock() DS_LOCK = threading.Lock() thread_local = threading.local() - -class NexusTileService(AbstractTileService): +class NexusTileService: backends: Dict[Union[None, str], Dict[str, Union[AbstractTileService, bool]]] = {} + ds_config = None + + __update_thread = None + + @staticmethod + def __update_datasets(): + 
while True: + with DS_LOCK: + NexusTileService._update_datasets() + sleep(3600) + def __init__(self, config=None): self._config = configparser.RawConfigParser() self._config.read(NexusTileService._get_config_files('config/datasets.ini')) @@ -105,43 +104,54 @@ def __init__(self, config=None): if config: self.override_config(config) - NexusTileService.backends[None] = {"backend": NexusprotoTileService(False, False, config), 'up': True} - NexusTileService.backends['__nexusproto__'] = NexusTileService.backends[None] + if not NexusTileService.backends: + NexusTileService.ds_config = configparser.RawConfigParser() + NexusTileService.ds_config.read(NexusTileService._get_config_files('config/datasets.ini')) - def __update_datasets(): - while True: - with DS_LOCK: - self._update_datasets() - sleep(3600) + default_backend = {"backend": NexusprotoTileService(False, False, config), 'up': True} + + NexusTileService.backends[None] = default_backend + NexusTileService.backends['__nexusproto__'] = default_backend - threading.Thread(target=__update_datasets, name='dataset_update', daemon=False).start() + if not NexusTileService.__update_thread: + NexusTileService.__update_thread = threading.Thread( + target=NexusTileService.__update_datasets, + name='dataset_update', + daemon=False + ) + logger.info('Starting dataset refresh thread') + NexusTileService.__update_thread.start() @staticmethod def __get_backend(dataset_s) -> AbstractTileService: - if dataset_s not in NexusTileService.backends: - raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') + with DS_LOCK: + if dataset_s not in NexusTileService.backends: + raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') - b = NexusTileService.backends[dataset_s] + b = NexusTileService.backends[dataset_s] - if not b['up']: - success = b['backend'].try_connect() + if not b['up']: + success = b['backend'].try_connect() - if not success: - raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') - else: - NexusTileService.backends[dataset_s]['up'] = True + if not success: + raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') + else: + NexusTileService.backends[dataset_s]['up'] = True - return b['backend'] + return b['backend'] - def _update_datasets(self): - solr_url = self._config.get("solr", "host") - solr_core = self._config.get("solr", "core") + @staticmethod + def _update_datasets(): + solr_url = NexusTileService.ds_config.get("solr", "host") + solr_core = NexusTileService.ds_config.get("solr", "core") solr_kwargs = {} - if self._config.has_option("solr", "time_out"): - solr_kwargs["timeout"] = self._config.get("solr", "time_out") + update_logger = logging.getLogger("nexus-tile-svc.backends") + + if NexusTileService.ds_config.has_option("solr", "time_out"): + solr_kwargs["timeout"] = NexusTileService.ds_config.get("solr", "time_out") with SOLR_LOCK: solrcon = getattr(thread_local, 'solrcon', None) @@ -152,33 +162,53 @@ def _update_datasets(self): solrcon = solrcon - response = solrcon.search('*:*') + update_logger.info('Executing update query to check for new datasets') - present_datasets = set() + present_datasets = {None, '__nexusproto__'} + next_cursor_mark = '*' - for dataset in response.docs: - d_id = dataset['dataset_s'] - store_type = dataset.get('store_type_s', 'nexusproto') + while True: + response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') - present_datasets.add(d_id) + try: + 
response_cursor_mark = response.nextCursorMark + except AttributeError: + break - if d_id in NexusTileService.backends: - continue - # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + if response_cursor_mark == next_cursor_mark: + break + else: + next_cursor_mark = response_cursor_mark - if store_type == 'nexus_proto' or store_type == 'nexusproto': - NexusTileService.backends[d_id] = NexusTileService.backends[None] - elif store_type == 'zarr': - ds_config = json.loads(dataset['config'][0]) - NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(ds_config), - 'up': True - } - else: - logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + for dataset in response.docs: + d_id = dataset['dataset_s'] + store_type = dataset.get('store_type_s', 'nexusproto') + + present_datasets.add(d_id) + + if d_id in NexusTileService.backends: + continue + # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + + if store_type == 'nexus_proto' or store_type == 'nexusproto': + update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") + NexusTileService.backends[d_id] = NexusTileService.backends[None] + elif store_type == 'zarr': + update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") + + ds_config = json.loads(dataset['config'][0]) + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(ds_config), + 'up': True + } + else: + logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) + if len(removed_datasets) > 0: + logger.info(f'{len(removed_datasets)} marked for removal') + for dataset in removed_datasets: logger.info(f"Removing dataset {dataset}") del NexusTileService.backends[dataset] @@ -336,23 +366,17 @@ def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset return tiles def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - tiles = self._metadatastore.find_all_tiles_within_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, - **kwargs) - - return tiles + return NexusTileService.get_stats_within_box_at_time( + min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs + ) - def get_bounding_box(self, tile_ids): + def get_bounding_box(self, tile_ids, ds=None): """ Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'], - fetch_data=False, rows=len(tile_ids)) - polys = [] - for tile in tiles: - polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat)) - return box(*MultiPolygon(polys).bounds) + return NexusTileService.__get_backend(ds).get_bounding_box(tile_ids, ds) def get_min_time(self, tile_ids, ds=None): """ @@ -361,8 +385,7 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. 
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - min_time = self._metadatastore.find_min_date_from_tiles(tile_ids, ds=ds) - return int((min_time - EPOCH).total_seconds()) + return NexusTileService.__get_backend(ds).get_min_time(tile_ids, ds) def get_max_time(self, tile_ids, ds=None): """ @@ -371,8 +394,7 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - max_time = self._metadatastore.find_max_date_from_tiles(tile_ids, ds=ds) - return int((max_time - EPOCH).total_seconds()) + return int(NexusTileService.__get_backend(ds).get_max_time(tile_ids)) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -398,33 +420,95 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m """ return self._metadatastore.get_tile_count(ds, bounding_polygon, start_time, end_time, metadata, **kwargs) - def fetch_data_for_tiles(self, *tiles): + def mask_tiles_to_bbox(self, min_lat, max_lat, min_lon, max_lon, tiles): + for tile in tiles: + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. + if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) - nexus_tile_ids = set([tile.tile_id for tile in tiles]) - matched_tile_data = self._datastore.fetch_nexus_tiles(*nexus_tile_ids) + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] - tile_data_by_id = {str(a_tile_data.tile_id): a_tile_data for a_tile_data in matched_tile_data} + return tiles - missing_data = nexus_tile_ids.difference(list(tile_data_by_id.keys())) - if len(missing_data) > 0: - raise Exception("Missing data for tile_id(s) %s." 
% missing_data) + def mask_tiles_to_bbox_and_time(self, min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles): + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + tile.latitudes = ma.masked_outside(tile.latitudes, min_lat, max_lat) + tile.longitudes = ma.masked_outside(tile.longitudes, min_lon, max_lon) - for a_tile in tiles: - lats, lons, times, data, meta, is_multi_var = tile_data_by_id[a_tile.tile_id].get_lat_lon_time_data_meta() + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] - a_tile.latitudes = lats - a_tile.longitudes = lons - a_tile.times = times - a_tile.data = data - a_tile.meta_data = meta - a_tile.is_multi = is_multi_var + tile.data = ma.masked_where(data_mask, tile.data) - del (tile_data_by_id[a_tile.tile_id]) + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] return tiles - def _metadata_store_docs_to_tiles(self, *store_docs): + def mask_tiles_to_polygon(self, bounding_polygon, tiles): + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon, tiles) + def mask_tiles_to_polygon_and_time(self, bounding_polygon, start_time, end_time, tiles): + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.mask_tiles_to_bbox_and_time(min_lat, max_lat, min_lon, max_lon, start_time, end_time, tiles) + + def mask_tiles_to_time_range(self, start_time, end_time, tiles): + """ + Masks data in tiles to specified time range. + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param tiles: List of tiles + :return: A list tiles with data masked to specified time range + """ + if 0 <= start_time <= end_time: + for tile in tiles: + tile.times = ma.masked_outside(tile.times, start_time, end_time) + + # Or together the masks of the individual arrays to create the new mask + data_mask = ma.getmaskarray(tile.times)[:, np.newaxis, np.newaxis] \ + | ma.getmaskarray(tile.latitudes)[np.newaxis, :, np.newaxis] \ + | ma.getmaskarray(tile.longitudes)[np.newaxis, np.newaxis, :] + + # If this is multi-var, need to mask each variable separately. 
+ if tile.is_multi: + # Combine space/time mask with existing mask on data + data_mask = reduce(np.logical_or, [tile.data[0].mask, data_mask]) + + num_vars = len(tile.data) + multi_data_mask = np.repeat(data_mask[np.newaxis, ...], num_vars, axis=0) + tile.data = ma.masked_where(multi_data_mask, tile.data) + else: + tile.data = ma.masked_where(data_mask, tile.data) + + tiles[:] = [tile for tile in tiles if not tile.data.mask.all()] + + return tiles + + def fetch_data_for_tiles(self, *tiles, dataset=None): + return NexusTileService.__get_backend(dataset).fetch_data_for_tiles(*tiles) + + def _metadata_store_docs_to_tiles(self, *store_docs): tiles = [] for store_doc in store_docs: tile = Tile() @@ -521,7 +605,6 @@ def _metadata_store_docs_to_tiles(self, *store_docs): except KeyError: pass - if 'tile_var_name_ss' in store_doc: tile.variables = [] for var_name in store_doc['tile_var_name_ss']: @@ -536,13 +619,6 @@ def _metadata_store_docs_to_tiles(self, *store_docs): return tiles - def pingSolr(self): - status = self._metadatastore.ping() - if status and status["status"] == "OK": - return True - else: - return False - @staticmethod def _get_config_files(filename): log = logging.getLogger(__name__) diff --git a/data-access/setup.py b/data-access/setup.py index ab0248f0..e539e1e0 100644 --- a/data-access/setup.py +++ b/data-access/setup.py @@ -12,11 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import setuptools from setuptools import setup -with open('../VERSION.txt', 'r') as f: - __version__ = f.read() +try: + with open('../VERSION.txt', 'r') as f: + __version__ = f.read() +except: + __version__ = None with open('requirements.txt') as f: @@ -32,8 +35,13 @@ description="NEXUS API.", long_description=open('README.md').read(), - packages=['nexustiles', 'nexustiles.model', 'nexustiles.dao'], - package_data={'nexustiles': ['config/datastores.ini.default', 'config/datastores.ini']}, + packages=setuptools.find_packages(), # ['nexustiles', 'nexustiles.model', 'nexustiles.dao'], + package_data={ + 'nexustiles': + ['config/datasets.ini.default', 'config/datasets.ini'], + 'nexustiles.backends.nexusproto': + ['config/datastores.ini.default', 'config/datastores.ini'] + }, platforms='any', python_requires='~=3.8', install_requires=pip_requirements, From b77aa11f65a8eb486509b4e0e7e5c5a149fdbcc0 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 16:33:52 -0700 Subject: [PATCH 05/70] Working(?) 
np backend --- .../nexustiles/backends/zarr/backend.py | 150 +++++++++++++++++- data-access/nexustiles/nexustiles.py | 39 ++++- data-access/requirements.txt | 2 + 3 files changed, 182 insertions(+), 9 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 93963166..13622453 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -40,6 +40,154 @@ class ZarrBackend(AbstractTileService): - def __init__(self, config): + def __init__(self, path, config): AbstractTileService.__init__(self) self.__config = config + + def get_dataseries_list(self, simple=False): + raise NotImplementedError() + + def find_tile_by_id(self, tile_id, **kwargs): + raise NotImplementedError() + + def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + raise NotImplementedError() + + def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, + metrics_callback=None, **kwargs): + raise NotImplementedError() + + def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): + """ + Given a bounding polygon, dataset, and day of year, find tiles in that dataset with the same bounding + polygon and the closest day of year. + + For example: + given a polygon minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; and day of year=32 + search for first tile in MY_DS with identical bbox and day_of_year <= 32 (sorted by day_of_year desc) + + Valid matches: + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 + + Invalid matches: + minx=1, miny=0, maxx=2, maxy=1; dataset=MY_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_OTHER_DS; day of year = 32 + minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 30 if minx=0, miny=0, maxx=1, maxy=1; dataset=MY_DS; day of year = 32 also exists + + :param bounding_polygon: The exact bounding polygon of tiles to search for + :param ds: The dataset name being searched + :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned + :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found + """ + raise NotImplementedError() + + def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): + raise NotImplementedError() + + def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall in the given box in the Solr index + raise NotImplementedError() + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + # Find tiles that fall within the polygon in the Solr index + raise NotImplementedError() + + def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): + """ + Return list of tiles whose metadata matches the specified metadata, start_time, end_time. 
+ :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of tiles + """ + raise NotImplementedError() + + def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): + """ + The method will return tiles with the exact given bounds within the time range. It differs from + find_tiles_in_polygon in that only tiles with exactly the given bounds will be returned as opposed to + doing a polygon intersection with the given bounds. + + :param bounds: (minx, miny, maxx, maxy) bounds to search for + :param ds: Dataset name to search + :param start_time: Start time to search (seconds since epoch) + :param end_time: End time to search (seconds since epoch) + :param kwargs: fetch_data: True/False = whether or not to retrieve tile data + :return: + """ + raise NotImplementedError() + + def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def get_min_max_time_by_granule(self, ds, granule_name): + raise NotImplementedError() + + def get_dataset_overall_stats(self, ds): + raise NotImplementedError() + + def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): + raise NotImplementedError() + + def get_bounding_box(self, tile_ids): + """ + Retrieve a bounding box that encompasses all of the tiles represented by the given tile ids. + :param tile_ids: List of tile ids + :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles + """ + raise NotImplementedError() + + def get_min_time(self, tile_ids, ds=None): + """ + Get the minimum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_max_time(self, tile_ids, ds=None): + """ + Get the maximum tile date from the list of tile ids + :param tile_ids: List of tile ids + :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) + :return: long time in seconds since epoch + """ + raise NotImplementedError() + + def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): + """ + Get a list of distinct tile bounding boxes from all tiles within the given polygon and time range. + :param bounding_polygon: The bounding polygon of tiles to search for + :param ds: The dataset name to search + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :return: A list of distinct bounding boxes (as shapely polygons) for tiles in the search polygon + """ + raise NotImplementedError() + + def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, metadata=None, **kwargs): + """ + Return number of tiles that match search criteria. 
+ :param ds: The dataset name to search + :param bounding_polygon: The polygon to search for tiles + :param start_time: The start time to search for tiles + :param end_time: The end time to search for tiles + :param metadata: List of metadata values to search for tiles e.g ["river_id_i:1", "granule_s:granule_name"] + :return: number of tiles that match search criteria + """ + raise NotImplementedError() + + def fetch_data_for_tiles(self, *tiles): + raise NotImplementedError() + + def _metadata_store_docs_to_tiles(self, *store_docs): + raise NotImplementedError() + + + diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index d09c3aa6..405b5b70 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -30,6 +30,7 @@ from pytz import timezone, UTC from shapely.geometry import box from webservice.webmodel import DatasetNotFoundException, NexusProcessingException +from webservice.NexusHandler import nexus_initializer from .AbstractTileService import AbstractTileService from .backends.nexusproto.backend import NexusprotoTileService @@ -81,6 +82,16 @@ def fetch_data_for_func(*args, **kwargs): thread_local = threading.local() +@nexus_initializer +class NTSInitializer: + def __init__(self): + self._log = logger.getChild('init') + + def init(self, config): + self._log.info('*** RUNNING NTS INITIALIZATION ***') + NexusTileService(config) + + class NexusTileService: backends: Dict[Union[None, str], Dict[str, Union[AbstractTileService, bool]]] = {} @@ -89,7 +100,7 @@ class NexusTileService: __update_thread = None @staticmethod - def __update_datasets(): + def __update_datasets_loop(): while True: with DS_LOCK: NexusTileService._update_datasets() @@ -115,7 +126,7 @@ def __init__(self, config=None): if not NexusTileService.__update_thread: NexusTileService.__update_thread = threading.Thread( - target=NexusTileService.__update_datasets, + target=NexusTileService.__update_datasets_loop, name='dataset_update', daemon=False ) @@ -128,7 +139,11 @@ def __init__(self, config=None): def __get_backend(dataset_s) -> AbstractTileService: with DS_LOCK: if dataset_s not in NexusTileService.backends: - raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') + logger.warning(f'Dataset {dataset_s} not currently loaded. 
Checking to see if it was recently' + f'added') + NexusTileService._update_datasets() + if dataset_s not in NexusTileService.backends: + raise DatasetNotFoundException(reason=f'Dataset {dataset_s} is not currently loaded/ingested') b = NexusTileService.backends[dataset_s] @@ -162,11 +177,13 @@ def _update_datasets(): solrcon = solrcon - update_logger.info('Executing update query to check for new datasets') + update_logger.info('Executing Solr query to check for new datasets') present_datasets = {None, '__nexusproto__'} next_cursor_mark = '*' + added_datasets = 0 + while True: response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') @@ -190,6 +207,8 @@ def _update_datasets(): continue # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + added_datasets += 1 + if store_type == 'nexus_proto' or store_type == 'nexusproto': update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") NexusTileService.backends[d_id] = NexusTileService.backends[None] @@ -198,21 +217,25 @@ def _update_datasets(): ds_config = json.loads(dataset['config'][0]) NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(ds_config), + 'backend': ZarrBackend(**ds_config), 'up': True } else: - logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + added_datasets -= 1 removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) if len(removed_datasets) > 0: - logger.info(f'{len(removed_datasets)} marked for removal') + update_logger.info(f'{len(removed_datasets)} old datasets marked for removal') for dataset in removed_datasets: - logger.info(f"Removing dataset {dataset}") + update_logger.info(f"Removing dataset {dataset}") del NexusTileService.backends[dataset] + update_logger.info(f'Finished dataset update: {added_datasets} added, {len(removed_datasets)} removed, ' + f'{len(NexusTileService.backends) - 2} total') + def override_config(self, config): for section in config.sections(): if self._config.has_section(section): # only override preexisting section, ignores the other diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 51270182..7d33cced 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -20,3 +20,5 @@ urllib3==1.26.2 requests nexusproto Shapely +s3fs +fsspec \ No newline at end of file From 4ccec2e5bc4fae53feca1426127fe801236ff067 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 16:41:13 -0700 Subject: [PATCH 06/70] gitignore ini --- .gitignore | 3 +- .../nexustiles/config/datastores.ini.default | 39 ------------------- 2 files changed, 2 insertions(+), 40 deletions(-) delete mode 100644 data-access/nexustiles/config/datastores.ini.default diff --git a/.gitignore b/.gitignore index 12ab2d61..23f84355 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,6 @@ *.idea *.DS_Store analysis/webservice/algorithms/doms/domsconfig.ini -data-access/nexustiles/config/datastores.ini +data-access/nexustiles/backends/nexusproto/config/datastores.ini +data-access/nexustiles/config/datasets.ini venv/ diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default deleted file mode 100644 index d8db1902..00000000 --- a/data-access/nexustiles/config/datastores.ini.default +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[cassandra] -host=localhost -port=9042 -keyspace=nexustiles -local_datacenter=datacenter1 -protocol_version=3 -dc_policy=DCAwareRoundRobinPolicy -username= -password= - -[s3] -bucket=nexus-jpl -region=us-west-2 - -[dynamo] -table=nexus-jpl-table -region=us-west-2 - -[solr] -host=http://localhost:8983 -core=nexustiles - -[datastore] -store=cassandra From 736a44e8740f601eddda87e4bc23eb92c270a32a Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 10 Jul 2023 16:43:47 -0700 Subject: [PATCH 07/70] ASF headers --- data-access/nexustiles/backends/__init__.py | 15 +++++++++++++++ .../nexustiles/backends/nexusproto/__init__.py | 15 +++++++++++++++ data-access/nexustiles/backends/zarr/__init__.py | 15 +++++++++++++++ data-access/nexustiles/exception.py | 15 +++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/data-access/nexustiles/backends/__init__.py b/data-access/nexustiles/backends/__init__.py index e69de29b..8afd240a 100644 --- a/data-access/nexustiles/backends/__init__.py +++ b/data-access/nexustiles/backends/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/backends/nexusproto/__init__.py b/data-access/nexustiles/backends/nexusproto/__init__.py index e69de29b..8afd240a 100644 --- a/data-access/nexustiles/backends/nexusproto/__init__.py +++ b/data-access/nexustiles/backends/nexusproto/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/backends/zarr/__init__.py b/data-access/nexustiles/backends/zarr/__init__.py index e69de29b..8afd240a 100644 --- a/data-access/nexustiles/backends/zarr/__init__.py +++ b/data-access/nexustiles/backends/zarr/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py index 33ab5296..77850a2f 100644 --- a/data-access/nexustiles/exception.py +++ b/data-access/nexustiles/exception.py @@ -1,2 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ class NexusTileServiceException(Exception): pass From 70bdab12f4dfd80a59f572376e496ddff8145d37 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 11 Jul 2023 15:17:50 -0700 Subject: [PATCH 08/70] First functioning test of 2 simultaneous backends --- data-access/nexustiles/AbstractTileService.py | 3 + .../nexustiles/backends/nexusproto/backend.py | 8 +- .../nexustiles/backends/zarr/backend.py | 195 +++++++++++++++++- data-access/nexustiles/nexustiles.py | 73 ++++--- data-access/requirements.txt | 3 +- 5 files changed, 238 insertions(+), 44 deletions(-) diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 6e5b4640..20467784 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -30,6 +30,9 @@ class AbstractTileService(ABC): # def try_connect(self) -> bool: # raise NotImplementedError() + def __init__(self, dataset_name): + self._name = dataset_name + @abstractmethod def get_dataseries_list(self, simple=False): raise NotImplementedError() diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index 6aa63644..8cca5813 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -38,16 +38,12 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger("testing") +logger = logging.getLogger(__name__) class NexusprotoTileService(AbstractTileService): def __init__(self, skipDatastore=False, skipMetadatastore=False, config=None): - AbstractTileService.__init__(self) + AbstractTileService.__init__(self, None) self._datastore = None self._metadatastore = None diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 13622453..fe5a49dd 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -30,20 +30,72 @@ from nexustiles.exception import NexusTileServiceException from nexustiles.AbstractTileService import AbstractTileService +from yarl import URL + +import xarray as xr +import s3fs +from urllib.parse import urlparse + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger("testing") +logger = logging.getLogger() class ZarrBackend(AbstractTileService): - def __init__(self, path, config): - AbstractTileService.__init__(self) - self.__config = config - + def __init__(self, dataset_name, path, config=None): + AbstractTileService.__init__(self, dataset_name) + self.__config = config if config is not None else {} + + logger.info(f'Opening zarr backend at {path} for dataset {self._name}') + + url = urlparse(path) + + self.__url = path + + self.__store_type = url.scheme + self.__host = url.netloc + self.__path = url.path + + if 'variable' in config: + data_vars = config['variable'] + elif 'variables' in config: + data_vars = config['variables'] + else: + raise KeyError('Data variables not provided in config') + + if isinstance(data_vars, str): + self.__variables = [data_vars] + elif isinstance(data_vars, list): + self.__variables = data_vars + else: + raise TypeError(f'Improper 
type for variables config: {type(data_vars)}') + + self.__longitude = config['coords']['longitude'] + self.__latitude = config['coords']['latitude'] + self.__time = config['coords']['time'] + + self.__depth = config['coords'].get('depth') + + if self.__store_type in ['', 'file']: + store = self.__path + elif self.__store_type == 's3': + aws_cfg = self.__config['aws'] + + if aws_cfg['public']: + region = aws_cfg.get('region', 'us-west-2') + store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + else: + s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) + store = s3fs.S3Map(root=path, s3=s3, check=False) + else: + raise ValueError(self.__store_type) + + self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + def get_dataseries_list(self, simple=False): raise NotImplementedError() @@ -89,10 +141,31 @@ def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **k raise NotImplementedError() def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): - # Find tiles that fall in the given box in the Solr index - raise NotImplementedError() - - def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): + if type(start_time) is datetime: + start_time = (start_time - EPOCH).total_seconds() + if type(end_time) is datetime: + end_time = (end_time - EPOCH).total_seconds() + + params = { + 'min_lat': min_lat, + 'max_lat': max_lat, + 'min_lon': min_lon, + 'max_lon': max_lon + } + + if 0 <= start_time <= end_time: + params['min_time'] = start_time + params['max_time'] = end_time + + if 'depth' in kwargs: + params['depth'] = kwargs['depth'] + elif 'min_depth' in kwargs or 'max_depth' in kwargs: + params['min_depth'] = kwargs.get('min_depth') + params['max_depth'] = kwargs.get('max_depth') + + return [ZarrBackend.__to_url(self._name, **params)] + + def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index raise NotImplementedError() @@ -184,10 +257,110 @@ def get_tile_count(self, ds, bounding_polygon=None, start_time=0, end_time=-1, m raise NotImplementedError() def fetch_data_for_tiles(self, *tiles): - raise NotImplementedError() + for tile in tiles: + self.__fetch_data_for_tile(tile) + + return tiles + + def __fetch_data_for_tile(self, tile: Tile): + bbox: BBox = tile.bbox + + min_lat = None + min_lon = None + max_lat = None + max_lon = None + + min_time = float(tile.min_time) + max_time = float(tile.max_time) + + if min_time: + min_time = datetime.fromtimestamp(min_time) + + if max_time: + max_time = datetime.fromtimestamp(max_time) + + if bbox: + min_lat = bbox.min_lat + min_lon = bbox.min_lon + max_lat = bbox.max_lat + max_lon = bbox.max_lon + + sel = { + self.__latitude: slice(min_lat, max_lat), + self.__longitude: slice(min_lon, max_lon), + self.__time: slice(min_time, max_time) + } + + tile.variables = [ + TileVariable(v, v) for v in self.__variables + ] + + matched = self.__ds.sel(sel) + + tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) + tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) + + times = matched[self.__time].to_numpy() + + if np.issubdtype(times.dtype, np.datetime64): + times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) + + tile.times = ma.masked_invalid(times) + + tile.data = ma.masked_invalid( + [matched[var].to_numpy() for var in self.__variables] + ) 
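+        # tile.data stacks one array per configured variable along a new leading axis,
+        # so the tile is always flagged as multi-variable below and the NexusTileService
+        # masking helpers apply the shared space/time mask to each variable's slice.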
+ + tile.is_multi = True def _metadata_store_docs_to_tiles(self, *store_docs): - raise NotImplementedError() + return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] + + @staticmethod + def __nts_url_to_tile(nts_url): + tile = Tile() + + url = URL(nts_url) + + tile.tile_id = nts_url + + try: + min_lat = float(url.query['min_lat']) + min_lon = float(url.query['min_lon']) + max_lat = float(url.query['max_lat']) + max_lon = float(url.query['max_lon']) + + tile.bbox = BBox(min_lat, max_lat, min_lon, max_lon) + except KeyError: + pass + + tile.dataset = url.host + + try: + tile.min_time = int(url.query['min_time']) + except KeyError: + pass + + try: + tile.max_time = int(url.query['max_time']) + except KeyError: + pass + + return tile + + @staticmethod + def __to_url(dataset, **kwargs): + if 'dataset' in kwargs: + del kwargs['dataset'] + + if 'ds' in kwargs: + del kwargs['ds'] + return str(URL.build( + scheme='nts', + host=dataset, + path='/', + query=kwargs + )) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 405b5b70..78fe23d4 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -37,6 +37,8 @@ from .backends.zarr.backend import ZarrBackend from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from requests.structures import CaseInsensitiveDict + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) logging.basicConfig( @@ -53,13 +55,27 @@ def fetch_data_for_func(*args, **kwargs): metadatastore_start = datetime.now() metadatastore_docs = func(*args, **kwargs) metadatastore_duration = (datetime.now() - metadatastore_start).total_seconds() - tiles = args[0]._metadata_store_docs_to_tiles(*metadatastore_docs) + + # Try to determine source dataset to route calls to proper backend + guessed_dataset = None + + if 'ds' in kwargs: + guessed_dataset = kwargs['ds'] + elif 'dataset' in kwargs: + guessed_dataset = kwargs['dataset'] + else: + for arg in args: + if arg is not None and arg in NexusTileService.backends: + guessed_dataset = arg + break + + tiles = NexusTileService._get_backend(guessed_dataset)._metadata_store_docs_to_tiles(*metadatastore_docs) cassandra_duration = 0 if ('fetch_data' in kwargs and kwargs['fetch_data']) or ('fetch_data' not in kwargs and default_fetch): if len(tiles) > 0: cassandra_start = datetime.now() - args[0].fetch_data_for_tiles(*tiles) + NexusTileService._get_backend(guessed_dataset).fetch_data_for_tiles(*tiles) cassandra_duration += (datetime.now() - cassandra_start).total_seconds() if 'metrics_callback' in kwargs and kwargs['metrics_callback'] is not None: @@ -128,7 +144,7 @@ def __init__(self, config=None): NexusTileService.__update_thread = threading.Thread( target=NexusTileService.__update_datasets_loop, name='dataset_update', - daemon=False + daemon=True ) logger.info('Starting dataset refresh thread') @@ -136,7 +152,10 @@ def __init__(self, config=None): NexusTileService.__update_thread.start() @staticmethod - def __get_backend(dataset_s) -> AbstractTileService: + def _get_backend(dataset_s) -> AbstractTileService: + if dataset_s is not None: + dataset_s = dataset_s.lower() + with DS_LOCK: if dataset_s not in NexusTileService.backends: logger.warning(f'Dataset {dataset_s} not currently loaded. 
Checking to see if it was recently' @@ -198,7 +217,7 @@ def _update_datasets(): next_cursor_mark = response_cursor_mark for dataset in response.docs: - d_id = dataset['dataset_s'] + d_id = dataset['dataset_s'].lower() store_type = dataset.get('store_type_s', 'nexusproto') present_datasets.add(d_id) @@ -217,7 +236,7 @@ def _update_datasets(): ds_config = json.loads(dataset['config'][0]) NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(**ds_config), + 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), 'up': True } else: @@ -251,33 +270,33 @@ def get_dataseries_list(self, simple=False): @tile_data() def find_tile_by_id(self, tile_id, **kwargs): - return NexusTileService.__get_backend('__nexusproto__').find_tile_by_id(tile_id) + return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - return NexusTileService.__get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) + return NexusTileService._get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - return NexusTileService.__get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, - dataset, start_time, end_time, - metrics_callback, **kwargs) + return NexusTileService._get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, + dataset, start_time, end_time, + metrics_callback, **kwargs) @tile_data() def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): - return NexusTileService.__get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( + return NexusTileService._get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( bounding_polygon, ds, day_of_year, **kwargs ) @tile_data() def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return NexusTileService.__get_backend(dataset).find_all_tiles_in_box_at_time( + return NexusTileService._get_backend(dataset).find_all_tiles_in_box_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) @tile_data() def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): - return NexusTileService.__get_backend(dataset).find_all_tiles_in_polygon_at_time( + return NexusTileService._get_backend(dataset).find_all_tiles_in_polygon_at_time( bounding_polygon, dataset, time, **kwargs ) @@ -289,19 +308,19 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t if type(end_time) is datetime: end_time = (end_time - EPOCH).total_seconds() - return NexusTileService.__get_backend(ds).find_tiles_in_box( + return NexusTileService._get_backend(ds).find_tiles_in_box( min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs ) @tile_data() def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): - return NexusTileService.__get_backend(ds).find_tiles_in_polygon( + return NexusTileService._get_backend(ds).find_tiles_in_polygon( bounding_polygon, ds, start_time, end_time, **kwargs ) @tile_data() def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): - return NexusTileService.__get_backend(ds).find_tiles_by_metadata( + return NexusTileService._get_backend(ds).find_tiles_by_metadata( metadata, ds, start_time, end_time, **kwargs 
) @@ -334,13 +353,13 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - return NexusTileService.__get_backend(ds).find_tiles_by_exact_bounds( + return NexusTileService._get_backend(ds).find_tiles_by_exact_bounds( bounds, ds, start_time, end_time, **kwargs ) @tile_data() def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - return NexusTileService.__get_backend(dataset).find_all_boundary_tiles_at_time( + return NexusTileService._get_backend(dataset).find_all_boundary_tiles_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) @@ -363,12 +382,12 @@ def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time= return tiles def get_min_max_time_by_granule(self, ds, granule_name): - return NexusTileService.__get_backend(ds).get_min_max_time_by_granule( + return NexusTileService._get_backend(ds).get_min_max_time_by_granule( ds, granule_name ) def get_dataset_overall_stats(self, ds): - return NexusTileService.__get_backend(ds).get_dataset_overall_stats(ds) + return NexusTileService._get_backend(ds).get_dataset_overall_stats(ds) def get_tiles_bounded_by_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): tiles = self.find_all_tiles_in_box_at_time(min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs) @@ -399,7 +418,7 @@ def get_bounding_box(self, tile_ids, ds=None): :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - return NexusTileService.__get_backend(ds).get_bounding_box(tile_ids, ds) + return NexusTileService._get_backend(ds).get_bounding_box(tile_ids, ds) def get_min_time(self, tile_ids, ds=None): """ @@ -408,7 +427,7 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - return NexusTileService.__get_backend(ds).get_min_time(tile_ids, ds) + return NexusTileService._get_backend(ds).get_min_time(tile_ids, ds) def get_max_time(self, tile_ids, ds=None): """ @@ -417,7 +436,7 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. 
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - return int(NexusTileService.__get_backend(ds).get_max_time(tile_ids)) + return int(NexusTileService._get_backend(ds).get_max_time(tile_ids)) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -528,8 +547,10 @@ def mask_tiles_to_time_range(self, start_time, end_time, tiles): return tiles - def fetch_data_for_tiles(self, *tiles, dataset=None): - return NexusTileService.__get_backend(dataset).fetch_data_for_tiles(*tiles) + def fetch_data_for_tiles(self, *tiles): + dataset = tiles[0].dataset + + return NexusTileService._get_backend(dataset).fetch_data_for_tiles(*tiles) def _metadata_store_docs_to_tiles(self, *store_docs): tiles = [] diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 7d33cced..ab96e2af 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -21,4 +21,5 @@ requests nexusproto Shapely s3fs -fsspec \ No newline at end of file +fsspec +xarray~=2022.3.0 \ No newline at end of file From f3981cd8735b146206fe87955cc26cd4d25ad034 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 13:38:12 -0700 Subject: [PATCH 09/70] Removed accidentally committed ini files --- .../backends/nexusproto/config/datastores.ini | 36 ------------------- data-access/nexustiles/config/datasets.ini | 18 ---------- 2 files changed, 54 deletions(-) delete mode 100644 data-access/nexustiles/backends/nexusproto/config/datastores.ini delete mode 100644 data-access/nexustiles/config/datasets.ini diff --git a/data-access/nexustiles/backends/nexusproto/config/datastores.ini b/data-access/nexustiles/backends/nexusproto/config/datastores.ini deleted file mode 100644 index f3facb95..00000000 --- a/data-access/nexustiles/backends/nexusproto/config/datastores.ini +++ /dev/null @@ -1,36 +0,0 @@ -[cassandra] -host=localhost -port=9042 -keyspace=nexustiles -local_datacenter=datacenter1 -protocol_version=3 -dc_policy=WhiteListRoundRobinPolicy -username=cassandra -password=cassandra - -[dynamo] -table=nexus-jpl-table -region=us-west-2 - -[solr] -host=http://localhost:8983 -core=nexustiles - -[s3] -bucket=cdms-dev-zarr -#key=MUR_aggregate/ -#key=MUR_1wk_7_100_100/ -#key=MUR_1wk_7_1500_2500/ -#key=MUR_2017_9dy_7_1500_2500/ -#key=MUR_2017_9dy_7_120_240/ -key=MUR_2017_2yr_30_120_240/ -#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_7_120_240.zarr/ -#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_1_240_240.zarr/ -#key=SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5_90_120_240.zarr/ -public=false -region=us-west-2 -profile=saml-pub - -[datastore] -store=cassandra -#store=zarrS3 diff --git a/data-access/nexustiles/config/datasets.ini b/data-access/nexustiles/config/datasets.ini deleted file mode 100644 index 9f586cf2..00000000 --- a/data-access/nexustiles/config/datasets.ini +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[solr] -host=http://localhost:8983 -core=nexusdatasets From 26f6220f2f6a8aaa3787ac6b80effc0b4e236837 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 14:14:31 -0700 Subject: [PATCH 10/70] Working zarr backend ds list + datasets are no longer case sensitive + handling for failed zarr ds opens (bad path, bad creds, &c) --- .../backends/nexusproto/dao/SolrProxy.py | 3 +- .../nexustiles/backends/zarr/backend.py | 75 +++++++++++++++---- data-access/nexustiles/exception.py | 3 +- data-access/nexustiles/nexustiles.py | 37 ++++++--- 4 files changed, 90 insertions(+), 28 deletions(-) diff --git a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py index 9b16533d..c9435a2b 100644 --- a/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py +++ b/data-access/nexustiles/backends/nexusproto/dao/SolrProxy.py @@ -189,7 +189,8 @@ def get_data_series_list_simple(self): l.append({ "shortName": g, "title": g, - "tileCount": v + "tileCount": v, + "type": 'nexusproto' }) l = sorted(l, key=lambda entry: entry["title"]) return l diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index fe5a49dd..de1d86ba 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -37,12 +37,13 @@ from urllib.parse import urlparse EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) -logger = logging.getLogger() +logger = logging.getLogger(__name__) class ZarrBackend(AbstractTileService): @@ -83,21 +84,43 @@ def __init__(self, dataset_name, path, config=None): if self.__store_type in ['', 'file']: store = self.__path elif self.__store_type == 's3': - aws_cfg = self.__config['aws'] - - if aws_cfg['public']: - region = aws_cfg.get('region', 'us-west-2') - store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' - else: - s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) - store = s3fs.S3Map(root=path, s3=s3, check=False) + try: + aws_cfg = self.__config['aws'] + + if aws_cfg['public']: + region = aws_cfg.get('region', 'us-west-2') + store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + else: + s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) + store = s3fs.S3Map(root=path, s3=s3, check=False) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') + raise NexusTileServiceException(f'Cannot open S3 dataset ({e})') else: raise ValueError(self.__store_type) - self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + try: + self.__ds: xr.Dataset = xr.open_zarr(store, consolidated=True) + except Exception as e: + logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. 
Cause: {e}') + raise NexusTileServiceException(f'Cannot open dataset ({e})') def get_dataseries_list(self, simple=False): - raise NotImplementedError() + ds = { + "shortName": self._name, + "title": self._name, + "type": "zarr" + } + + if not simple: + min_date = self.get_min_time([]) + max_date = self.get_max_time([]) + ds['start'] = min_date + ds['end'] = max_date + ds['iso_start'] = datetime.fromtimestamp(min_date).strftime(ISO_8601) + ds['iso_end'] = datetime.fromtimestamp(max_date).strftime(ISO_8601) + + return [ds] def find_tile_by_id(self, tile_id, **kwargs): raise NotImplementedError() @@ -215,6 +238,18 @@ def get_bounding_box(self, tile_ids): """ raise NotImplementedError() + def __get_ds_min_max_date(self): + min_date = self.__ds[self.__time].min().to_numpy() + max_date = self.__ds[self.__time].max().to_numpy() + + if np.issubdtype(min_date.dtype, np.datetime64): + min_date = ((min_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + + if np.issubdtype(max_date.dtype, np.datetime64): + max_date = ((max_date - np.datetime64(EPOCH)) / 1e9).astype(int).item() + + return min_date, max_date + def get_min_time(self, tile_ids, ds=None): """ Get the minimum tile date from the list of tile ids @@ -222,7 +257,11 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + if len(tile_ids) == 0: + min_date, max_date = self.__get_ds_min_max_date() + return min_date + else: + raise NotImplementedError() def get_max_time(self, tile_ids, ds=None): """ @@ -231,7 +270,11 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - raise NotImplementedError() + if len(tile_ids) == 0: + min_date, max_date = self.__get_ds_min_max_date() + return max_date + else: + raise NotImplementedError() def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -334,7 +377,7 @@ def __nts_url_to_tile(nts_url): except KeyError: pass - tile.dataset = url.host + tile.dataset = url.path try: tile.min_time = int(url.query['min_time']) @@ -358,8 +401,8 @@ def __to_url(dataset, **kwargs): return str(URL.build( scheme='nts', - host=dataset, - path='/', + host='', + path=dataset, query=kwargs )) diff --git a/data-access/nexustiles/exception.py b/data-access/nexustiles/exception.py index 77850a2f..d6ed2c64 100644 --- a/data-access/nexustiles/exception.py +++ b/data-access/nexustiles/exception.py @@ -14,4 +14,5 @@ # limitations under the License. 
class NexusTileServiceException(Exception): - pass + def __init__(self, reason): + Exception.__init__(self, reason) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 78fe23d4..1b58f156 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -37,6 +37,8 @@ from .backends.zarr.backend import ZarrBackend from .model.nexusmodel import Tile, BBox, TileStats, TileVariable +from .exception import NexusTileServiceException + from requests.structures import CaseInsensitiveDict EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) @@ -93,6 +95,16 @@ def fetch_data_for_func(*args, **kwargs): return tile_data_decorator +def catch_not_implemented(func): + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except NotImplementedError: + raise NexusTileServiceException('Action unsupported by backend') + + return wrapper + + SOLR_LOCK = threading.Lock() DS_LOCK = threading.Lock() thread_local = threading.local() @@ -154,7 +166,7 @@ def __init__(self, config=None): @staticmethod def _get_backend(dataset_s) -> AbstractTileService: if dataset_s is not None: - dataset_s = dataset_s.lower() + dataset_s = dataset_s with DS_LOCK: if dataset_s not in NexusTileService.backends: @@ -217,7 +229,7 @@ def _update_datasets(): next_cursor_mark = response_cursor_mark for dataset in response.docs: - d_id = dataset['dataset_s'].lower() + d_id = dataset['dataset_s'] store_type = dataset.get('store_type_s', 'nexusproto') present_datasets.add(d_id) @@ -235,10 +247,13 @@ def _update_datasets(): update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") ds_config = json.loads(dataset['config'][0]) - NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), - 'up': True - } + try: + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), + 'up': True + } + except NexusTileServiceException: + added_datasets -= 1 else: update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') added_datasets -= 1 @@ -263,10 +278,12 @@ def override_config(self, config): self._config.set(section, option, config.get(section, option)) def get_dataseries_list(self, simple=False): - if simple: - return self._metadatastore.get_data_series_list_simple() - else: - return self._metadatastore.get_data_series_list() + datasets = [] + for backend in set([b['backend'] for b in NexusTileService.backends.values() if b['up']]): + datasets.extend(backend.get_dataseries_list(simple)) + + return datasets + @tile_data() def find_tile_by_id(self, tile_id, **kwargs): From 91de6efef7b15480bde3993f9ae47aee8401e5ed Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:06:18 -0700 Subject: [PATCH 11/70] Capture and handle NTS requests routed to backend that doesn't (yet) support them --- data-access/nexustiles/nexustiles.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 1b58f156..b8165a1d 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -286,13 +286,16 @@ def get_dataseries_list(self, simple=False): @tile_data() + @catch_not_implemented def find_tile_by_id(self, tile_id, **kwargs): return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() + @catch_not_implemented def find_tiles_by_id(self, tile_ids, ds=None, 
**kwargs): - return NexusTileService._get_backend('__nexusproto__').find_tiles_by_id(tile_ids, ds=ds, **kwargs) + return NexusTileService._get_backend(ds).find_tiles_by_id(tile_ids, ds=ds, **kwargs) + @catch_not_implemented def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): return NexusTileService._get_backend(dataset).find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, @@ -300,24 +303,28 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, st metrics_callback, **kwargs) @tile_data() + @catch_not_implemented def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): return NexusTileService._get_backend(ds).find_tile_by_polygon_and_most_recent_day_of_year( bounding_polygon, ds, day_of_year, **kwargs ) @tile_data() + @catch_not_implemented def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return NexusTileService._get_backend(dataset).find_all_tiles_in_box_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs ) @tile_data() + @catch_not_implemented def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): return NexusTileService._get_backend(dataset).find_all_tiles_in_polygon_at_time( bounding_polygon, dataset, time, **kwargs ) @tile_data() + @catch_not_implemented def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): # Find tiles that fall in the given box in the Solr index if type(start_time) is datetime: @@ -330,12 +337,14 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t ) @tile_data() + @catch_not_implemented def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=0, end_time=-1, **kwargs): return NexusTileService._get_backend(ds).find_tiles_in_polygon( bounding_polygon, ds, start_time, end_time, **kwargs ) @tile_data() + @catch_not_implemented def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): return NexusTileService._get_backend(ds).find_tiles_by_metadata( metadata, ds, start_time, end_time, **kwargs @@ -357,6 +366,7 @@ def get_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, ** return tiles @tile_data() + @catch_not_implemented def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs): """ The method will return tiles with the exact given bounds within the time range. 
It differs from @@ -375,6 +385,7 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) ) @tile_data() + @catch_not_implemented def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return NexusTileService._get_backend(dataset).find_all_boundary_tiles_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs @@ -398,11 +409,13 @@ def get_tiles_bounded_by_polygon(self, polygon, ds=None, start_time=0, end_time= return tiles + @catch_not_implemented def get_min_max_time_by_granule(self, ds, granule_name): return NexusTileService._get_backend(ds).get_min_max_time_by_granule( ds, granule_name ) + @catch_not_implemented def get_dataset_overall_stats(self, ds): return NexusTileService._get_backend(ds).get_dataset_overall_stats(ds) @@ -424,6 +437,7 @@ def get_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset return tiles + @catch_not_implemented def get_stats_within_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return NexusTileService.get_stats_within_box_at_time( min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs @@ -435,7 +449,7 @@ def get_bounding_box(self, tile_ids, ds=None): :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - return NexusTileService._get_backend(ds).get_bounding_box(tile_ids, ds) + return NexusTileService._get_backend(ds).get_bounding_box(tile_ids) def get_min_time(self, tile_ids, ds=None): """ From df23919bc7466d9df12fda9893b581af61d10f80 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:06:59 -0700 Subject: [PATCH 12/70] analysis setup fails to find VERSION.txt when building locally --- analysis/setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/analysis/setup.py b/analysis/setup.py index 99cd707c..6472621d 100644 --- a/analysis/setup.py +++ b/analysis/setup.py @@ -17,8 +17,11 @@ import setuptools from subprocess import check_call, CalledProcessError -with open('../VERSION.txt', 'r') as f: - __version__ = f.read() +try: + with open('../VERSION.txt', 'r') as f: + __version__ = f.read() +except: + __version__ = None try: From 07404f063dc9f2b0ae9c2941caef445c7aae26c2 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:07:35 -0700 Subject: [PATCH 13/70] Implemented more NTS functions in zarr backend --- .../nexustiles/backends/zarr/backend.py | 89 ++++++++++++++++--- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index de1d86ba..1f46a95e 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -123,14 +123,38 @@ def get_dataseries_list(self, simple=False): return [ds] def find_tile_by_id(self, tile_id, **kwargs): - raise NotImplementedError() + return tile_id def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): - raise NotImplementedError() + return tile_ids def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, start_time, end_time, metrics_callback=None, **kwargs): - raise NotImplementedError() + start = datetime.now() + + if not isinstance(start_time, datetime): + start_time = datetime.fromtimestamp(start_time) + + if not isinstance(end_time, datetime): + end_time = datetime.fromtimestamp(end_time) + + sel = { + self.__latitude: slice(min_lat, max_lat), + 
self.__longitude: slice(min_lon, max_lon), + self.__time: slice(start_time, end_time) + } + + times = self.__ds.sel(sel)[self.__time].to_numpy() + + if np.issubdtype(times.dtype, np.datetime64): + times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) + + times = sorted(list(times)) + + if metrics_callback: + metrics_callback(backend=(datetime.now() - start).total_seconds()) + + return times def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, day_of_year, **kwargs): """ @@ -158,10 +182,10 @@ def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, raise NotImplementedError() def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, dataset, time, time, **kwargs) def find_all_tiles_in_polygon_at_time(self, bounding_polygon, dataset, time, **kwargs): - raise NotImplementedError() + return self.find_tiles_in_polygon(bounding_polygon, dataset, time, time, **kwargs) def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_time=0, end_time=-1, **kwargs): if type(start_time) is datetime: @@ -190,7 +214,14 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index - raise NotImplementedError() + bounds = bounding_polygon.bounds + + min_lon = bounds[0] + min_lat = bounds[1] + max_lon = bounds[2] + max_lat = bounds[3] + + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) def find_tiles_by_metadata(self, metadata, ds=None, start_time=0, end_time=-1, **kwargs): """ @@ -216,10 +247,17 @@ def find_tiles_by_exact_bounds(self, bounds, ds, start_time, end_time, **kwargs) :param kwargs: fetch_data: True/False = whether or not to retrieve tile data :return: """ - raise NotImplementedError() + min_lon = bounds[0] + min_lat = bounds[1] + max_lon = bounds[2] + max_lat = bounds[3] + + return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, **kwargs) def find_all_boundary_tiles_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): - raise NotImplementedError() + # Due to the precise nature of gridded Zarr's subsetting, it doesn't make sense to have a boundary region like + # this + return [] def get_min_max_time_by_granule(self, ds, granule_name): raise NotImplementedError() @@ -236,7 +274,20 @@ def get_bounding_box(self, tile_ids): :param tile_ids: List of tile ids :return: shapely.geometry.Polygon that represents the smallest bounding box that encompasses all of the tiles """ - raise NotImplementedError() + + bounds = [ + ( + float(URL(u).query['min_lon']), + float(URL(u).query['min_lat']), + float(URL(u).query['max_lon']), + float(URL(u).query['max_lat']) + ) + for u in tile_ids + ] + + poly = MultiPolygon([box(*b) for b in bounds]) + + return box(*poly.bounds) def __get_ds_min_max_date(self): min_date = self.__ds[self.__time].min().to_numpy() @@ -257,11 +308,13 @@ def get_min_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. 
Defaults to None (queries all datasets) :return: long time in seconds since epoch """ - if len(tile_ids) == 0: + times = list(filter(lambda x: x is not None, [int(URL(tid).query['min_time']) for tid in tile_ids])) + + if len(times) == 0: min_date, max_date = self.__get_ds_min_max_date() return min_date else: - raise NotImplementedError() + return min(times) def get_max_time(self, tile_ids, ds=None): """ @@ -270,11 +323,13 @@ def get_max_time(self, tile_ids, ds=None): :param ds: Filter by a specific dataset. Defaults to None (queries all datasets) :return: long time in seconds since epoch """ + times = list(filter(lambda x: x is not None, [int(URL(tid).query['max_time']) for tid in tile_ids])) + if len(tile_ids) == 0: min_date, max_date = self.__get_ds_min_max_date() return max_date else: - raise NotImplementedError() + max(times) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -331,14 +386,20 @@ def __fetch_data_for_tile(self, tile: Tile): sel = { self.__latitude: slice(min_lat, max_lat), self.__longitude: slice(min_lon, max_lon), - self.__time: slice(min_time, max_time) } + if min_time == max_time: + sel[self.__time] = min_time + method = 'nearest' + else: + sel[self.__time] = slice(min_time, max_time) + method = None + tile.variables = [ TileVariable(v, v) for v in self.__variables ] - matched = self.__ds.sel(sel) + matched = self.__ds.sel(sel, method=method) tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) From 72888aa9f83f846dd9535835101e21aef93a8410 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 12 Jul 2023 16:11:22 -0700 Subject: [PATCH 14/70] Added misc backend time metrics record field in NCSH --- analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py b/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py index 4499773a..e0334676 100644 --- a/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py +++ b/analysis/webservice/algorithms_spark/NexusCalcSparkHandler.py @@ -362,6 +362,9 @@ def _create_metrics_record(self): SparkAccumulatorMetricsField(key='solr', description='Cumulative time to fetch data from Solr', accumulator=self._sc.accumulator(0)), + SparkAccumulatorMetricsField(key='backend', + description='Cumulative time to fetch data from external backend(s)', + accumulator=self._sc.accumulator(0)), SparkAccumulatorMetricsField(key='calculation', description='Cumulative time to do calculations', accumulator=self._sc.accumulator(0)), From 1c4a0e492485be2650c5756541cbfb9376b0a2bf Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 13 Jul 2023 13:55:38 -0700 Subject: [PATCH 15/70] fixes --- .../nexustiles/backends/nexusproto/backend.py | 3 +++ .../nexustiles/backends/zarr/backend.py | 21 +++++++------------ data-access/nexustiles/nexustiles.py | 16 +++++++------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index 8cca5813..690b109c 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -269,6 +269,9 @@ def get_bounding_box(self, tile_ids): """ tiles = self.find_tiles_by_id(tile_ids, fl=['tile_min_lat', 'tile_max_lat', 'tile_min_lon', 'tile_max_lon'], fetch_data=False, rows=len(tile_ids)) + + tiles 
= self._metadata_store_docs_to_tiles(*tiles) + polys = [] for tile in tiles: polys.append(box(tile.bbox.min_lon, tile.bbox.min_lat, tile.bbox.max_lon, tile.bbox.max_lat)) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 1f46a95e..f4f92c56 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -13,29 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import configparser import logging import sys -import json from datetime import datetime -from functools import reduce +from urllib.parse import urlparse import numpy as np import numpy.ma as ma -import pkg_resources -from pytz import timezone, UTC -from shapely.geometry import MultiPolygon, box - -from nexustiles.model.nexusmodel import Tile, BBox, TileStats, TileVariable -from nexustiles.exception import NexusTileServiceException +import s3fs +import xarray as xr from nexustiles.AbstractTileService import AbstractTileService - +from nexustiles.exception import NexusTileServiceException +from nexustiles.model.nexusmodel import Tile, BBox, TileVariable +from pytz import timezone +from shapely.geometry import MultiPolygon, box from yarl import URL -import xarray as xr -import s3fs -from urllib.parse import urlparse - EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index b8165a1d..fb8c0f33 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -67,7 +67,7 @@ def fetch_data_for_func(*args, **kwargs): guessed_dataset = kwargs['dataset'] else: for arg in args: - if arg is not None and arg in NexusTileService.backends: + if isinstance(arg, str) and arg in NexusTileService.backends: guessed_dataset = arg break @@ -178,13 +178,13 @@ def _get_backend(dataset_s) -> AbstractTileService: b = NexusTileService.backends[dataset_s] - if not b['up']: - success = b['backend'].try_connect() - - if not success: - raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') - else: - NexusTileService.backends[dataset_s]['up'] = True + # if not b['up']: + # success = b['backend'].try_connect() + # + # if not success: + # raise NexusProcessingException(reason=f'Dataset {dataset_s} is currently unavailable') + # else: + # NexusTileService.backends[dataset_s]['up'] = True return b['backend'] From 0a7cd7f3f55340107ff1d0e7f924f1dfd1cdfe26 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 17 Jul 2023 16:28:08 -0700 Subject: [PATCH 16/70] Dynamic dataset management --- analysis/webservice/config/web.ini | 2 +- analysis/webservice/management/Datasets.py | 78 ++++++++++++ analysis/webservice/management/__init__.py | 16 +++ .../request/handlers/NexusRequestHandler.py | 29 +++++ .../webservice/webmodel/NexusRequestObject.py | 6 + data-access/nexustiles/nexustiles.py | 113 +++++++++++------- 6 files changed, 197 insertions(+), 47 deletions(-) create mode 100644 analysis/webservice/management/Datasets.py create mode 100644 analysis/webservice/management/__init__.py diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini index 85849758..a9e3dda8 100644 --- a/analysis/webservice/config/web.ini +++ b/analysis/webservice/config/web.ini @@ -29,4 +29,4 @@ static_enabled=true static_dir=static [modules] 
-module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms \ No newline at end of file +module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms,webservice.management \ No newline at end of file diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py new file mode 100644 index 00000000..195ca38e --- /dev/null +++ b/analysis/webservice/management/Datasets.py @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from yaml import load +import json +from webservice.NexusHandler import nexus_handler +from nexustiles.nexustiles import NexusTileService +from webservice.webmodel import NexusRequestObject, NexusProcessingException +try: + from yaml import CLoader as Loader +except ImportError: + from yaml import Loader + + +class DatasetManagement: + @classmethod + def validate(cls): + pass + + @staticmethod + def parse_config(request: NexusRequestObject): + content_type = request.get_headers()['Content-Type'] + + if content_type in ['application/json', 'application/x-json']: + return json.loads(request.get_request_body()) + elif content_type == 'application/yaml': + return load(request.get_request_body(), Loader=Loader) + else: + raise NexusProcessingException(reason='Invalid Content-Type header', code=400) + + +@nexus_handler +class DatasetAdd(DatasetManagement): + name = 'Add dataset' + path = '/datasets/add' + description = "Add new dataset to running SDAP instance" + + def __init__(self, **args): + pass + + def calc(self, request: NexusRequestObject, **args): + # print('CALC') + try: + config = DatasetManagement.parse_config(request) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=400 + ) + + name = request.get_argument('name') + + if name is None: + raise NexusProcessingException( + reason='Name argument must be provided', + code=400 + ) + + try: + NexusTileService.user_ds_add(name, config) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=500 + ) + diff --git a/analysis/webservice/management/__init__.py b/analysis/webservice/management/__init__.py new file mode 100644 index 00000000..7c9f5ef4 --- /dev/null +++ b/analysis/webservice/management/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from webservice.management.Datasets import DatasetAdd \ No newline at end of file diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index 26455746..1c7e936c 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -63,6 +63,35 @@ def get(self): except Exception as e: self.async_onerror_callback(str(e), 500) + @tornado.gen.coroutine + def post(self): + self.logger.info("Received POST %s" % self._request_summary()) + + request = NexusRequestObject(self) + + # create NexusCalcHandler which will process the request + instance = self.__clazz(**self._clazz_init_args) + + try: + # process the request asynchronously on a different thread, + # the current tornado handler is still available to get other user requests + results = yield tornado.ioloop.IOLoop.current().run_in_executor(self.executor, instance.calc, request) + + if results: + try: + self.set_status(results.status_code) + except AttributeError: + pass + + renderer = NexusRendererFactory.get_renderer("JSON") + renderer.render(self, results) + + except NexusProcessingException as e: + self.async_onerror_callback(e.reason, e.code) + + except Exception as e: + self.async_onerror_callback(str(e), 500) + def async_onerror_callback(self, reason, code=500): self.logger.error("Error processing request", exc_info=True) diff --git a/analysis/webservice/webmodel/NexusRequestObject.py b/analysis/webservice/webmodel/NexusRequestObject.py index bbd28280..18962364 100644 --- a/analysis/webservice/webmodel/NexusRequestObject.py +++ b/analysis/webservice/webmodel/NexusRequestObject.py @@ -35,6 +35,12 @@ def __init__(self, reqHandler): self.requestHandler = reqHandler StatsComputeOptions.__init__(self) + def get_headers(self): + return self.requestHandler.request.headers + + def get_request_body(self): + return self.requestHandler.request.body + def get_argument(self, name, default=None): return self.requestHandler.get_argument(name, default=default) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index fb8c0f33..eaecf941 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -188,14 +188,13 @@ def _get_backend(dataset_s) -> AbstractTileService: return b['backend'] + @staticmethod - def _update_datasets(): + def _get_datasets_store(): solr_url = NexusTileService.ds_config.get("solr", "host") solr_core = NexusTileService.ds_config.get("solr", "core") solr_kwargs = {} - update_logger = logging.getLogger("nexus-tile-svc.backends") - if NexusTileService.ds_config.has_option("solr", "time_out"): solr_kwargs["timeout"] = NexusTileService.ds_config.get("solr", "time_out") @@ -208,55 +207,62 @@ def _update_datasets(): solrcon = solrcon - update_logger.info('Executing Solr query to check for new datasets') + return solrcon + + @staticmethod + def _update_datasets(): + update_logger = logging.getLogger("nexus-tile-svc.backends") + solrcon = 
NexusTileService._get_datasets_store() + + update_logger.info('Executing Solr query to check for new datasets') - present_datasets = {None, '__nexusproto__'} - next_cursor_mark = '*' + present_datasets = {None, '__nexusproto__'} + next_cursor_mark = '*' - added_datasets = 0 + added_datasets = 0 - while True: - response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') + while True: + response = solrcon.search('*:*', cursorMark=next_cursor_mark, sort='id asc') - try: - response_cursor_mark = response.nextCursorMark - except AttributeError: - break + try: + response_cursor_mark = response.nextCursorMark + except AttributeError: + break - if response_cursor_mark == next_cursor_mark: - break - else: - next_cursor_mark = response_cursor_mark - - for dataset in response.docs: - d_id = dataset['dataset_s'] - store_type = dataset.get('store_type_s', 'nexusproto') - - present_datasets.add(d_id) - - if d_id in NexusTileService.backends: - continue - # is_up = NexusTileService.backends[d_id]['backend'].try_connect() - - added_datasets += 1 - - if store_type == 'nexus_proto' or store_type == 'nexusproto': - update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") - NexusTileService.backends[d_id] = NexusTileService.backends[None] - elif store_type == 'zarr': - update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") - - ds_config = json.loads(dataset['config'][0]) - try: - NexusTileService.backends[d_id] = { - 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), - 'up': True - } - except NexusTileServiceException: - added_datasets -= 1 - else: - update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + if response_cursor_mark == next_cursor_mark: + break + else: + next_cursor_mark = response_cursor_mark + + for dataset in response.docs: + d_id = dataset['dataset_s'] + store_type = dataset.get('store_type_s', 'nexusproto') + + present_datasets.add(d_id) + + if d_id in NexusTileService.backends: + continue + # is_up = NexusTileService.backends[d_id]['backend'].try_connect() + + added_datasets += 1 + + if store_type == 'nexus_proto' or store_type == 'nexusproto': + update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") + NexusTileService.backends[d_id] = NexusTileService.backends[None] + elif store_type == 'zarr': + update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") + + ds_config = json.loads(dataset['config'][0]) + try: + NexusTileService.backends[d_id] = { + 'backend': ZarrBackend(dataset_name=dataset['dataset_s'], **ds_config), + 'up': True + } + except NexusTileServiceException: added_datasets -= 1 + else: + update_logger.warning(f'Unsupported backend {store_type} for dataset {d_id}') + added_datasets -= 1 removed_datasets = set(NexusTileService.backends.keys()).difference(present_datasets) @@ -270,6 +276,21 @@ def _update_datasets(): update_logger.info(f'Finished dataset update: {added_datasets} added, {len(removed_datasets)} removed, ' f'{len(NexusTileService.backends) - 2} total') + # Update cfg (ie, creds) of dataset + @staticmethod + def user_ds_update(): + pass + + # Add dataset + backend + @staticmethod + def user_ds_add(name, config): + pass + + # Delete dataset backend (error if it's a hardcoded one) + @staticmethod + def user_ds_delete(): + pass + def override_config(self, config): for section in config.sections(): if self._config.has_section(section): # only override preexisting section, ignores 
the other From c8e7dbb5e178bf9d88928659dea1792132760179 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 18 Jul 2023 16:12:29 -0700 Subject: [PATCH 17/70] Dynamic dataset management --- analysis/conda-requirements.txt | 2 +- analysis/webservice/management/Datasets.py | 58 ++++++++++++++++++++-- data-access/nexustiles/nexustiles.py | 2 +- data-access/requirements.txt | 3 +- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt index e27bdeae..902d5114 100644 --- a/analysis/conda-requirements.txt +++ b/analysis/conda-requirements.txt @@ -33,4 +33,4 @@ gdal==3.2.1 mock==4.0.3 importlib_metadata==4.11.4 #singledispatch==3.4.0.3 - +schema diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index 195ca38e..0f8df06d 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -18,12 +18,34 @@ from webservice.NexusHandler import nexus_handler from nexustiles.nexustiles import NexusTileService from webservice.webmodel import NexusRequestObject, NexusProcessingException + +from schema import Schema, Or, SchemaError +from schema import Optional as Opt + +from urllib.parse import urlparse try: from yaml import CLoader as Loader except ImportError: from yaml import Loader +CONFIG_SCHEMA = Schema({ + Or('variable', 'variables'): Or(str, [str]), + 'coords': { + 'latitude': str, + 'longitude': str, + 'time': str, + Opt('depth'): str + }, + Opt('aws'): { + 'accessKeyID': Or(str, None), + 'secretAccessKey': Or(str, None), + 'public': bool, + Opt('region'): str + } +}) + + class DatasetManagement: @classmethod def validate(cls): @@ -34,12 +56,22 @@ def parse_config(request: NexusRequestObject): content_type = request.get_headers()['Content-Type'] if content_type in ['application/json', 'application/x-json']: - return json.loads(request.get_request_body()) + config_dict = json.loads(request.get_request_body()) elif content_type == 'application/yaml': - return load(request.get_request_body(), Loader=Loader) + config_dict = load(request.get_request_body(), Loader=Loader) else: raise NexusProcessingException(reason='Invalid Content-Type header', code=400) + try: + CONFIG_SCHEMA.validate(config_dict) + except SchemaError as e: + raise NexusProcessingException( + reason=str(e), + code=400 + ) + + return config_dict + @nexus_handler class DatasetAdd(DatasetManagement): @@ -51,7 +83,6 @@ def __init__(self, **args): pass def calc(self, request: NexusRequestObject, **args): - # print('CALC') try: config = DatasetManagement.parse_config(request) except Exception as e: @@ -68,8 +99,27 @@ def calc(self, request: NexusRequestObject, **args): code=400 ) + path = request.get_argument('path') + + if path is None: + raise NexusProcessingException( + reason='Path argument must be provided', + code=400 + ) + + try: + if urlparse(path).scheme not in ['file','','s3']: + raise NexusProcessingException( + reason='Dataset URL must be for a local file or S3 URL', + code=400 + ) + except ValueError: + raise NexusProcessingException( + reason='Could not parse path URL', code=400 + ) + try: - NexusTileService.user_ds_add(name, config) + NexusTileService.user_ds_add(name, path, config) except Exception as e: raise NexusProcessingException( reason=repr(e), diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index eaecf941..68a2a584 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -283,7 
+283,7 @@ def user_ds_update(): # Add dataset + backend @staticmethod - def user_ds_add(name, config): + def user_ds_add(name, path, config): pass # Delete dataset backend (error if it's a hardcoded one) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index ab96e2af..c732bede 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -22,4 +22,5 @@ nexusproto Shapely s3fs fsspec -xarray~=2022.3.0 \ No newline at end of file +xarray~=2022.3.0 +numpy==1.24.3 \ No newline at end of file From e78f7ade3422c97008ff6532593d39c2f863e475 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 20 Jul 2023 12:29:27 -0700 Subject: [PATCH 18/70] Dataset management --- analysis/webservice/management/Datasets.py | 70 +++++++++++++++++ .../request/handlers/NexusRequestHandler.py | 4 +- data-access/nexustiles/nexustiles.py | 76 +++++++++++++++++-- 3 files changed, 142 insertions(+), 8 deletions(-) diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index 0f8df06d..48071f7c 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -73,6 +73,14 @@ def parse_config(request: NexusRequestObject): return config_dict +class Response: + def __init__(self, response): + self.response = response if response is not None else {} + + def toJson(self): + return json.dumps(self.response) + + @nexus_handler class DatasetAdd(DatasetManagement): name = 'Add dataset' @@ -126,3 +134,65 @@ def calc(self, request: NexusRequestObject, **args): code=500 ) + +@nexus_handler +class DatasetUpdate(DatasetManagement): + name = 'Update dynamically added dataset' + path = '/datasets/update' + description = "Update dataset in running SDAP instance" + + def __init__(self, **args): + pass + + def calc(self, request: NexusRequestObject, **args): + try: + config = DatasetManagement.parse_config(request) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=400 + ) + + name = request.get_argument('name') + + if name is None: + raise NexusProcessingException( + reason='Name argument must be provided', + code=400 + ) + + try: + return Response(NexusTileService.user_ds_update(name, config)) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=500 + ) + + +@nexus_handler +class DatasetDelete(DatasetManagement): + name = 'Remove dataset' + path = '/datasets/remove' + description = "Remove dataset from running SDAP instance" + + def __init__(self, **args): + pass + + def calc(self, request: NexusRequestObject, **args): + name = request.get_argument('name') + + if name is None: + raise NexusProcessingException( + reason='Name argument must be provided', + code=400 + ) + + try: + return Response(NexusTileService.user_ds_delete(name)) + except Exception as e: + raise NexusProcessingException( + reason=repr(e), + code=500 + ) + diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index 1c7e936c..6392f105 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -65,7 +65,7 @@ def get(self): @tornado.gen.coroutine def post(self): - self.logger.info("Received POST %s" % self._request_summary()) + self.logger.info("Received %s" % self._request_summary()) request = NexusRequestObject(self) @@ -83,7 +83,7 @@ def post(self): except 
AttributeError: pass - renderer = NexusRendererFactory.get_renderer("JSON") + renderer = NexusRendererFactory.get_renderer(request) renderer.render(self, results) except NexusProcessingException as e: diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 68a2a584..a5abd241 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -278,18 +278,82 @@ def _update_datasets(): # Update cfg (ie, creds) of dataset @staticmethod - def user_ds_update(): - pass + def user_ds_update(name, config): + solr = NexusTileService._get_datasets_store() + + docs = solr.search(f'dataset_s:{name}').docs + + if len(docs) != 1: + raise ValueError(f'Given name must match exactly one existing dataset; matched {len(docs)}') + + ds = docs[0] + + if 'source_s' not in ds or ds['source_s'] == 'collection_config': + raise ValueError('Provided dataset is source_s in collection config and cannot be deleted') + + config_dict = json.loads(ds['config'][0]) + + config_dict['config'] = config + + solr.delete(id=ds['id']) + solr.add([{ + 'id': name, + 'dataset_s': name, + 'latest_update_l': int(datetime.now().timestamp()), + 'store_type_s': ds['store_type_s'], + 'config': json.dumps(config_dict), + 'source_s': 'user_added' + }]) + solr.commit() + + return {'success': True} # Add dataset + backend @staticmethod - def user_ds_add(name, path, config): - pass + def user_ds_add(name, path, config, type='zarr'): + solr = NexusTileService._get_datasets_store() + + docs = solr.search(f'dataset_s:{name}').docs + + if len(docs) > 0: + raise ValueError(f'Dataset {name} already exists') + + config_dict = { + 'path': path, + 'config': config + } + + solr.add([{ + 'id': name, + 'dataset_s': name, + 'latest_update_l': int(datetime.now().timestamp()), + 'store_type_s': type, + 'config': json.dumps(config_dict), + 'source_s': 'user_added' + }]) + solr.commit() + + return {'success': True} # Delete dataset backend (error if it's a hardcoded one) @staticmethod - def user_ds_delete(): - pass + def user_ds_delete(name): + solr = NexusTileService._get_datasets_store() + + docs = solr.search(f'dataset_s:{name}').docs + + if len(docs) != 1: + raise ValueError(f'Given name must match exactly one existing dataset; matched {len(docs)}') + + ds = docs[0] + + if 'source_s' not in ds or ds['source_s'] == 'collection_config': + raise ValueError('Provided dataset is source_s in collection config and cannot be deleted') + + solr.delete(id=ds['id']) + solr.commit() + + return {'success': True} def override_config(self, config): for section in config.sections(): From a84d77e569fcf224597c73c17d1fa109f36a2a5b Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 27 Jul 2023 10:05:20 -0700 Subject: [PATCH 19/70] Timeseriesspark support --- .../algorithms_spark/TimeSeriesSpark.py | 5 +- .../nexustiles/backends/zarr/backend.py | 47 +++++++++++++------ 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py index faeaa0b1..6a353cf4 100644 --- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py +++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py @@ -488,8 +488,9 @@ def calc_average_on_day(tile_service_factory, metrics_callback, normalize_dates, timestamps[0], timestamps[-1], rows=5000, - metrics_callback=metrics_callback) - + metrics_callback=metrics_callback, + distinct=True) + calculation_start = datetime.now() tile_dict = {} diff --git 
a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index f4f92c56..9aab3cff 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -110,8 +110,8 @@ def get_dataseries_list(self, simple=False): max_date = self.get_max_time([]) ds['start'] = min_date ds['end'] = max_date - ds['iso_start'] = datetime.fromtimestamp(min_date).strftime(ISO_8601) - ds['iso_end'] = datetime.fromtimestamp(max_date).strftime(ISO_8601) + ds['iso_start'] = datetime.utcfromtimestamp(min_date).strftime(ISO_8601) + ds['iso_end'] = datetime.utcfromtimestamp(max_date).strftime(ISO_8601) return [ds] @@ -126,10 +126,10 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, st start = datetime.now() if not isinstance(start_time, datetime): - start_time = datetime.fromtimestamp(start_time) + start_time = datetime.utcfromtimestamp(start_time) if not isinstance(end_time, datetime): - end_time = datetime.fromtimestamp(end_time) + end_time = datetime.utcfromtimestamp(end_time) sel = { self.__latitude: slice(min_lat, max_lat), @@ -142,7 +142,7 @@ def find_days_in_range_asc(self, min_lat, max_lat, min_lon, max_lon, dataset, st if np.issubdtype(times.dtype, np.datetime64): times = ((times - np.datetime64(EPOCH)) / 1e9).astype(int) - times = sorted(list(times)) + times = sorted(times.tolist()) if metrics_callback: metrics_callback(backend=(datetime.now() - start).total_seconds()) @@ -193,9 +193,14 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t 'max_lon': max_lon } + times = None + if 0 <= start_time <= end_time: - params['min_time'] = start_time - params['max_time'] = end_time + if kwargs.get('distinct', False): + times_asc = self.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time) + times = [(t, t) for t in times_asc] + else: + times = [(start_time, end_time)] if 'depth' in kwargs: params['depth'] = kwargs['depth'] @@ -203,7 +208,10 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t params['min_depth'] = kwargs.get('min_depth') params['max_depth'] = kwargs.get('max_depth') - return [ZarrBackend.__to_url(self._name, **params)] + if times: + return [ZarrBackend.__to_url(self._name, min_time=t[0], max_time=t[1], **params) for t in times] + else: + return [ZarrBackend.__to_url(self._name, **params)] def find_tiles_in_polygon(self, bounding_polygon, ds=None, start_time=None, end_time=None, **kwargs): # Find tiles that fall within the polygon in the Solr index @@ -365,10 +373,10 @@ def __fetch_data_for_tile(self, tile: Tile): max_time = float(tile.max_time) if min_time: - min_time = datetime.fromtimestamp(min_time) + min_time = datetime.utcfromtimestamp(min_time) if max_time: - max_time = datetime.fromtimestamp(max_time) + max_time = datetime.utcfromtimestamp(max_time) if bbox: min_lat = bbox.min_lat @@ -376,23 +384,25 @@ def __fetch_data_for_tile(self, tile: Tile): max_lat = bbox.max_lat max_lon = bbox.max_lon - sel = { + sel_g = { self.__latitude: slice(min_lat, max_lat), self.__longitude: slice(min_lon, max_lon), } + sel_t = {} + if min_time == max_time: - sel[self.__time] = min_time + sel_t[self.__time] = [min_time] # List, otherwise self.__time dim will be dropped method = 'nearest' else: - sel[self.__time] = slice(min_time, max_time) + sel_t[self.__time] = slice(min_time, max_time) method = None tile.variables = [ TileVariable(v, v) for v in self.__variables ] - matched = self.__ds.sel(sel, 
method=method) + matched = self.__ds.sel(sel_g).sel(sel_t, method=method) tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) @@ -453,6 +463,15 @@ def __to_url(dataset, **kwargs): if 'ds' in kwargs: del kwargs['ds'] + # If any params are numpy dtypes, extract them to base python types + for kw in kwargs: + v = kwargs[kw] + + if isinstance(v, np.generic): + v = v.item() + + kwargs[kw] = v + return str(URL.build( scheme='nts', host='', From 53190e2834e47b547bffefb3f5ffc407efa5165c Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 31 Jul 2023 07:54:13 -0700 Subject: [PATCH 20/70] Update backend dict on dataset mgmt query --- data-access/nexustiles/nexustiles.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index a5abd241..772f6f4f 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -306,6 +306,11 @@ def user_ds_update(name, config): }]) solr.commit() + logger.info(f'Updated dataset {name} in Solr. Updating backends') + + with DS_LOCK: + NexusTileService._update_datasets() + return {'success': True} # Add dataset + backend @@ -333,6 +338,11 @@ def user_ds_add(name, path, config, type='zarr'): }]) solr.commit() + logger.info(f'Added dataset {name} to Solr. Updating backends') + + with DS_LOCK: + NexusTileService._update_datasets() + return {'success': True} # Delete dataset backend (error if it's a hardcoded one) @@ -353,6 +363,11 @@ def user_ds_delete(name): solr.delete(id=ds['id']) solr.commit() + logger.info(f'Removed dataset {name} from Solr. Updating backends') + + with DS_LOCK: + NexusTileService._update_datasets() + return {'success': True} def override_config(self, config): From 2e7a0dcc280d7f447e9c09e723ddb3a5215d4460 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 31 Jul 2023 12:36:13 -0700 Subject: [PATCH 21/70] Fixes and improvements --- analysis/webservice/management/Datasets.py | 12 ++++++++-- .../nexustiles/backends/zarr/backend.py | 22 ++++++++++++++----- data-access/nexustiles/nexustiles.py | 10 ++++++++- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index 48071f7c..ded1e8a2 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -38,8 +38,8 @@ Opt('depth'): str }, Opt('aws'): { - 'accessKeyID': Or(str, None), - 'secretAccessKey': Or(str, None), + Opt('accessKeyID'): str, + Opt('secretAccessKey'): str, 'public': bool, Opt('region'): str } @@ -64,6 +64,14 @@ def parse_config(request: NexusRequestObject): try: CONFIG_SCHEMA.validate(config_dict) + + if 'aws' in config_dict: + if not config_dict['aws']['public']: + if 'accessKeyID' not in config_dict['aws'] or 'secretAccessKey' not in config_dict['aws']: + raise NexusProcessingException( + reason='Must provide AWS creds for non-public bucket', + code=400 + ) except SchemaError as e: raise NexusProcessingException( reason=str(e), diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 9aab3cff..214a991b 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -81,8 +81,10 @@ def __init__(self, dataset_name, path, config=None): aws_cfg = self.__config['aws'] if aws_cfg['public']: - region = aws_cfg.get('region', 'us-west-2') - 
store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + # region = aws_cfg.get('region', 'us-west-2') + # store = f'https://{self.__host}.s3.{region}.amazonaws.com{self.__path}' + s3 = s3fs.S3FileSystem(True) + store = s3fs.S3Map(root=path, s3=s3, check=False) else: s3 = s3fs.S3FileSystem(False, key=aws_cfg['accessKeyID'], secret=aws_cfg['secretAccessKey']) store = s3fs.S3Map(root=path, s3=s3, check=False) @@ -116,7 +118,7 @@ def get_dataseries_list(self, simple=False): return [ds] def find_tile_by_id(self, tile_id, **kwargs): - return tile_id + return [tile_id] def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): return tile_ids @@ -330,7 +332,7 @@ def get_max_time(self, tile_ids, ds=None): min_date, max_date = self.__get_ds_min_max_date() return max_date else: - max(times) + return max(times) def get_distinct_bounding_boxes_in_polygon(self, bounding_polygon, ds, start_time, end_time): """ @@ -442,6 +444,7 @@ def __nts_url_to_tile(nts_url): pass tile.dataset = url.path + tile.dataset_id = url.path try: tile.min_time = int(url.query['min_time']) @@ -453,6 +456,8 @@ def __nts_url_to_tile(nts_url): except KeyError: pass + tile.meta_data = {} + return tile @staticmethod @@ -463,20 +468,25 @@ def __to_url(dataset, **kwargs): if 'ds' in kwargs: del kwargs['ds'] + params = {} + # If any params are numpy dtypes, extract them to base python types for kw in kwargs: v = kwargs[kw] + if v is None: + continue + if isinstance(v, np.generic): v = v.item() - kwargs[kw] = v + params[kw] = v return str(URL.build( scheme='nts', host='', path=dataset, - query=kwargs + query=params )) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index 772f6f4f..ed526c55 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -31,6 +31,7 @@ from shapely.geometry import box from webservice.webmodel import DatasetNotFoundException, NexusProcessingException from webservice.NexusHandler import nexus_initializer +from yarl import URL from .AbstractTileService import AbstractTileService from .backends.nexusproto.backend import NexusprotoTileService @@ -388,11 +389,18 @@ def get_dataseries_list(self, simple=False): @tile_data() @catch_not_implemented def find_tile_by_id(self, tile_id, **kwargs): - return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) + tile = URL(tile_id) + + if tile.scheme == 'nts': + return NexusTileService._get_backend(tile.path).find_tile_by_id(tile_id) + else: + return NexusTileService._get_backend('__nexusproto__').find_tile_by_id(tile_id) @tile_data() @catch_not_implemented def find_tiles_by_id(self, tile_ids, ds=None, **kwargs): + if ds is None: + return [self.find_tile_by_id(tid, **kwargs, fetch_data=False) for tid in tile_ids] return NexusTileService._get_backend(ds).find_tiles_by_id(tile_ids, ds=ds, **kwargs) @catch_not_implemented From 08693754a542d655069c19c0503adbd41a401a7c Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 31 Jul 2023 12:38:01 -0700 Subject: [PATCH 22/70] Adapted matchup to work with zarr backends --- analysis/webservice/algorithms_spark/Matchup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index f27612a5..7f84063e 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -777,9 +777,9 @@ def match_satellite_to_insitu(tile_ids, primary_b, secondary_b, 
parameter_b, tt_ tile_service = tile_service_factory() # Determine the spatial temporal extents of this partition of tiles - tiles_bbox = tile_service.get_bounding_box(tile_ids) - tiles_min_time = tile_service.get_min_time(tile_ids) - tiles_max_time = tile_service.get_max_time(tile_ids) + tiles_bbox = tile_service.get_bounding_box(tile_ids, ds=primary_b.value) + tiles_min_time = tile_service.get_min_time(tile_ids, ds=primary_b.value) + tiles_max_time = tile_service.get_max_time(tile_ids, ds=primary_b.value) # Increase spatial extents by the radius tolerance matchup_min_lon, matchup_min_lat = add_meters_to_lon_lat(tiles_bbox.bounds[0], tiles_bbox.bounds[1], @@ -858,7 +858,7 @@ def match_satellite_to_insitu(tile_ids, primary_b, secondary_b, parameter_b, tt_ edge_results = [] for tile in matchup_tiles: # Retrieve tile data and convert to lat/lon projection - tiles = tile_service.find_tile_by_id(tile.tile_id, fetch_data=True) + tiles = tile_service.find_tile_by_id(tile.tile_id, fetch_data=True, ds=secondary_b.value) tile = tiles[0] valid_indices = tile.get_indices() @@ -884,14 +884,14 @@ def match_satellite_to_insitu(tile_ids, primary_b, secondary_b, parameter_b, tt_ # The actual matching happens in the generator. This is so that we only load 1 tile into memory at a time match_generators = [match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, bounding_wkt_b.value, - parameter_b.value, rt_b.value, aeqd_proj) for tile_id - in tile_ids] + parameter_b.value, rt_b.value, aeqd_proj, primary_b.value) + for tile_id in tile_ids] return chain(*match_generators) def match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, search_domain_bounding_wkt, - search_parameter, radius_tolerance, aeqd_proj): + search_parameter, radius_tolerance, aeqd_proj, primary_ds): from nexustiles.model.nexusmodel import NexusPoint from webservice.algorithms_spark.Matchup import DomsPoint # Must import DomsPoint or Spark complains @@ -899,7 +899,7 @@ def match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, s try: the_time = datetime.now() tile = tile_service.mask_tiles_to_polygon(wkt.loads(search_domain_bounding_wkt), - tile_service.find_tile_by_id(tile_id))[0] + tile_service.find_tile_by_id(tile_id, ds=primary_ds))[0] print("%s Time to load tile %s" % (str(datetime.now() - the_time), tile_id)) except IndexError: # This should only happen if all measurements in a tile become masked after applying the bounding polygon From 1eb680bb794914f8a9e3d9b8fae3ebd3a0b970cb Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 1 Aug 2023 14:45:25 -0700 Subject: [PATCH 23/70] Zarr support - Distinct slices of time is now default - No longer assuming+shaping as multivar tiles unless needed --- .../webservice/algorithms_spark/HofMoellerSpark.py | 8 ++++---- data-access/nexustiles/backends/zarr/backend.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/analysis/webservice/algorithms_spark/HofMoellerSpark.py b/analysis/webservice/algorithms_spark/HofMoellerSpark.py index 6231bdb1..90ca87c0 100644 --- a/analysis/webservice/algorithms_spark/HofMoellerSpark.py +++ b/analysis/webservice/algorithms_spark/HofMoellerSpark.py @@ -44,12 +44,12 @@ class HofMoellerCalculator(object): def hofmoeller_stats(tile_service_factory, metrics_callback, tile_in_spark): (latlon, tile_id, index, - min_lat, max_lat, min_lon, max_lon) = tile_in_spark + min_lat, max_lat, min_lon, max_lon, dataset) = tile_in_spark tile_service = tile_service_factory() try: # Load the dataset 
tile - tile = tile_service.find_tile_by_id(tile_id, metrics_callback=metrics_callback)[0] + tile = tile_service.find_tile_by_id(tile_id, metrics_callback=metrics_callback, ds=dataset)[0] calculation_start = datetime.now() # Mask it to the search domain tile = tile_service.mask_tiles_to_bbox(min_lat, max_lat, @@ -352,7 +352,7 @@ def calc(self, compute_options, **args): min_lon, min_lat, max_lon, max_lat = bbox.bounds - nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon) for x, tile in + nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon, tile.dataset) for x, tile in enumerate(self._get_tile_service().find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, metrics_callback=metrics_record.record_metrics, @@ -408,7 +408,7 @@ def calc(self, compute_options, **args): min_lon, min_lat, max_lon, max_lat = bbox.bounds - nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon) for x, tile in + nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon, tile.dataset) for x, tile in enumerate(self._get_tile_service().find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time, metrics_callback=metrics_record.record_metrics, diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 214a991b..29099d28 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -198,7 +198,7 @@ def find_tiles_in_box(self, min_lat, max_lat, min_lon, max_lon, ds=None, start_t times = None if 0 <= start_time <= end_time: - if kwargs.get('distinct', False): + if kwargs.get('distinct', True): times_asc = self.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time) times = [(t, t) for t in times_asc] else: @@ -416,11 +416,15 @@ def __fetch_data_for_tile(self, tile: Tile): tile.times = ma.masked_invalid(times) - tile.data = ma.masked_invalid( - [matched[var].to_numpy() for var in self.__variables] - ) + var_data = [matched[var].to_numpy() for var in self.__variables] + + if len(self.__variables) > 1: + tile.data = ma.masked_invalid(var_data) + tile.is_multi = True + else: + tile.data = ma.masked_invalid(var_data[0]) + tile.is_multi = False - tile.is_multi = True def _metadata_store_docs_to_tiles(self, *store_docs): return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] From 0aef0f13d0b2178724eb6aa9e198fb4260066920 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 2 Aug 2023 14:34:15 -0700 Subject: [PATCH 24/70] DDAS adjustments --- .../algorithms_spark/DailyDifferenceAverageSpark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py b/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py index b4245783..12f7deec 100644 --- a/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py +++ b/analysis/webservice/algorithms_spark/DailyDifferenceAverageSpark.py @@ -324,7 +324,7 @@ def calculate_diff(tile_service_factory, tile_ids, bounding_wkt, dataset, climat for tile_id in tile_ids: # Get the dataset tile try: - dataset_tile = get_dataset_tile(tile_service, wkt.loads(bounding_wkt.value), tile_id) + dataset_tile = get_dataset_tile(tile_service, wkt.loads(bounding_wkt.value), tile_id, dataset.value) except NoDatasetTile: # This should only happen if all measurements in a tile become masked after 
applying the bounding polygon continue @@ -348,12 +348,12 @@ def calculate_diff(tile_service_factory, tile_ids, bounding_wkt, dataset, climat return chain(*diff_generators) -def get_dataset_tile(tile_service, search_bounding_shape, tile_id): +def get_dataset_tile(tile_service, search_bounding_shape, tile_id, dataset): the_time = datetime.now() try: # Load the dataset tile - dataset_tile = tile_service.find_tile_by_id(tile_id)[0] + dataset_tile = tile_service.find_tile_by_id(tile_id, ds=dataset)[0] # Mask it to the search domain dataset_tile = tile_service.mask_tiles_to_polygon(search_bounding_shape, [dataset_tile])[0] except IndexError: From 42b912ebec6e445ef4d163f7774c50aa1c422339 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 3 Aug 2023 14:33:26 -0700 Subject: [PATCH 25/70] find_tile_by_polygon_and_most_recent_day_of_year impl --- .../nexustiles/backends/zarr/backend.py | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 29099d28..d592954c 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -174,7 +174,24 @@ def find_tile_by_polygon_and_most_recent_day_of_year(self, bounding_polygon, ds, :param day_of_year: Tile day of year to search for, tile nearest to this day (without going over) will be returned :return: List of one tile from ds with bounding_polygon on or before day_of_year or raise NexusTileServiceException if no tile found """ - raise NotImplementedError() + + times = self.__ds[self.__time].to_numpy() + + to_doy = lambda dt: datetime.utcfromtimestamp(int(dt)).timetuple().tm_yday + + vfunc = np.vectorize(to_doy) + days_of_year = vfunc(times.astype(datetime) / 1e9) + + try: + time = times[np.where(days_of_year <= day_of_year)[0][-1]].astype(datetime) / 1e9 + except IndexError: + raise NexusTileServiceException(reason='No tiles matched') + + min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds + + return self.find_tiles_in_box( + min_lat, max_lat, min_lon, max_lon, ds, time, time + ) def find_all_tiles_in_box_at_time(self, min_lat, max_lat, min_lon, max_lon, dataset, time, **kwargs): return self.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon, dataset, time, time, **kwargs) @@ -371,14 +388,14 @@ def __fetch_data_for_tile(self, tile: Tile): max_lat = None max_lon = None - min_time = float(tile.min_time) - max_time = float(tile.max_time) - - if min_time: - min_time = datetime.utcfromtimestamp(min_time) + min_time = tile.min_time + max_time = tile.max_time - if max_time: - max_time = datetime.utcfromtimestamp(max_time) + # if min_time: + # min_time = datetime.utcfromtimestamp(min_time) + # + # if max_time: + # max_time = datetime.utcfromtimestamp(max_time) if bbox: min_lat = bbox.min_lat @@ -451,12 +468,14 @@ def __nts_url_to_tile(nts_url): tile.dataset_id = url.path try: - tile.min_time = int(url.query['min_time']) + # tile.min_time = int(url.query['min_time']) + tile.min_time = datetime.utcfromtimestamp(int(url.query['min_time'])) except KeyError: pass try: - tile.max_time = int(url.query['max_time']) + # tile.max_time = int(url.query['max_time']) + tile.max_time = datetime.utcfromtimestamp(int(url.query['max_time'])) except KeyError: pass From 1559fbafee08a73e0ab8c44e063e30870592a078 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 8 Aug 2023 15:35:05 -0700 Subject: [PATCH 26/70] Don't sel by time if neither max nor min time are given --- 
data-access/nexustiles/backends/zarr/backend.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index d592954c..c8fd0fe1 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -410,7 +410,10 @@ def __fetch_data_for_tile(self, tile: Tile): sel_t = {} - if min_time == max_time: + if min_time is None and max_time is None: + sel_t = None + method = None + elif min_time == max_time: sel_t[self.__time] = [min_time] # List, otherwise self.__time dim will be dropped method = 'nearest' else: @@ -421,7 +424,10 @@ def __fetch_data_for_tile(self, tile: Tile): TileVariable(v, v) for v in self.__variables ] - matched = self.__ds.sel(sel_g).sel(sel_t, method=method) + matched = self.__ds.sel(sel_g) #.sel(sel_t, method=method) + + if sel_t is not None: + matched = matched.sel(sel_t, method=method) tile.latitudes = ma.masked_invalid(matched[self.__latitude].to_numpy()) tile.longitudes = ma.masked_invalid(matched[self.__longitude].to_numpy()) From 2bb52afb0925e89921f3576138228defea966c47 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 15 Aug 2023 13:15:29 -0700 Subject: [PATCH 27/70] Fix not calling partial when needed --- analysis/webservice/algorithms_spark/CorrMapSpark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/webservice/algorithms_spark/CorrMapSpark.py b/analysis/webservice/algorithms_spark/CorrMapSpark.py index fe1954df..7336993a 100644 --- a/analysis/webservice/algorithms_spark/CorrMapSpark.py +++ b/analysis/webservice/algorithms_spark/CorrMapSpark.py @@ -57,7 +57,7 @@ def _map(tile_service_factory, tile_in): # print 'days_at_a_time = ', days_at_a_time t_incr = 86400 * days_at_a_time - tile_service = tile_service_factory + tile_service = tile_service_factory() # Compute the intermediate summations needed for the Pearson # Correlation Coefficient. 
We use a one-pass online algorithm From f9dc2aebd77c1739a24823eaf2f529bba220ee4e Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 18 Aug 2023 09:32:18 -0700 Subject: [PATCH 28/70] Pinned s3fs and fsspec versions --- data-access/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index c732bede..db1bf2cf 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -20,7 +20,7 @@ urllib3==1.26.2 requests nexusproto Shapely -s3fs -fsspec +s3fs==2022.5.0 +fsspec==2022.5.0 xarray~=2022.3.0 numpy==1.24.3 \ No newline at end of file From a6f602d63705bb753ccfaaced202de366c0dd462 Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 18 Aug 2023 11:44:10 -0700 Subject: [PATCH 29/70] Fixed some dependencies to ensure image builds properly + s3fs works --- analysis/conda-requirements.txt | 3 ++- data-access/requirements.txt | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt index 902d5114..22dff066 100644 --- a/analysis/conda-requirements.txt +++ b/analysis/conda-requirements.txt @@ -22,7 +22,8 @@ pytz==2021.1 utm==0.6.0 shapely==1.7.1 backports.functools_lru_cache==1.6.1 -boto3==1.16.63 +boto3>=1.16.63 +botocore==1.24.21 pillow==8.1.0 mpld3=0.5.1 tornado==6.1 diff --git a/data-access/requirements.txt b/data-access/requirements.txt index db1bf2cf..48a1fc6a 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -22,5 +22,9 @@ nexusproto Shapely s3fs==2022.5.0 fsspec==2022.5.0 +botocore==1.24.21 +aiohttp==3.8.1 xarray~=2022.3.0 -numpy==1.24.3 \ No newline at end of file +numpy==1.24.3 +pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build + From 1a451eba314f17d9fabcbf152b4214ab9819da4a Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 21 Aug 2023 07:39:23 -0700 Subject: [PATCH 30/70] Config override for backends --- data-access/nexustiles/nexustiles.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index ed526c55..b4fd6bba 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -141,9 +141,6 @@ def __init__(self, config=None): self._alg_config = config - if config: - self.override_config(config) - if not NexusTileService.backends: NexusTileService.ds_config = configparser.RawConfigParser() NexusTileService.ds_config.read(NexusTileService._get_config_files('config/datasets.ini')) @@ -153,6 +150,9 @@ def __init__(self, config=None): NexusTileService.backends[None] = default_backend NexusTileService.backends['__nexusproto__'] = default_backend + if config: + self.override_config(config) + if not NexusTileService.__update_thread: NexusTileService.__update_thread = threading.Thread( target=NexusTileService.__update_datasets_loop, @@ -377,6 +377,10 @@ def override_config(self, config): for option in config.options(section): if config.get(section, option) is not None: self._config.set(section, option, config.get(section, option)) + if NexusTileService.ds_config.has_section(section): # only override preexisting section, ignores the other + for option in config.options(section): + if config.get(section, option) is not None: + NexusTileService.ds_config.set(section, option, config.get(section, option)) def get_dataseries_list(self, simple=False): datasets = [] From 6f8f7b10f60c316b1a8dd6ed39984b81a8e19294 Mon Sep 17 00:00:00 2001 
From: rileykk Date: Mon, 21 Aug 2023 07:40:51 -0700 Subject: [PATCH 31/70] Deps update --- data-access/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/data-access/requirements.txt b/data-access/requirements.txt index 48a1fc6a..9001ed34 100644 --- a/data-access/requirements.txt +++ b/data-access/requirements.txt @@ -25,6 +25,7 @@ fsspec==2022.5.0 botocore==1.24.21 aiohttp==3.8.1 xarray~=2022.3.0 +zarr>=2.11.3 numpy==1.24.3 pandas<2.1.0rc0 # Temporary restriction because 2.1.0rc0 fails to build From 483ad9f07d277cba13c18d946821a806995f1afd Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 31 Aug 2023 15:57:57 -0700 Subject: [PATCH 32/70] Add metadata from Zarr collection to /list --- data-access/nexustiles/backends/zarr/backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index c8fd0fe1..818d4b07 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -115,6 +115,8 @@ def get_dataseries_list(self, simple=False): ds['iso_start'] = datetime.utcfromtimestamp(min_date).strftime(ISO_8601) ds['iso_end'] = datetime.utcfromtimestamp(max_date).strftime(ISO_8601) + ds['metadata'] = dict(self.__ds.attrs) + return [ds] def find_tile_by_id(self, tile_id, **kwargs): From f5750c32eafaef0fb5f31795037c30cec1e1c325 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 14 Sep 2023 14:07:16 -0700 Subject: [PATCH 33/70] Zarr: Probe lat order and flip if necessary --- data-access/nexustiles/backends/zarr/backend.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 818d4b07..01559000 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -100,6 +100,13 @@ def __init__(self, dataset_name, path, config=None): logger.error(f'Failed to open zarr dataset at {self.__path}, ignoring it. Cause: {e}') raise NexusTileServiceException(f'Cannot open dataset ({e})') + lats = self.__ds[self.__latitude].to_numpy() + delta = lats[1] - lats[0] + + if delta < 0: + logger.warning(f'Latitude coordinate for {self._name} is in descending order. 
Flipping it to ascending') + self.__ds = self.__ds.isel({self.__latitude: slice(None, None, -1)}) + def get_dataseries_list(self, simple=False): ds = { "shortName": self._name, From 7fc260ae53b2b97f0140e12c1a18f9ba9d6e7b4e Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 20 Sep 2023 09:14:00 -0700 Subject: [PATCH 34/70] Strip quotes from variable names CM can sometimes publish with extra quotes resulting in KeyErrors --- data-access/nexustiles/backends/zarr/backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index 01559000..e1d0a0c1 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -68,6 +68,8 @@ def __init__(self, dataset_name, path, config=None): else: raise TypeError(f'Improper type for variables config: {type(data_vars)}') + self.__variables = [v.strip('\"\'') for v in self.__variables] + self.__longitude = config['coords']['longitude'] self.__latitude = config['coords']['latitude'] self.__time = config['coords']['time'] From b5df944ec6dcc3bb038c02f5849fd300c0a219c6 Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 13:37:09 -0700 Subject: [PATCH 35/70] removed resultSizeLimit param from matchup --- analysis/webservice/algorithms_spark/Matchup.py | 17 +++-------------- analysis/webservice/apidocs/openapi.yml | 13 ------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index a55f61d1..77ecc346 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -137,14 +137,6 @@ class Matchup(NexusCalcSparkTornadoHandler): + "If true, only the nearest point will be returned for each primary point. " + "If false, all points within the tolerances will be returned for each primary point. Default: False" }, - "resultSizeLimit": { - "name": "Result Size Limit", - "type": "int", - "description": "Optional integer value that limits the number of results returned from the matchup. " - "If the number of primary matches is greater than this limit, the service will respond with " - "(HTTP 202: Accepted) and an empty response body. A value of 0 means return all results. 
" - "Default: 500" - }, "prioritizeDistance": { "name": "Prioritize distance", "type": "boolean", @@ -223,8 +215,6 @@ def parse_arguments(self, request): match_once = request.get_boolean_arg("matchOnce", default=False) - result_size_limit = request.get_int_arg("resultSizeLimit", default=500) - start_seconds_from_epoch = int((start_time - EPOCH).total_seconds()) end_seconds_from_epoch = int((end_time - EPOCH).total_seconds()) @@ -234,7 +224,7 @@ def parse_arguments(self, request): return bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, result_size_limit, prioritize_distance + platforms, match_once, prioritize_distance def get_job_pool(self, tile_ids): if len(tile_ids) > LARGE_JOB_THRESHOLD: @@ -244,7 +234,7 @@ def get_job_pool(self, tile_ids): def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, start_time, end_time, depth_min, depth_max, time_tolerance, radius_tolerance, platforms, match_once, - result_size_limit, start, prioritize_distance): + start, prioritize_distance): # Call spark_matchup self.log.debug("Calling Spark Driver") @@ -310,7 +300,7 @@ def calc(self, request, tornado_io_loop, **args): bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, result_size_limit, prioritize_distance = self.parse_arguments(request) + platforms, match_once, prioritize_distance = self.parse_arguments(request) args = { "primary": primary_ds_name, @@ -380,7 +370,6 @@ def calc(self, request, tornado_io_loop, **args): radius_tolerance=radius_tolerance, platforms=platforms, match_once=match_once, - result_size_limit=result_size_limit, start=start, prioritize_distance=prioritize_distance )) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index ea9b16ba..dc6fdb4a 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -154,19 +154,6 @@ paths: type: boolean default: false example: false - - in: query - name: resultSizeLimit - description: | - Optional integer value that limits the number of results - returned from the matchup. If the number of primary matches - is greater than this limit, the service will respond with - (HTTP 202 Accepted) and an empty response body. A value of - 0 means return all results. 
- required: false - schema: - type: integer - default: 500 - example: 500 - in: query name: prioritizeDistance description: | From 5e0fbb2521cc8ce2fa33281707aeb28950384a6b Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 15:45:41 -0700 Subject: [PATCH 36/70] Add # of primaries/avergae secondaries to job output --- .../webservice/algorithms/doms/ExecutionStatus.py | 12 +++++++++++- .../webservice/algorithms/doms/ResultsStorage.py | 4 ++-- .../webservice/webmodel/NexusExecutionResults.py | 13 +++++++++++-- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 1bae4556..2add7b1f 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -53,6 +53,14 @@ def calc(self, request, **args): code=404 ) + # Get execution stats. This call will raise an exception if the + # execution is not done. + with ResultsRetrieval(self.config) as retrieval: + try: + execution_stats = retrieval.retrieveStats(execution_id) + except NexusProcessingException: + execution_stats = {} + job_status = NexusExecutionResults.ExecutionStatus(execution_details['status']) host = f'{request.requestHandler.request.protocol}://{request.requestHandler.request.host}' @@ -63,5 +71,7 @@ def calc(self, request, **args): execution_id=execution_id, message=execution_details['message'], params=execution_params, - host=host + host=host, + num_primary_matched=execution_stats.get('numPrimaryMatched'), + num_secondary_matched=execution_stats.get('numSecondaryMatched') ) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 39db27b3..99e3c6b7 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -286,7 +286,7 @@ def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1 execution_id = uuid.UUID(execution_id) params = self.retrieveParams(execution_id) - stats = self.__retrieveStats(execution_id) + stats = self.retrieveStats(execution_id) data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size) return params, stats, data @@ -357,7 +357,7 @@ def __rowToDataEntry(self, row, trim_data=False): return entry - def __retrieveStats(self, id): + def retrieveStats(self, id): cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) for row in rows: diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index d5c12046..7cf9abb1 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -44,7 +44,8 @@ def construct_job_status(job_state, created, updated, execution_id, params, host } -def construct_done(status, created, completed, execution_id, params, host): +def construct_done(status, created, completed, execution_id, params, host, + num_primary_matched, num_secondary_matched): job_body = construct_job_status( status, created, @@ -53,6 +54,9 @@ def construct_done(status, created, completed, execution_id, params, host): params, host ) + # Add stats to body + job_body['totalPrimaryMatched'] = num_primary_matched + job_body['averageSecondaryMatched'] = 
round(num_secondary_matched/num_primary_matched) # Construct urls formats = [ @@ -112,7 +116,8 @@ def construct_cancelled(status, created, completed, execution_id, params, host): class NexusExecutionResults: def __init__(self, status=None, created=None, completed=None, execution_id=None, message='', - params=None, host=None, status_code=200): + params=None, host=None, status_code=200, num_primary_matched=None, + num_secondary_matched=None): self.status_code = status_code self.status = status self.created = created @@ -121,6 +126,8 @@ def __init__(self, status=None, created=None, completed=None, execution_id=None, self.message = message self.execution_params = params self.host = host + self.num_primary_matched = num_primary_matched + self.num_secondary_matched = num_secondary_matched def toJson(self): params = { @@ -132,6 +139,8 @@ def toJson(self): } if self.status == ExecutionStatus.SUCCESS: params['completed'] = self.completed + params['num_primary_matched'] = self.num_primary_matched + params['num_secondary_matched'] = self.num_secondary_matched construct = construct_done elif self.status == ExecutionStatus.RUNNING: construct = construct_running From fbad6b72bf649709cdb51152d49f65bf0a7c4cac Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 15:48:01 -0700 Subject: [PATCH 37/70] rename to executionId --- analysis/webservice/apidocs/openapi.yml | 4 ++-- analysis/webservice/webmodel/NexusExecutionResults.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index dc6fdb4a..f5c57a3e 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -684,7 +684,7 @@ paths: - in: query name: id description: | - The job execution ID + The execution ID required: true schema: type: string @@ -702,7 +702,7 @@ paths: - in: query name: id description: | - The job execution ID + The execution ID required: true schema: type: string diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 7cf9abb1..c80914dd 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -40,7 +40,7 @@ def construct_job_status(job_state, created, updated, execution_id, params, host 'rel': 'self' }], 'params': params, - 'jobID': execution_id + 'executionID': execution_id } From e0a5999792b466b502c65d7d042d911a5214a4ba Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 25 Sep 2023 15:50:20 -0700 Subject: [PATCH 38/70] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11789189..b8ed55b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,9 +24,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-482: Updated Saildrone in situ endpoint in config file - SDAP-485: Improved behavior for retrying failed Cassandra inserts when saving matchup results. - SDAP-487: Improved result fetch speed for large matchup results by tweaking `doms.doms_data` schema to support querying by primary value id. +- SDAP-493: + - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint + - Updated /job endpoint with details about number of primary and secondary tiles. ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. 
+- SDAP-493: + - Removed `resultSizeLimit` from /match_spark endpoint ### Fixed - SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters - SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark` From 8942afc55d7f438b35a2df7392b09496c19813c9 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 29 Sep 2023 10:59:32 -0700 Subject: [PATCH 39/70] add totalSecondaryMatched field to /job output --- analysis/webservice/webmodel/NexusExecutionResults.py | 1 + 1 file changed, 1 insertion(+) diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index c80914dd..961fd198 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -56,6 +56,7 @@ def construct_done(status, created, completed, execution_id, params, host, ) # Add stats to body job_body['totalPrimaryMatched'] = num_primary_matched + job_body['totalSecondaryMatched'] = num_secondary_matched job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) # Construct urls From dd73036307a313eaba57203f208196da2f6a3ab0 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 29 Sep 2023 14:25:21 -0700 Subject: [PATCH 40/70] num unique secondaries addition --- .../algorithms/doms/DomsInitialization.py | 5 +++-- .../algorithms/doms/ExecutionStatus.py | 3 ++- .../algorithms/doms/ResultsStorage.py | 20 ++++++++++--------- .../webservice/algorithms_spark/Matchup.py | 8 +++++--- .../webmodel/NexusExecutionResults.py | 7 +++++-- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/analysis/webservice/algorithms/doms/DomsInitialization.py b/analysis/webservice/algorithms/doms/DomsInitialization.py index 43627b14..a10a7e70 100644 --- a/analysis/webservice/algorithms/doms/DomsInitialization.py +++ b/analysis/webservice/algorithms/doms/DomsInitialization.py @@ -173,7 +173,7 @@ def createDomsDataTable(self, session): def createDomsExecutionStatsTable(self, session): log = logging.getLogger(__name__) - log.info("Verifying doms_execuction_stats table") + log.info("Verifying doms_execution_stats table") cql = """ CREATE TABLE IF NOT EXISTS doms_execution_stats ( execution_id uuid PRIMARY KEY, @@ -181,7 +181,8 @@ def createDomsExecutionStatsTable(self, session): num_gridded_checked int, num_insitu_matched int, num_insitu_checked int, - time_to_complete int + time_to_complete int, + num_unique_secondaries int ); """ session.execute(cql) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 2add7b1f..eafdbbbf 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -73,5 +73,6 @@ def calc(self, request, **args): params=execution_params, host=host, num_primary_matched=execution_stats.get('numPrimaryMatched'), - num_secondary_matched=execution_stats.get('numSecondaryMatched') + num_secondary_matched=execution_stats.get('numSecondaryMatched'), + num_unique_secondaries=execution_stats.get('numUniqueSecondaries') ) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 99e3c6b7..48b2122d 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -166,17 +166,18 @@ def __insertParams(self, execution_id, params): def __insertStats(self, execution_id, 
stats): cql = """ INSERT INTO doms_execution_stats - (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete) + (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete, num_unique_secondaries) VALUES - (%s, %s, %s, %s, %s, %s) + (%s, %s, %s, %s, %s, %s, %s) """ self._session.execute(cql, ( execution_id, - stats["numPrimaryMatched"], + stats['numPrimaryMatched'], None, - stats["numSecondaryMatched"], + stats['numSecondaryMatched'], None, - stats["timeToComplete"] + stats['timeToComplete'], + stats['numUniqueSecondaries'] )) def __insertResults(self, execution_id, results): @@ -358,13 +359,14 @@ def __rowToDataEntry(self, row, trim_data=False): return entry def retrieveStats(self, id): - cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1" + cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete, num_unique_secondaries FROM doms_execution_stats where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) for row in rows: stats = { - "timeToComplete": row.time_to_complete, - "numSecondaryMatched": row.num_insitu_matched, - "numPrimaryMatched": row.num_gridded_matched, + 'timeToComplete': row.time_to_complete, + 'numSecondaryMatched': row.num_insitu_matched, + 'numPrimaryMatched': row.num_gridded_matched, + 'numUniqueSecondaries': row.num_unique_secondaries } return stats diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 77ecc346..46d1d89d 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -276,10 +276,12 @@ def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name, total_keys = len(list(spark_result.keys())) total_values = sum(len(v) for v in spark_result.values()) + unique_values = len(set([point.data_id for point in spark_result.values()])) details = { - "timeToComplete": int((end - start).total_seconds()), - "numSecondaryMatched": total_values, - "numPrimaryMatched": total_keys + 'timeToComplete': int((end - start).total_seconds()), + 'numSecondaryMatched': total_values, + 'numPrimaryMatched': total_keys, + 'numUniqueSecondaries': unique_values } matches = Matchup.convert_to_matches(spark_result) diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 961fd198..2b0007ac 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -45,7 +45,7 @@ def construct_job_status(job_state, created, updated, execution_id, params, host def construct_done(status, created, completed, execution_id, params, host, - num_primary_matched, num_secondary_matched): + num_primary_matched, num_secondary_matched, num_unique_secondaries): job_body = construct_job_status( status, created, @@ -58,6 +58,7 @@ def construct_done(status, created, completed, execution_id, params, host, job_body['totalPrimaryMatched'] = num_primary_matched job_body['totalSecondaryMatched'] = num_secondary_matched job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) + job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries # Construct urls formats = [ @@ -118,7 +119,7 @@ def construct_cancelled(status, created, completed, execution_id, params, host): class NexusExecutionResults: def 
__init__(self, status=None, created=None, completed=None, execution_id=None, message='', params=None, host=None, status_code=200, num_primary_matched=None, - num_secondary_matched=None): + num_secondary_matched=None, num_unique_secondaries=None): self.status_code = status_code self.status = status self.created = created @@ -129,6 +130,7 @@ def __init__(self, status=None, created=None, completed=None, execution_id=None, self.host = host self.num_primary_matched = num_primary_matched self.num_secondary_matched = num_secondary_matched + self.num_unique_secondaries = num_unique_secondaries def toJson(self): params = { @@ -142,6 +144,7 @@ def toJson(self): params['completed'] = self.completed params['num_primary_matched'] = self.num_primary_matched params['num_secondary_matched'] = self.num_secondary_matched + params['num_unique_secondaries'] = self.num_unique_secondaries construct = construct_done elif self.status == ExecutionStatus.RUNNING: construct = construct_running From db68d4fa8f1feb4682500b9947c875d286d191ff Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 13 Oct 2023 10:12:01 -0700 Subject: [PATCH 41/70] updated docs to use correct sea_water_temperature param name --- analysis/webservice/apidocs/openapi.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index f5c57a3e..b719ad85 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -139,8 +139,7 @@ paths: required: false schema: type: string - default: sea_surface_temperature - example: sea_surface_temperature + example: sea_water_temperature - in: query name: matchOnce description: | From a8be9b8c599f949ebe00a595c879973132956b4f Mon Sep 17 00:00:00 2001 From: skorper Date: Wed, 1 Nov 2023 15:28:43 -0700 Subject: [PATCH 42/70] bugfix --- analysis/webservice/algorithms_spark/Matchup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 46d1d89d..8955d95c 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -276,7 +276,7 @@ def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name, total_keys = len(list(spark_result.keys())) total_values = sum(len(v) for v in spark_result.values()) - unique_values = len(set([point.data_id for point in spark_result.values()])) + unique_values = len(set([point.data_id for v in spark_result.values() for point in v])) details = { 'timeToComplete': int((end - start).total_seconds()), 'numSecondaryMatched': total_values, From 62de86772600c94110c748b3a9358712094cab8f Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 6 Nov 2023 13:39:02 -0800 Subject: [PATCH 43/70] fix division by zero bug --- analysis/webservice/webmodel/NexusExecutionResults.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 2b0007ac..7dd7af99 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -57,7 +57,8 @@ def construct_done(status, created, completed, execution_id, params, host, # Add stats to body job_body['totalPrimaryMatched'] = num_primary_matched job_body['totalSecondaryMatched'] = num_secondary_matched - job_body['averageSecondaryMatched'] = 
round(num_secondary_matched/num_primary_matched) + job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) \ + if num_primary_matched > 0 else 0 job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries # Construct urls From 972f3ddf076af8c51311a62e8e23a84a9f926d91 Mon Sep 17 00:00:00 2001 From: rileykk Date: Wed, 8 Nov 2023 14:58:11 -0800 Subject: [PATCH 44/70] add params to dataset management handler classes --- analysis/webservice/management/Datasets.py | 42 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/management/Datasets.py b/analysis/webservice/management/Datasets.py index ded1e8a2..40b267fd 100644 --- a/analysis/webservice/management/Datasets.py +++ b/analysis/webservice/management/Datasets.py @@ -93,7 +93,24 @@ def toJson(self): class DatasetAdd(DatasetManagement): name = 'Add dataset' path = '/datasets/add' - description = "Add new dataset to running SDAP instance" + description = "Add new Zarr dataset to running SDAP instance" + params = { + "name": { + "name": "Dataset name", + "type": "string", + "description": "Name of new dataset to add" + }, + "path": { + "name": "Path or URL", + "type": "string", + "description": "Path/URL of Zarr group" + }, + "body": { + "name": "Request body", + "type": "application/json OR application/yaml", + "description": "POST request body. Config options for Zarr (variabe, coords, aws (if applicable))" + } + } def __init__(self, **args): pass @@ -147,7 +164,19 @@ def calc(self, request: NexusRequestObject, **args): class DatasetUpdate(DatasetManagement): name = 'Update dynamically added dataset' path = '/datasets/update' - description = "Update dataset in running SDAP instance" + description = "Update Zarr dataset in running SDAP instance" + params = { + "name": { + "name": "Dataset name", + "type": "string", + "description": "Name of dataset to update" + }, + "body": { + "name": "Request body", + "type": "application/json OR application/yaml", + "description": "POST request body. Config options for Zarr (variabe, coords, aws (if applicable))" + } + } def __init__(self, **args): pass @@ -182,7 +211,14 @@ def calc(self, request: NexusRequestObject, **args): class DatasetDelete(DatasetManagement): name = 'Remove dataset' path = '/datasets/remove' - description = "Remove dataset from running SDAP instance" + description = "Remove Zarr dataset from running SDAP instance" + params = { + "name": { + "name": "Dataset name", + "type": "string", + "description": "Name of dataset to remove" + } + } def __init__(self, **args): pass From 831ca37b4d4b524cfd6804edcc3aa5d874a39271 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 09:23:30 -0800 Subject: [PATCH 45/70] add page number to default filename for matchup output --- CHANGELOG.md | 1 + analysis/webservice/algorithms/doms/BaseDomsHandler.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e0a4e98..64e65c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - Updated /job endpoint with details about number of primary and secondary tiles. +- SDAP-499: Added page number to default filename for matchup output ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. 
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..d4dcd512 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -114,7 +114,7 @@ def toNetCDF(self): return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details) def filename(self): - return f'CDMS_{self.__executionId}' + return f'CDMS_{self.__executionId}_page{self.__details["pageNum"]}' class DomsCSVFormatter: From 4ab2f9b4a4f72a34b922cb496cedb87e684335b7 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 13:22:09 -0800 Subject: [PATCH 46/70] pagination improvements --- .../algorithms/doms/ExecutionStatus.py | 5 ++- .../algorithms/doms/ResultsRetrieval.py | 2 + .../webservice/algorithms_spark/Matchup.py | 10 ++--- analysis/webservice/apidocs/openapi.yml | 14 +++++++ .../webmodel/NexusExecutionResults.py | 40 ++++++++++++------- 5 files changed, 50 insertions(+), 21 deletions(-) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 17c6ca95..63cf423b 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -42,6 +42,8 @@ def calc(self, request, **args): except ValueError: raise NexusProcessingException(reason='"id" argument must be a valid uuid', code=400) + filename = request.get_argument('filename', None) + # Check if the job is done with ResultsRetrieval(self.config) as retrieval: try: @@ -74,5 +76,6 @@ def calc(self, request, **args): host=host, num_primary_matched=execution_stats.get('numPrimaryMatched'), num_secondary_matched=execution_stats.get('numSecondaryMatched'), - num_unique_secondaries=execution_stats.get('numUniqueSecondaries') + num_unique_secondaries=execution_stats.get('numUniqueSecondaries'), + filename=filename ) diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py index f03c1caa..cdec9294 100644 --- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -45,6 +45,8 @@ def calc(self, computeOptions, **args): simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) + filename = computeOptions.get_argument("filename", default=None) + with ResultsStorage.ResultsRetrieval(self.config) as storage: params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 8955d95c..7c7f551b 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -219,12 +219,13 @@ def parse_arguments(self, request): end_seconds_from_epoch = int((end_time - EPOCH).total_seconds()) prioritize_distance = request.get_boolean_arg("prioritizeDistance", default=True) + filename = request.get_argument('filename', default=None) return bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, prioritize_distance + platforms, match_once, prioritize_distance, filename def get_job_pool(self, tile_ids): if len(tile_ids) > 
LARGE_JOB_THRESHOLD: @@ -302,7 +303,7 @@ def calc(self, request, tornado_io_loop, **args): bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \ start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \ depth_min, depth_max, time_tolerance, radius_tolerance, \ - platforms, match_once, prioritize_distance = self.parse_arguments(request) + platforms, match_once, prioritize_distance, filename = self.parse_arguments(request) args = { "primary": primary_ds_name, @@ -375,9 +376,8 @@ def calc(self, request, tornado_io_loop, **args): start=start, prioritize_distance=prioritize_distance )) - - request.requestHandler.redirect(f'/job?id={execution_id}') - + filename_param = f'&filename={filename}' if filename else '' + request.requestHandler.redirect(f'/job?id={execution_id}{filename_param}') @classmethod def convert_to_matches(cls, spark_result): diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index b719ad85..8c6efdc9 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -166,6 +166,13 @@ paths: type: boolean default: true example: true + - in: query + name: filename + description: | + Optional filename. Will be passed into /job and results links + required: false + schema: + type: string responses: '200': description: Successful operation @@ -689,6 +696,13 @@ paths: type: string format: uuid example: c864a51b-3d87-4872-9070-632820b1cae2 + - in: query + name: filename + description: | + Optional filename. Will be passed into /job results links + required: false + schema: + type: string /job/cancel: get: summary: | diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index 7dd7af99..47a891a9 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -27,15 +27,17 @@ class ExecutionStatus(Enum): CANCELLED = 'cancelled' -def construct_job_status(job_state, created, updated, execution_id, params, host, message=''): +def construct_job_status(job_state, created, updated, execution_id, params, host, message='', + filename=None): + filename_param = f'&filename={filename}' if filename else '' return { 'status': job_state.value, 'message': message, 'createdAt': created, 'updatedAt': updated, 'links': [{ - 'href': f'{host}/job?id={execution_id}', - 'title': 'The current page', + 'href': f'{host}/job?id={execution_id}{filename_param}', + 'title': 'Get job status - the current page', 'type': 'application/json', 'rel': 'self' }], @@ -45,14 +47,15 @@ def construct_job_status(job_state, created, updated, execution_id, params, host def construct_done(status, created, completed, execution_id, params, host, - num_primary_matched, num_secondary_matched, num_unique_secondaries): + num_primary_matched, num_secondary_matched, num_unique_secondaries, filename): job_body = construct_job_status( status, created, completed, execution_id, params, - host + host, + filename=filename ) # Add stats to body job_body['totalPrimaryMatched'] = num_primary_matched @@ -61,6 +64,8 @@ def construct_done(status, created, completed, execution_id, params, host, if num_primary_matched > 0 else 0 job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries + filename_param = f'&filename={filename}' if filename else '' + # Construct urls formats = [ ('CSV', 'text/csv'), @@ -68,8 +73,8 @@ def construct_done(status, created, completed, execution_id, params, host, ('NETCDF', 
'binary/octet-stream') ] data_links = [{ - 'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}', - 'title': 'Download results', + 'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}{filename_param}', + 'title': f'Download {output_format} results', 'type': mime, 'rel': 'data' } for output_format, mime in formats] @@ -77,14 +82,15 @@ def construct_done(status, created, completed, execution_id, params, host, return job_body -def construct_running(status, created, execution_id, params, host): +def construct_running(status, created, execution_id, params, host, filename): job_body = construct_job_status( status, created, None, execution_id, params, - host + host, + filename=filename ) job_body['links'].append({ 'href': f'{host}/job/cancel?id={execution_id}', @@ -94,7 +100,7 @@ def construct_running(status, created, execution_id, params, host): return job_body -def construct_error(status, created, completed, execution_id, message, params, host): +def construct_error(status, created, completed, execution_id, message, params, host, filename): return construct_job_status( status, created, @@ -102,25 +108,27 @@ def construct_error(status, created, completed, execution_id, message, params, h execution_id, params, host, - message + message, + filename=filename ) -def construct_cancelled(status, created, completed, execution_id, params, host): +def construct_cancelled(status, created, completed, execution_id, params, host, filename): return construct_job_status( status, created, completed, execution_id, params, - host + host, + filename=filename ) class NexusExecutionResults: def __init__(self, status=None, created=None, completed=None, execution_id=None, message='', params=None, host=None, status_code=200, num_primary_matched=None, - num_secondary_matched=None, num_unique_secondaries=None): + num_secondary_matched=None, num_unique_secondaries=None, filename=None): self.status_code = status_code self.status = status self.created = created @@ -132,6 +140,7 @@ def __init__(self, status=None, created=None, completed=None, execution_id=None, self.num_primary_matched = num_primary_matched self.num_secondary_matched = num_secondary_matched self.num_unique_secondaries = num_unique_secondaries + self.filename = filename def toJson(self): params = { @@ -139,7 +148,8 @@ def toJson(self): 'created': self.created, 'execution_id': self.execution_id, 'params': self.execution_params, - 'host': self.host + 'host': self.host, + 'filename': self.filename } if self.status == ExecutionStatus.SUCCESS: params['completed'] = self.completed From 3677c11db7b9346933a185f5129fd670234cd4ca Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 13:26:48 -0800 Subject: [PATCH 47/70] removed debugging line --- analysis/webservice/algorithms/doms/ResultsRetrieval.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py index cdec9294..f03c1caa 100644 --- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -45,8 +45,6 @@ def calc(self, computeOptions, **args): simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) - filename = computeOptions.get_argument("filename", default=None) - with ResultsStorage.ResultsRetrieval(self.config) as storage: params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size) From 
86f1348d6d283eb688b1b619f95dde2f8e635218 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 16 Nov 2023 13:27:43 -0800 Subject: [PATCH 48/70] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e0a4e98..6ffff5dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - Updated /job endpoint with details about number of primary and secondary tiles. +- SDAP-500: Improvements to SDAP Asynchronous Jobs ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. From 1e8cc4e9d31d295e172c0db4bba61a5776642bea Mon Sep 17 00:00:00 2001 From: Riley Kuttruff <72955101+RKuttruff@users.noreply.github.com> Date: Mon, 27 Nov 2023 15:44:38 -0800 Subject: [PATCH 49/70] Update helm cassandra dependency (#289) * Update helm cassandra dependency * Bump default cassandra PV to 4 * Bump default cassandra PV to 4 in tools * Changelog * Fixed small documentation issue --------- Co-authored-by: rileykk --- CHANGELOG.md | 1 + analysis/webservice/algorithms/doms/ResultsStorage.py | 3 +++ analysis/webservice/algorithms/doms/domsconfig.ini.default | 2 +- data-access/nexustiles/config/datastores.ini.default | 2 +- helm/requirements.yaml | 2 +- helm/values.yaml | 3 +-- tools/deletebyquery/deletebyquery.py | 2 +- tools/doms-data-tools/update_doms_data_pk.py | 2 +- tools/domspurge/README.md | 2 +- tools/domspurge/purge.py | 2 +- 10 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ffff5dc..5e36c0a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Status code for results endpoint if execution id is not found fixed to be `404` instead of `500`. - Ensured links in the `/job` endpoint are https - SDAP-488: Workaround to build issue on Apple Silicon (M1/M2). Image build installs nexusproto through PyPI instead of building from source. A build arg `BUILD_NEXUSPROTO` was defined to allow building from source if desired +- SDAP-496: Fix `solr-cloud-init` image failing to run. 
### Security ## [1.1.0] - 2023-04-26 diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 48b2122d..1dea1610 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -65,6 +65,9 @@ def __enter__(self): dc_policy = DCAwareRoundRobinPolicy(cassDatacenter) token_policy = TokenAwarePolicy(dc_policy) + logger.info(f'Connecting to Cassandra cluster @ {[host for host in cassHost.split(",")]}; datacenter: ' + f'{cassDatacenter}; protocol version: {cassVersion}') + self._cluster = Cluster([host for host in cassHost.split(',')], load_balancing_policy=token_policy, protocol_version=cassVersion, auth_provider=auth_provider) diff --git a/analysis/webservice/algorithms/doms/domsconfig.ini.default b/analysis/webservice/algorithms/doms/domsconfig.ini.default index 55f9b16c..f4e44960 100644 --- a/analysis/webservice/algorithms/doms/domsconfig.ini.default +++ b/analysis/webservice/algorithms/doms/domsconfig.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=doms local_datacenter=datacenter1 -protocol_version=3 +protocol_version=4 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default index d8db1902..51455a38 100644 --- a/data-access/nexustiles/config/datastores.ini.default +++ b/data-access/nexustiles/config/datastores.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=nexustiles local_datacenter=datacenter1 -protocol_version=3 +protocol_version=4 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/helm/requirements.yaml b/helm/requirements.yaml index a9996586..1de8cf0f 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -12,7 +12,7 @@ dependencies: repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: solr.enabled - name: cassandra - version: 5.5.3 + version: 9.1.7 repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: cassandra.enabled diff --git a/helm/values.yaml b/helm/values.yaml index 4105362e..fe2481ef 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -195,8 +195,7 @@ cassandra: dbUser: user: cassandra password: cassandra - cluster: - replicaCount: 1 + replicaCount: 1 persistence: storageClass: hostpath size: 8Gi diff --git a/tools/deletebyquery/deletebyquery.py b/tools/deletebyquery/deletebyquery.py index 4fb7bd66..8b98111a 100644 --- a/tools/deletebyquery/deletebyquery.py +++ b/tools/deletebyquery/deletebyquery.py @@ -262,7 +262,7 @@ def parse_args(): help='The version of the Cassandra protocol the driver should use.', required=False, choices=['1', '2', '3', '4', '5'], - default='3') + default='4') parser.add_argument('--solr-rows', help='Number of rows to fetch with each Solr query to build the list of tiles to delete', diff --git a/tools/doms-data-tools/update_doms_data_pk.py b/tools/doms-data-tools/update_doms_data_pk.py index ed8dbe5e..749995da 100644 --- a/tools/doms-data-tools/update_doms_data_pk.py +++ b/tools/doms-data-tools/update_doms_data_pk.py @@ -114,7 +114,7 @@ def main(): request_timeout=60.0, ) }, - protocol_version=3, + protocol_version=4, auth_provider=auth_provider) as cluster: session = cluster.connect('doms') diff --git a/tools/domspurge/README.md b/tools/domspurge/README.md index 92f7cfb1..e88b62f5 100644 --- a/tools/domspurge/README.md +++ 
b/tools/domspurge/README.md @@ -33,5 +33,5 @@ You can build an image for this script to run it in a Kubernetes CronJob. ```shell cd /incubator-sdap-nexus -docker build . -f Dockerfile -t sdap-local/DomsPurge: +docker build . -f tools/domspurge/Dockerfile -t sdap-local/DomsPurge: ``` diff --git a/tools/domspurge/purge.py b/tools/domspurge/purge.py index 4fb2fc37..d4bb15a8 100644 --- a/tools/domspurge/purge.py +++ b/tools/domspurge/purge.py @@ -270,7 +270,7 @@ def parse_args(): required=False, dest='pv', choices=['1', '2', '3', '4', '5'], - default='3') + default='4') time_before = purge_options.add_mutually_exclusive_group(required=True) From faed801d106dddf827faddf077f961d2a39c5492 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 14 Dec 2023 10:32:00 -0800 Subject: [PATCH 50/70] Register dataset docs with nexusproto backend + static getters --- data-access/nexustiles/AbstractTileService.py | 10 +++++++ .../nexustiles/backends/nexusproto/backend.py | 29 +++++++++++++++++++ .../nexustiles/backends/zarr/backend.py | 3 ++ data-access/nexustiles/nexustiles.py | 9 ++++++ 4 files changed, 51 insertions(+) diff --git a/data-access/nexustiles/AbstractTileService.py b/data-access/nexustiles/AbstractTileService.py index 20467784..c418180e 100644 --- a/data-access/nexustiles/AbstractTileService.py +++ b/data-access/nexustiles/AbstractTileService.py @@ -18,6 +18,7 @@ import numpy as np import numpy.ma as ma +from copy import deepcopy class AbstractTileService(ABC): @@ -32,6 +33,7 @@ class AbstractTileService(ABC): def __init__(self, dataset_name): self._name = dataset_name + self._ds_info = {} @abstractmethod def get_dataseries_list(self, simple=False): @@ -200,3 +202,11 @@ def fetch_data_for_tiles(self, *tiles): def _metadata_store_docs_to_tiles(self, *store_docs): raise NotImplementedError() + @abstractmethod + def update_metadata(self, solr_doc): + raise NotImplementedError() + + def get_metadata(self, dataset=None): # ds as param for nexusproto backend + return deepcopy(self._ds_info) + + diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index 690b109c..3b9390ff 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -14,6 +14,7 @@ # limitations under the License. 
import configparser +import copy import logging import sys import json @@ -551,6 +552,34 @@ def pingSolr(self): else: return False + def update_metadata(self, solr_doc): + variables = solr_doc.get('variables_s', None) + + dataset = solr_doc['dataset_s'] + + if dataset not in self._ds_info: + self._ds_info[dataset] = {} + + if variables is not None: + variables = json.loads(variables) + + if isinstance(variables, dict): + variables = [variables] + else: + variables = [] + + self._ds_info[dataset]['variables'] = variables + + # print(self._ds_info) + + def get_metadata(self, dataset=None): + if dataset is None: + logger.error('Cannot pull metadata for nexusproto without specifying dataset name') + return {} + else: + return copy.deepcopy(self._ds_info[dataset]) + + @staticmethod def _get_config_files(filename): log = logging.getLogger(__name__) diff --git a/data-access/nexustiles/backends/zarr/backend.py b/data-access/nexustiles/backends/zarr/backend.py index e1d0a0c1..86081a27 100644 --- a/data-access/nexustiles/backends/zarr/backend.py +++ b/data-access/nexustiles/backends/zarr/backend.py @@ -463,6 +463,9 @@ def __fetch_data_for_tile(self, tile: Tile): def _metadata_store_docs_to_tiles(self, *store_docs): return [ZarrBackend.__nts_url_to_tile(d) for d in store_docs] + def update_metadata(self, solr_doc): + raise NotImplementedError() + @staticmethod def __nts_url_to_tile(nts_url): tile = Tile() diff --git a/data-access/nexustiles/nexustiles.py b/data-access/nexustiles/nexustiles.py index b4fd6bba..ef64e8f8 100644 --- a/data-access/nexustiles/nexustiles.py +++ b/data-access/nexustiles/nexustiles.py @@ -250,6 +250,7 @@ def _update_datasets(): if store_type == 'nexus_proto' or store_type == 'nexusproto': update_logger.info(f"Detected new nexusproto dataset {d_id}, using default nexusproto backend") NexusTileService.backends[d_id] = NexusTileService.backends[None] + NexusTileService.backends[d_id]['backend'].update_metadata(dataset) elif store_type == 'zarr': update_logger.info(f"Detected new zarr dataset {d_id}, opening new zarr backend") @@ -371,6 +372,14 @@ def user_ds_delete(name): return {'success': True} + @staticmethod + def get_metadata_for_dataset(ds_name): + try: + backend = NexusTileService._get_backend(ds_name) + return backend.get_metadata(ds_name) + except: + return None + def override_config(self, config): for section in config.sections(): if self._config.has_section(section): # only override preexisting section, ignores the other From 20902ebde81eacc6ed643977b57a64b615fbb3d6 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 14 Dec 2023 10:32:18 -0800 Subject: [PATCH 51/70] Matchup impl --- .../webservice/algorithms_spark/Matchup.py | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index 30e9bb6a..c15b1e5d 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -41,6 +41,8 @@ from webservice.webmodel import NexusProcessingException from webservice.webmodel.NexusExecutionResults import ExecutionStatus +from nexustiles.nexustiles import NexusTileService + EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' @@ -494,14 +496,33 @@ def from_nexus_point(nexus_point, tile=None): else: data_vals = [nexus_point.data_vals] + ds_metadata = NexusTileService.get_metadata_for_dataset(tile.dataset) + + if ds_metadata is not None: + ds_vars = 
ds_metadata.get('variables', []) + else: + ds_vars = [] + + variable_dict = {} + + for v in ds_vars: + variable_dict[v['name']] = v + data = [] for data_val, variable in zip(data_vals, tile.variables): if data_val: + if variable.variable_name in variable_dict: + standard_name = variable_dict[variable.variable_name]['cf_standard_name'] + unit = variable_dict[variable.variable_name]['unit'] + else: + standard_name = variable.standard_name + unit = None + data.append(DataPoint( variable_name=variable.variable_name, variable_value=data_val, - cf_variable_name=variable.standard_name, - variable_unit=None + cf_variable_name=standard_name, + variable_unit=unit )) point.data = data From 1af0c41185adfc719ef27da386209e28431028f5 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 18 Dec 2023 15:33:56 -0800 Subject: [PATCH 52/70] Add vars to headers in CDMS subsetter --- .../webservice/algorithms/doms/subsetter.py | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/analysis/webservice/algorithms/doms/subsetter.py b/analysis/webservice/algorithms/doms/subsetter.py index bf63fc88..32e64a4e 100644 --- a/analysis/webservice/algorithms/doms/subsetter.py +++ b/analysis/webservice/algorithms/doms/subsetter.py @@ -24,6 +24,8 @@ from webservice.algorithms.doms.insitu import query_insitu from webservice.webmodel import NexusProcessingException, NexusResults +from nexustiles.nexustiles import NexusTileService + from . import BaseDomsHandler ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' @@ -302,6 +304,20 @@ def toCsv(self): logging.info('Converting result to CSV') for dataset_name, results in dataset_results.items(): + try: + ds_metadata = NexusTileService.get_metadata_for_dataset(dataset_name) + except: + ds_metadata = {} + + ds_vars = ds_metadata.get('variables', []) + + variable_dict = {} + variable_dict_cf = {} + + for v in ds_vars: + variable_dict[v['name']] = v + variable_dict_cf[v['cf_standard_name']] = v + rows = [] headers = [ @@ -309,13 +325,25 @@ def toCsv(self): 'latitude', 'time' ] - data_variables = list(set([keys for result in results for keys in result['data'].keys()])) - data_variables.sort() + + data_variables = [] + data_variable_headers = [] + + for dv in sorted(list(set([keys for result in results for keys in result['data'].keys()]))): + data_variables.append(dv) + + if dv in variable_dict_cf: + data_variable_headers.append(f'{dv} ({variable_dict_cf[dv]["unit"]})') + elif dv in variable_dict: + data_variable_headers.append(f'{dv} ({variable_dict[dv]["unit"]})') + else: + data_variable_headers.append(dv) if 'id' in list(set([keys for result in results for keys in result.keys()])): headers.append('id') - headers.extend(data_variables) + headers.extend(data_variable_headers) + for i, result in enumerate(results): cols = [] From 8a069db22f72c8405434c1ab164c13057ac34479 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 21 Dec 2023 13:07:59 -0800 Subject: [PATCH 53/70] Add units to all matchup result formats --- analysis/webservice/algorithms/doms/BaseDomsHandler.py | 9 +++++++-- analysis/webservice/algorithms_spark/Matchup.py | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..66c4079a 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -142,7 +142,11 @@ def is_empty(s): name = variable['cf_variable_name'] - return name if not is_empty(name) else 
variable['variable_name'] + header_name = name if not is_empty(name) else variable['variable_name'] + + unit = variable.get('variable_unit', None) + + return f'{header_name} ({unit})' if unit is not None else header_name @staticmethod def __packValues(csv_mem_file, results): @@ -541,7 +545,8 @@ def writeGroup(self): self.__enrichVariable(data_variable, min_data, max_data, has_depth=None, unit=units[variable]) data_variable[:] = np.ma.masked_invalid(variables[variable]) data_variable.long_name = name - data_variable.standard_name = cf_name + if cf_name: + data_variable.standard_name = cf_name # # Lists may include 'None" values, to calc min these must be filtered out diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py index c15b1e5d..5cb1f32b 100644 --- a/analysis/webservice/algorithms_spark/Matchup.py +++ b/analysis/webservice/algorithms_spark/Matchup.py @@ -518,6 +518,9 @@ def from_nexus_point(nexus_point, tile=None): standard_name = variable.standard_name unit = None + if standard_name is None or standard_name == '': + standard_name = variable.standard_name + data.append(DataPoint( variable_name=variable.variable_name, variable_value=data_val, From 0c39b075b070fe01f1e44583d01b95a4e5cc3c52 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 21 Dec 2023 13:08:48 -0800 Subject: [PATCH 54/70] Formatting for units in subsetter headers When units are absent, don't write var_name (None) as the header --- analysis/webservice/algorithms/doms/subsetter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/webservice/algorithms/doms/subsetter.py b/analysis/webservice/algorithms/doms/subsetter.py index 32e64a4e..c8ae8d79 100644 --- a/analysis/webservice/algorithms/doms/subsetter.py +++ b/analysis/webservice/algorithms/doms/subsetter.py @@ -332,9 +332,9 @@ def toCsv(self): for dv in sorted(list(set([keys for result in results for keys in result['data'].keys()]))): data_variables.append(dv) - if dv in variable_dict_cf: + if dv in variable_dict_cf and variable_dict_cf[dv]["unit"] is not None: data_variable_headers.append(f'{dv} ({variable_dict_cf[dv]["unit"]})') - elif dv in variable_dict: + elif dv in variable_dict and variable_dict[dv]["unit"] is not None: data_variable_headers.append(f'{dv} ({variable_dict[dv]["unit"]})') else: data_variable_headers.append(dv) From 32ca3d709237d324decada84fe92a4a4044b6521 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 5 Jan 2024 15:33:49 -0800 Subject: [PATCH 55/70] stac catalog --- .../algorithms/doms/ResultsStorage.py | 15 +- .../webservice/algorithms/doms/StacCatalog.py | 166 ++++++++++++++++++ .../webservice/algorithms/doms/__init__.py | 1 + .../webmodel/NexusExecutionResults.py | 6 + 4 files changed, 180 insertions(+), 8 deletions(-) create mode 100644 analysis/webservice/algorithms/doms/StacCatalog.py diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 39db27b3..6b4cc1c2 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -286,7 +286,7 @@ def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1 execution_id = uuid.UUID(execution_id) params = self.retrieveParams(execution_id) - stats = self.__retrieveStats(execution_id) + stats = self.retrieveStats(execution_id) data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size) return params, stats, data @@ -357,19 
+357,18 @@ def __rowToDataEntry(self, row, trim_data=False): return entry - def __retrieveStats(self, id): - cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1" + def retrieveStats(self, id): + cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete, num_unique_secondaries FROM doms_execution_stats where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) for row in rows: stats = { - "timeToComplete": row.time_to_complete, - "numSecondaryMatched": row.num_insitu_matched, - "numPrimaryMatched": row.num_gridded_matched, + 'timeToComplete': row.time_to_complete, + 'numSecondaryMatched': row.num_insitu_matched, + 'numPrimaryMatched': row.num_gridded_matched, + 'numUniqueSecondaries': row.num_unique_secondaries } return stats - raise NexusProcessingException(reason=f'No stats found for id {str(id)}', code=404) - def retrieveParams(self, id): cql = "SELECT * FROM doms_params where execution_id = %s limit 1" rows = self._session.execute(cql, (id,)) diff --git a/analysis/webservice/algorithms/doms/StacCatalog.py b/analysis/webservice/algorithms/doms/StacCatalog.py new file mode 100644 index 00000000..2c1aa125 --- /dev/null +++ b/analysis/webservice/algorithms/doms/StacCatalog.py @@ -0,0 +1,166 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the 'License'); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import re +import uuid +from typing import List + +from webservice.NexusHandler import nexus_handler +from webservice.algorithms.doms.ResultsStorage import ResultsRetrieval +from webservice.webmodel import NexusProcessingException +from webservice.webmodel import NexusResults + +from . 
import BaseDomsHandler + + +class StacResults(NexusResults): + def __init__(self, contents): + NexusResults.__init__(self) + self.contents = contents + + def toJson(self): + return json.dumps(self.contents, indent=4) + + +@nexus_handler +class StacCatalog(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = 'STAC Catalog Handler' + path = '^/cdmscatalog/?.*$' + description = '' + params = {} + singleton = True + + def __init__(self, tile_service_factory, config=None): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self, tile_service_factory) + self.config = config + + def construct_catalog(self, execution_id: str): + return { + 'stac_version': '1.0.0', + 'type': 'Catalog', + 'id': str(execution_id), + 'description': 'STAC Catalog for CDMS output', + 'links': [ + { + 'rel': 'collection', + 'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}', + 'title': f'Collection of pages for {execution_id} {output_format} output' + } + for output_format in ['CSV', 'JSON', 'NETCDF'] + ] + } + + def construct_collection(self, execution_id: str, output_format: str, + num_primary_matched: int, page_size: int, start_time: str, + end_time: str, bbox: List[float]): + links = [ + { + 'rel': 'self', + 'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}', + 'title': 'The current page', + 'type': 'application/json' + }, + { + 'rel': 'root', + 'href': f'https://{self.host}/cdmscatalog/{execution_id}', + 'title': f'Root catalog for {execution_id}', + } + ] + + url = f'https://{self.host}/cdmsresults?id={execution_id}&output={output_format}' + for page_num in range(1, num_primary_matched, page_size): + links.append({ + 'rel': 'data', + 'href': f'{url}&pageNum={page_num}&pageSize={page_size}' + }) + + return { + 'stac_version': '1.0.0', + 'type': 'Collection', + 'license': 'not-provided', + 'id': f'{execution_id}.{output_format}', + 'description': 'Collection of results for CDMS execution and result format', + 'extent': { + 'spatial': { + 'bbox': bbox + }, + 'temporal': { + 'interval': [start_time, end_time] + } + }, + 'links': links, + } + + def calc(self, request, **args): + page_size = request.get_int_arg('pageSize', default=1000) + url_path_regex = '^\/cdmscatalog\/?(?P[a-zA-Z0-9-]*)\/?(?P[a-zA-Z0-9]*)' + match = re.search(url_path_regex, request.requestHandler.request.path) + + execution_id = match.group('id') + output_format = match.group('format') + + self.host = request.requestHandler.request.host + + if not execution_id: + raise NexusProcessingException( + reason=f'Execution ID path param must be provided.', + code=400 + ) + + if execution_id: + try: + execution_id = uuid.UUID(execution_id) + except ValueError: + raise NexusProcessingException( + reason=f'"{execution_id}" is not a valid uuid', + code=400 + ) + + if output_format and output_format.upper() not in ['CSV', 'JSON', 'NETCDF']: + raise NexusProcessingException( + reason=f'"{output_format}" is not a valid format. 
Should be CSV, JSON, or NETCDF.', + code=400 + ) + + if execution_id and not output_format: + # Route to STAC catalog for execution + stac_output = self.construct_catalog(execution_id) + elif execution_id and output_format: + # Route to STAC collection for execution+format + + with ResultsRetrieval(self.config) as retrieval: + try: + execution_stats = retrieval.retrieveStats(execution_id) + execution_params = retrieval.retrieveParams(execution_id) + except NexusProcessingException: + execution_stats = {} + + num_primary_matched = execution_stats.get('numPrimaryMatched', 0) + start_time = execution_params['startTime'].isoformat() + end_time = execution_params['endTime'].isoformat() + bbox = list(map(float, execution_params['bbox'].split(','))) + + stac_output = self.construct_collection( + execution_id, output_format, num_primary_matched, page_size, + start_time, end_time, bbox + ) + else: + raise NexusProcessingException( + reason=f'Invalid path parameters were provided', + code=400 + ) + + return StacResults(stac_output) diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index bc568f83..7e5715f4 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -20,6 +20,7 @@ from . import DatasetListQuery from . import DomsInitialization from . import MatchupQuery +from . import StacCatalog from . import MetadataQuery from . import ResultsPlotQuery from . import ResultsRetrieval diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py index d5c12046..be9d332a 100644 --- a/analysis/webservice/webmodel/NexusExecutionResults.py +++ b/analysis/webservice/webmodel/NexusExecutionResults.py @@ -60,6 +60,12 @@ def construct_done(status, created, completed, execution_id, params, host): ('JSON', 'application/json'), ('NETCDF', 'binary/octet-stream') ] + job_body['links'].append({ + 'href': f'{host}/cdmscatalog/{execution_id}', + 'title': 'STAC Catalog for execution results', + 'type': 'application/json', + 'rel': 'stac' + }) data_links = [{ 'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}', 'title': 'Download results', From 3563ae9820f3c8699e15f348f61b94d7f5aa65b5 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 5 Jan 2024 16:03:33 -0800 Subject: [PATCH 56/70] Updated openapi spec --- analysis/webservice/apidocs/openapi.yml | 52 +++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index ea9b16ba..0420bf9d 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -721,6 +721,58 @@ paths: type: string format: uuid example: c864a51b-3d87-4872-9070-632820b1cae2 + /cdmscatalog/{executionId}: + get: + summary: | + Get STAC Catalog for execution + operationId: cdmscatalog + tags: + - Analytics + description: "Get STAC catalog by execution id" + parameters: + - in: path + name: executionId + description: | + The job execution ID + required: true + schema: + type: string + format: uuid + example: c864a51b-3d87-4872-9070-632820b1cae2 + /cdmscatalog/{executionId}/{format}: + get: + summary: | + Get STAC Catalog format catalog for execution + operationId: cdmscatalogcollection + tags: + - Analytics + description: "Get STAC catalog by execution id" + parameters: + - in: path + name: executionId + description: | + The job execution ID + required: true + schema: + type: 
string + format: uuid + example: c864a51b-3d87-4872-9070-632820b1cae2 + - in: path + name: format + description: | + CDMS results format + required: true + schema: + type: string + enum: [JSON,CSV,NETCDF] + example: JSON + - in: query + name: pageSize + description: | + How many primary matches on each page of CDMS results + required: false + schema: + type: integer externalDocs: description: Documentation url: https://incubator-sdap-nexus.readthedocs.io/en/latest/index.html From 0691d87932ec0a7c54ef77f1839100681848df37 Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 5 Jan 2024 16:15:40 -0800 Subject: [PATCH 57/70] move stac endpoints to matchup tag in openapi spec --- analysis/webservice/apidocs/openapi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml index 0420bf9d..3bb5103a 100644 --- a/analysis/webservice/apidocs/openapi.yml +++ b/analysis/webservice/apidocs/openapi.yml @@ -727,7 +727,7 @@ paths: Get STAC Catalog for execution operationId: cdmscatalog tags: - - Analytics + - Matchup description: "Get STAC catalog by execution id" parameters: - in: path @@ -745,7 +745,7 @@ paths: Get STAC Catalog format catalog for execution operationId: cdmscatalogcollection tags: - - Analytics + - Matchup description: "Get STAC catalog by execution id" parameters: - in: path From 61e6223c21fbf00951081785a9e1ad815d6c9519 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 8 Jan 2024 14:36:12 -0800 Subject: [PATCH 58/70] Meta field in matchup result - all formats --- .../algorithms/doms/BaseDomsHandler.py | 62 ++++++++++++++++--- .../algorithms/doms/ResultsRetrieval.py | 24 ++++++- .../nexustiles/backends/nexusproto/backend.py | 10 ++- 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 66c4079a..4c019bd4 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -85,14 +85,15 @@ def default(self, obj): class DomsQueryResults(NexusResults): def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None, - executionId=None, status_code=200, page_num=None, page_size=None): - NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions, + executionId=None, status_code=200, page_num=None, page_size=None, meta=None): + NexusResults.__init__(self, results=results, meta=meta, stats=None, computeOptions=computeOptions, status_code=status_code) self.__args = args self.__bounds = bounds self.__count = count self.__details = details self.__executionId = str(executionId) + self.__meta = meta if meta is not None else {} if self.__details is None: self.__details = {} @@ -105,13 +106,13 @@ def toJson(self): bounds = self.__bounds.toMap() if self.__bounds is not None else {} return json.dumps( {"executionId": self.__executionId, "data": self.results(), "params": self.__args, "bounds": bounds, - "count": self.__count, "details": self.__details}, indent=4, cls=DomsEncoder) + "count": self.__count, "details": self.__details, "metadata": self.__meta}, indent=4, cls=DomsEncoder) def toCSV(self): - return DomsCSVFormatter.create(self.__executionId, self.results(), self.__args, self.__details) + return DomsCSVFormatter.create(self.__executionId, self.results(), self.__args, self.__details, self.__meta) def toNetCDF(self): - return 
DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details) + return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details, self.__meta) def filename(self): return f'CDMS_{self.__executionId}' @@ -119,13 +120,15 @@ def filename(self): class DomsCSVFormatter: @staticmethod - def create(executionId, results, params, details): + def create(executionId, results, params, details, metadata): csv_mem_file = io.StringIO() try: DomsCSVFormatter.__addConstants(csv_mem_file) DomsCSVFormatter.__addDynamicAttrs(csv_mem_file, executionId, results, params, details) csv.writer(csv_mem_file).writerow([]) + DomsCSVFormatter.__addMetadata(csv_mem_file, metadata) + csv.writer(csv_mem_file).writerow([]) DomsCSVFormatter.__packValues(csv_mem_file, results) @@ -299,10 +302,31 @@ def __addDynamicAttrs(csvfile, executionId, results, params, details): writer.writerows(global_attrs) + @staticmethod + def __addMetadata(csvfile, meta): + def meta_dict_to_list(meta_dict: dict, prefix='metadata') -> list: + attrs = [] + + for key in meta_dict: + new_key = key if prefix == '' else f'{prefix}.{key}' + value = meta_dict[key] + + if isinstance(value, dict): + attrs.extend(meta_dict_to_list(value, new_key)) + else: + attrs.append(dict(MetadataAttribute=new_key, Value=value)) + + return attrs + + metadata_attrs = meta_dict_to_list(meta) + + writer = csv.DictWriter(csvfile, sorted(next(iter(metadata_attrs)).keys())) + writer.writerows(metadata_attrs) + class DomsNetCDFFormatter: @staticmethod - def create(executionId, results, params, details): + def create(executionId, results, params, details, metadata): t = tempfile.mkstemp(prefix="cdms_", suffix=".nc") tempFileName = t[1] @@ -346,6 +370,30 @@ def create(executionId, results, params, details): dataset.CDMS_page_num = details["pageNum"] dataset.CDMS_page_size = details["pageSize"] + ####TEST + + def meta_dict_to_list(meta_dict: dict, prefix='metadata') -> list: + attrs = [] + + for key in meta_dict: + new_key = key if prefix == '' else f'{prefix}.{key}' + value = meta_dict[key] + + if value is None: + value = 'NULL' + elif isinstance(value, list): + value = json.dumps(value) + + if isinstance(value, dict): + attrs.extend(meta_dict_to_list(value, new_key)) + else: + attrs.append((new_key, value)) + + return attrs + + for attr in meta_dict_to_list(metadata): + setattr(dataset, *attr) + insituDatasets = params["matchup"] insituLinks = set() for insitu in insituDatasets: diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py index f03c1caa..0b26056a 100644 --- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -19,6 +19,11 @@ from . 
import ResultsStorage from webservice.NexusHandler import nexus_handler from webservice.webmodel import NexusProcessingException +from nexustiles.nexustiles import NexusTileService + +import logging + +log = logging.getLogger(__name__) @nexus_handler @@ -48,5 +53,22 @@ def calc(self, computeOptions, **args): with ResultsStorage.ResultsRetrieval(self.config) as storage: params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size) + try: + ds_metadata = {} + ds_meta_primary_name = params['primary'] + + primary_metadata = NexusTileService.get_metadata_for_dataset(ds_meta_primary_name) + + ds_metadata['primary'] = {ds_meta_primary_name: primary_metadata} + + ds_metadata['secondary'] = {} + + for secondary_ds_name in params['matchup'].split(','): + ds_metadata['secondary'][secondary_ds_name] = NexusTileService.get_metadata_for_dataset(secondary_ds_name) + except: + log.warning('Could not build dataset metadata dict due to an error') + ds_metadata = {} + return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=len(data), - computeOptions=None, executionId=execution_id, page_num=page_num, page_size=page_size) + computeOptions=None, executionId=execution_id, page_num=page_num, + page_size=page_size, meta=dict(datasets=ds_metadata)) diff --git a/data-access/nexustiles/backends/nexusproto/backend.py b/data-access/nexustiles/backends/nexusproto/backend.py index 3b9390ff..d86a594a 100644 --- a/data-access/nexustiles/backends/nexusproto/backend.py +++ b/data-access/nexustiles/backends/nexusproto/backend.py @@ -568,9 +568,17 @@ def update_metadata(self, solr_doc): else: variables = [] + extra_meta = solr_doc.get('meta_s', None) + self._ds_info[dataset]['variables'] = variables - # print(self._ds_info) + if extra_meta is not None: + try: + extra_meta = json.loads(extra_meta) + except json.JSONDecodeError: + pass + + self._ds_info[dataset]['metadata'] = extra_meta def get_metadata(self, dataset=None): if dataset is None: From e02fc78f64b76e0974cd9bbc6f09b01cd9593447 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:49:19 -0800 Subject: [PATCH 59/70] SDAP-507 - Changes to remove geos sub-dependency --- analysis/webservice/algorithms/doms/BaseDomsHandler.py | 7 ------- analysis/webservice/algorithms/doms/ResultsPlotQuery.py | 2 +- analysis/webservice/algorithms/doms/__init__.py | 2 +- .../nexus_tornado/app_builders/HandlerArgsBuilder.py | 7 ++++--- docker/nexus-webapp/Dockerfile | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..faa384f7 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -35,13 +35,6 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' -try: - from osgeo import gdal - from osgeo.gdalnumeric import * -except ImportError: - import gdal - from gdalnumeric import * - from netCDF4 import Dataset import netCDF4 import tempfile diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py index 950c7964..864cdc3b 100644 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py @@ -26,7 +26,7 @@ class PlotTypes: HISTOGRAM = "histogram" -@nexus_handler +# @nexus_handler class 
DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): name = "DOMS Results Plotting" path = "/domsplot" diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index bc568f83..8a94798e 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -21,7 +21,7 @@ from . import DomsInitialization from . import MatchupQuery from . import MetadataQuery -from . import ResultsPlotQuery +# from . import ResultsPlotQuery from . import ResultsRetrieval from . import ResultsStorage from . import StatsQuery diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py index 2a84ae7e..3b8b480f 100644 --- a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py @@ -37,9 +37,10 @@ def handler_needs_algorithm_config(class_wrapper): class_wrapper == webservice.algorithms_spark.Matchup.Matchup or class_wrapper == webservice.algorithms_spark.MatchupDoms.MatchupDoms or issubclass(class_wrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler) - or issubclass(class_wrapper, webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) + or issubclass(class_wrapper, + webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler - or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler + # or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler ) @staticmethod @@ -50,7 +51,7 @@ def get_args(self, clazz_wrapper): args = dict( clazz=clazz_wrapper, tile_service_factory=self.tile_service_factory, - thread_pool=self. request_thread_pool + thread_pool=self.request_thread_pool ) if issubclass(clazz_wrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler): diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile index 515d6ab0..6f13f9f4 100644 --- a/docker/nexus-webapp/Dockerfile +++ b/docker/nexus-webapp/Dockerfile @@ -95,10 +95,10 @@ RUN python3 setup.py install clean WORKDIR /incubator-sdap-nexus/analysis RUN python3 setup.py install clean && mamba clean -afy +RUN pip install shapely==1.7.1 WORKDIR /incubator-sdap-nexus/tools/deletebyquery -RUN pip3 install cassandra-driver==3.20.1 -RUN pip3 install pyspark py4j +RUN pip3 install cassandra-driver==3.20.1 pyspark py4j RUN pip3 install -r requirements.txt RUN pip3 install cython RUN rm requirements.txt From 51231cad14ba5242bbee1814ba1141a366d33b61 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:54:14 -0800 Subject: [PATCH 60/70] SDAP-507 - Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d62724..793c6017 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-482: Updated Saildrone in situ endpoint in config file - SDAP-485: Improved behavior for retrying failed Cassandra inserts when saving matchup results. - SDAP-487: Improved result fetch speed for large matchup results by tweaking `doms.doms_data` schema to support querying by primary value id. 
+- SDAP-507: Changes to remove `geos` sub-dependency from core image build: + - Removed `gdal` and `basemap` as core dependencies + - Moved `shapely` installation in docker build from conda install to pip install + - Disabled `/domsplot` endpoint & commented out references to its source file as it depends on `basemap` and raises `ImportError`s at startup ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. From 5c755736517ac33c526e6f00ab3331fd02c48411 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:49:19 -0800 Subject: [PATCH 61/70] SDAP-507 - Changes to remove geos sub-dependency --- analysis/webservice/algorithms/doms/BaseDomsHandler.py | 7 ------- analysis/webservice/algorithms/doms/ResultsPlotQuery.py | 2 +- analysis/webservice/algorithms/doms/__init__.py | 2 +- .../nexus_tornado/app_builders/HandlerArgsBuilder.py | 7 ++++--- docker/nexus-webapp/Dockerfile | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 84c91633..faa384f7 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -35,13 +35,6 @@ EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' -try: - from osgeo import gdal - from osgeo.gdalnumeric import * -except ImportError: - import gdal - from gdalnumeric import * - from netCDF4 import Dataset import netCDF4 import tempfile diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py index 950c7964..864cdc3b 100644 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py @@ -26,7 +26,7 @@ class PlotTypes: HISTOGRAM = "histogram" -@nexus_handler +# @nexus_handler class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): name = "DOMS Results Plotting" path = "/domsplot" diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index bc568f83..8a94798e 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -21,7 +21,7 @@ from . import DomsInitialization from . import MatchupQuery from . import MetadataQuery -from . import ResultsPlotQuery +# from . import ResultsPlotQuery from . import ResultsRetrieval from . import ResultsStorage from . 
import StatsQuery diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py index 2a84ae7e..3b8b480f 100644 --- a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py @@ -37,9 +37,10 @@ def handler_needs_algorithm_config(class_wrapper): class_wrapper == webservice.algorithms_spark.Matchup.Matchup or class_wrapper == webservice.algorithms_spark.MatchupDoms.MatchupDoms or issubclass(class_wrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler) - or issubclass(class_wrapper, webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) + or issubclass(class_wrapper, + webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler - or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler + # or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler ) @staticmethod @@ -50,7 +51,7 @@ def get_args(self, clazz_wrapper): args = dict( clazz=clazz_wrapper, tile_service_factory=self.tile_service_factory, - thread_pool=self. request_thread_pool + thread_pool=self.request_thread_pool ) if issubclass(clazz_wrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler): diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile index 6aaadda8..c38c5f29 100644 --- a/docker/nexus-webapp/Dockerfile +++ b/docker/nexus-webapp/Dockerfile @@ -95,11 +95,11 @@ RUN python3 setup.py install clean WORKDIR /incubator-sdap-nexus/analysis RUN python3 setup.py install clean && mamba clean -afy +RUN pip install shapely==1.7.1 WORKDIR /incubator-sdap-nexus/tools/deletebyquery ARG CASS_DRIVER_BUILD_CONCURRENCY=8 -RUN pip3 install cassandra-driver==3.20.1 -RUN pip3 install pyspark py4j +RUN pip3 install cassandra-driver==3.20.1 pyspark py4j RUN pip3 install -r requirements.txt RUN pip3 install cython From 7f717c0fcf1f31701cff258a19145327592562ff Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 11 Jan 2024 10:54:14 -0800 Subject: [PATCH 62/70] SDAP-507 - Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5396fdde..55c5bc6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for deploying on k8s version 1.25: - Upgraded Cassandra Helm chart dependency version - Bumped default Cassandra protocol version 3 -> 4 in webapp and tools +- SDAP-507: Changes to remove `geos` sub-dependency from core image build: + - Removed `gdal` and `basemap` as core dependencies + - Moved `shapely` installation in docker build from conda install to pip install + - Disabled `/domsplot` endpoint & commented out references to its source file as it depends on `basemap` and raises `ImportError`s at startup ### Deprecated ### Removed - SDAP-465: Removed `climatology` directory. 
From 9779f409bd8b5aff9d9d17b244ab6cfbddedfb28 Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 19 Jan 2024 09:15:01 -0800 Subject: [PATCH 63/70] delete instead of comment out --- analysis/webservice/algorithms/doms/__init__.py | 1 - .../webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py | 1 - 2 files changed, 2 deletions(-) diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py index 8a94798e..8bddad9e 100644 --- a/analysis/webservice/algorithms/doms/__init__.py +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -21,7 +21,6 @@ from . import DomsInitialization from . import MatchupQuery from . import MetadataQuery -# from . import ResultsPlotQuery from . import ResultsRetrieval from . import ResultsStorage from . import StatsQuery diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py index 3b8b480f..f2d6f1b4 100644 --- a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py +++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py @@ -40,7 +40,6 @@ def handler_needs_algorithm_config(class_wrapper): or issubclass(class_wrapper, webservice.algorithms_spark.NexusCalcSparkTornadoHandler.NexusCalcSparkTornadoHandler) or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler - # or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler ) @staticmethod From 937876031c63d7d1eac279eb7ba34c1a9a62355c Mon Sep 17 00:00:00 2001 From: skorper Date: Fri, 19 Jan 2024 09:45:40 -0800 Subject: [PATCH 64/70] Revert "Update helm cassandra dependency (#289)" This reverts commit 1e8cc4e9d31d295e172c0db4bba61a5776642bea. --- CHANGELOG.md | 1 - analysis/webservice/algorithms/doms/ResultsStorage.py | 3 --- analysis/webservice/algorithms/doms/domsconfig.ini.default | 2 +- data-access/nexustiles/config/datastores.ini.default | 2 +- helm/requirements.yaml | 2 +- helm/values.yaml | 3 ++- tools/deletebyquery/deletebyquery.py | 2 +- tools/doms-data-tools/update_doms_data_pk.py | 2 +- tools/domspurge/README.md | 2 +- tools/domspurge/purge.py | 2 +- 10 files changed, 9 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e686e60..ed72f245 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,7 +42,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Status code for results endpoint if execution id is not found fixed to be `404` instead of `500`. - Ensured links in the `/job` endpoint are https - SDAP-488: Workaround to build issue on Apple Silicon (M1/M2). Image build installs nexusproto through PyPI instead of building from source. A build arg `BUILD_NEXUSPROTO` was defined to allow building from source if desired -- SDAP-496: Fix `solr-cloud-init` image failing to run. 
### Security ## [1.1.0] - 2023-04-26 diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py index 1dea1610..48b2122d 100644 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -65,9 +65,6 @@ def __enter__(self): dc_policy = DCAwareRoundRobinPolicy(cassDatacenter) token_policy = TokenAwarePolicy(dc_policy) - logger.info(f'Connecting to Cassandra cluster @ {[host for host in cassHost.split(",")]}; datacenter: ' - f'{cassDatacenter}; protocol version: {cassVersion}') - self._cluster = Cluster([host for host in cassHost.split(',')], load_balancing_policy=token_policy, protocol_version=cassVersion, auth_provider=auth_provider) diff --git a/analysis/webservice/algorithms/doms/domsconfig.ini.default b/analysis/webservice/algorithms/doms/domsconfig.ini.default index f4e44960..55f9b16c 100644 --- a/analysis/webservice/algorithms/doms/domsconfig.ini.default +++ b/analysis/webservice/algorithms/doms/domsconfig.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=doms local_datacenter=datacenter1 -protocol_version=4 +protocol_version=3 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default index 51455a38..d8db1902 100644 --- a/data-access/nexustiles/config/datastores.ini.default +++ b/data-access/nexustiles/config/datastores.ini.default @@ -18,7 +18,7 @@ host=localhost port=9042 keyspace=nexustiles local_datacenter=datacenter1 -protocol_version=4 +protocol_version=3 dc_policy=DCAwareRoundRobinPolicy username= password= diff --git a/helm/requirements.yaml b/helm/requirements.yaml index 1de8cf0f..a9996586 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -12,7 +12,7 @@ dependencies: repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: solr.enabled - name: cassandra - version: 9.1.7 + version: 5.5.3 repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: cassandra.enabled diff --git a/helm/values.yaml b/helm/values.yaml index fe2481ef..4105362e 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -195,7 +195,8 @@ cassandra: dbUser: user: cassandra password: cassandra - replicaCount: 1 + cluster: + replicaCount: 1 persistence: storageClass: hostpath size: 8Gi diff --git a/tools/deletebyquery/deletebyquery.py b/tools/deletebyquery/deletebyquery.py index 8b98111a..4fb7bd66 100644 --- a/tools/deletebyquery/deletebyquery.py +++ b/tools/deletebyquery/deletebyquery.py @@ -262,7 +262,7 @@ def parse_args(): help='The version of the Cassandra protocol the driver should use.', required=False, choices=['1', '2', '3', '4', '5'], - default='4') + default='3') parser.add_argument('--solr-rows', help='Number of rows to fetch with each Solr query to build the list of tiles to delete', diff --git a/tools/doms-data-tools/update_doms_data_pk.py b/tools/doms-data-tools/update_doms_data_pk.py index 749995da..ed8dbe5e 100644 --- a/tools/doms-data-tools/update_doms_data_pk.py +++ b/tools/doms-data-tools/update_doms_data_pk.py @@ -114,7 +114,7 @@ def main(): request_timeout=60.0, ) }, - protocol_version=4, + protocol_version=3, auth_provider=auth_provider) as cluster: session = cluster.connect('doms') diff --git a/tools/domspurge/README.md b/tools/domspurge/README.md index e88b62f5..92f7cfb1 100644 --- a/tools/domspurge/README.md +++ 
b/tools/domspurge/README.md @@ -33,5 +33,5 @@ You can build an image for this script to run it in a Kubernetes CronJob. ```shell cd /incubator-sdap-nexus -docker build . -f tools/domspurge/Dockerfile -t sdap-local/DomsPurge: +docker build . -f Dockerfile -t sdap-local/DomsPurge: ``` diff --git a/tools/domspurge/purge.py b/tools/domspurge/purge.py index d4bb15a8..4fb2fc37 100644 --- a/tools/domspurge/purge.py +++ b/tools/domspurge/purge.py @@ -270,7 +270,7 @@ def parse_args(): required=False, dest='pv', choices=['1', '2', '3', '4', '5'], - default='4') + default='3') time_before = purge_options.add_mutually_exclusive_group(required=True) From 530314652bd1e74907962a9b03ceddfdc302c3ff Mon Sep 17 00:00:00 2001 From: rileykk Date: Fri, 19 Jan 2024 10:20:44 -0800 Subject: [PATCH 65/70] deleted disabled endpoint files --- .../algorithms/doms/ResultsPlotQuery.py | 56 ------ .../webservice/algorithms/doms/mapplot.py | 174 ------------------ 2 files changed, 230 deletions(-) delete mode 100644 analysis/webservice/algorithms/doms/ResultsPlotQuery.py delete mode 100644 analysis/webservice/algorithms/doms/mapplot.py diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py deleted file mode 100644 index 864cdc3b..00000000 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import BaseDomsHandler -from . import histogramplot -from . import mapplot -from . import scatterplot -from webservice.NexusHandler import nexus_handler - - -class PlotTypes: - SCATTER = "scatter" - MAP = "map" - HISTOGRAM = "histogram" - - -# @nexus_handler -class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS Results Plotting" - path = "/domsplot" - description = "" - params = {} - singleton = True - - def __init__(self, tile_service_factory, config=None): - BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self, tile_service_factory) - self.config = config - - def calc(self, computeOptions, **args): - id = computeOptions.get_argument("id", None) - parameter = computeOptions.get_argument('parameter', 'sst') - - plotType = computeOptions.get_argument("type", PlotTypes.SCATTER) - - normAndCurve = computeOptions.get_boolean_arg("normandcurve", False) - - if plotType == PlotTypes.SCATTER: - return scatterplot.createScatterPlot(id, parameter, config=self.config) - elif plotType == PlotTypes.MAP: - return mapplot.createMapPlot(id, parameter, config=self.config) - elif plotType == PlotTypes.HISTOGRAM: - return histogramplot.createHistogramPlot(id, parameter, normAndCurve, config=self.config) - else: - raise Exception("Unsupported plot type '%s' specified." 
% plotType) diff --git a/analysis/webservice/algorithms/doms/mapplot.py b/analysis/webservice/algorithms/doms/mapplot.py deleted file mode 100644 index 8b93d3c6..00000000 --- a/analysis/webservice/algorithms/doms/mapplot.py +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import io -from multiprocessing import Process, Manager - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from mpl_toolkits.basemap import Basemap - -from . import BaseDomsHandler -from . import ResultsStorage - -if not matplotlib.get_backend(): - matplotlib.use('Agg') - -PARAMETER_TO_FIELD = { - "sst": "sea_water_temperature", - "sss": "sea_water_salinity" -} - -PARAMETER_TO_UNITS = { - "sst": "($^\circ$ C)", - "sss": "(g/L)" -} - - -def __square(minLon, maxLon, minLat, maxLat): - if maxLat - minLat > maxLon - minLon: - a = ((maxLat - minLat) - (maxLon - minLon)) / 2.0 - minLon -= a - maxLon += a - elif maxLon - minLon > maxLat - minLat: - a = ((maxLon - minLon) - (maxLat - minLat)) / 2.0 - minLat -= a - maxLat += a - - return minLon, maxLon, minLat, maxLat - - -def render(d, lats, lons, z, primary, secondary, parameter): - fig = plt.figure() - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) - - ax.set_title(f'{primary} vs. 
{secondary}') - # ax.set_ylabel('Latitude') - # ax.set_xlabel('Longitude') - - minLatA = np.min(lats) - maxLatA = np.max(lats) - minLonA = np.min(lons) - maxLonA = np.max(lons) - - minLat = minLatA - (abs(maxLatA - minLatA) * 0.1) - maxLat = maxLatA + (abs(maxLatA - minLatA) * 0.1) - - minLon = minLonA - (abs(maxLonA - minLonA) * 0.1) - maxLon = maxLonA + (abs(maxLonA - minLonA) * 0.1) - - minLon, maxLon, minLat, maxLat = __square(minLon, maxLon, minLat, maxLat) - - # m = Basemap(projection='mill', llcrnrlon=-180,llcrnrlat=-80,urcrnrlon=180,urcrnrlat=80,resolution='l') - m = Basemap(projection='mill', llcrnrlon=minLon, llcrnrlat=minLat, urcrnrlon=maxLon, urcrnrlat=maxLat, - resolution='l') - - m.drawparallels(np.arange(minLat, maxLat, (maxLat - minLat) / 5.0), labels=[1, 0, 0, 0], fontsize=10) - m.drawmeridians(np.arange(minLon, maxLon, (maxLon - minLon) / 5.0), labels=[0, 0, 0, 1], fontsize=10) - - m.drawcoastlines() - m.drawmapboundary(fill_color='#99ffff') - m.fillcontinents(color='#cc9966', lake_color='#99ffff') - - # lats, lons = np.meshgrid(lats, lons) - - masked_array = np.ma.array(z, mask=np.isnan(z)) - z = masked_array - - values = np.zeros(len(z)) - for i in range(0, len(z)): - values[i] = ((z[i] - np.min(z)) / (np.max(z) - np.min(z)) * 20.0) + 10 - - x, y = m(lons, lats) - - im1 = m.scatter(x, y, values) - - im1.set_array(z) - cb = m.colorbar(im1) - - units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS["sst"] - cb.set_label("Difference %s" % units) - - buf = io.BytesIO() - plt.savefig(buf, format='png') - plot = buf.getvalue() - if d is not None: - d['plot'] = plot - return plot - - -class DomsMapPlotQueryResults(BaseDomsHandler.DomsQueryResults): - def __init__(self, lats, lons, z, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, - computeOptions=None, executionId=None, plot=None): - BaseDomsHandler.DomsQueryResults.__init__(self, results={"lats": lats, "lons": lons, "values": z}, args=args, - details=details, bounds=bounds, count=count, - computeOptions=computeOptions, executionId=executionId) - self.__lats = lats - self.__lons = lons - self.__z = np.array(z) - self.__parameter = parameter - self.__primary = primary - self.__secondary = secondary - self.__plot = plot - - def toImage(self): - return self.__plot - - -def renderAsync(x, y, z, primary, secondary, parameter): - manager = Manager() - d = manager.dict() - p = Process(target=render, args=(d, x, y, z, primary, secondary, parameter)) - p.start() - p.join() - return d['plot'] - - -def createMapPlot(id, parameter, config=None): - with ResultsStorage.ResultsRetrieval(config) as storage: - params, stats, data = storage.retrieveResults(id) - - primary = params["primary"] - secondary = params["matchup"][0] - - lats = [] - lons = [] - z = [] - - field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else PARAMETER_TO_FIELD["sst"] - - for entry in data: - for match in entry["matches"]: - if match["source"] == secondary: - - if field in entry and field in match: - a = entry[field] - b = match[field] - z.append((a - b)) - z.append((a - b)) - else: - z.append(1.0) - z.append(1.0) - lats.append(entry["y"]) - lons.append(entry["x"]) - lats.append(match["y"]) - lons.append(match["x"]) - - plot = renderAsync(lats, lons, z, primary, secondary, parameter) - r = DomsMapPlotQueryResults(lats=lats, lons=lons, z=z, parameter=parameter, primary=primary, secondary=secondary, - args=params, - details=stats, bounds=None, count=None, 
computeOptions=None, executionId=id, plot=plot) - return r From ee5e5c8da244af7b0e0a2864dd894459b0706350 Mon Sep 17 00:00:00 2001 From: skorper Date: Thu, 25 Jan 2024 12:14:25 -0800 Subject: [PATCH 66/70] fix bug where still-running jobs failed /job endpoint due to missing metadata --- analysis/webservice/algorithms/doms/ExecutionStatus.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py index 63cf423b..9817b070 100644 --- a/analysis/webservice/algorithms/doms/ExecutionStatus.py +++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py @@ -63,6 +63,9 @@ def calc(self, request, **args): except NexusProcessingException: execution_stats = {} + if execution_stats is None: + execution_stats = {} + job_status = NexusExecutionResults.ExecutionStatus(execution_details['status']) host = f'https://{request.requestHandler.request.host}' From 40a80e2266a40fa710e16f8cc43ebc5ea6b17646 Mon Sep 17 00:00:00 2001 From: rileykk Date: Mon, 29 Jan 2024 12:57:46 -0800 Subject: [PATCH 67/70] Don't write an empty row between meta blocks in CSV writer --- analysis/webservice/algorithms/doms/BaseDomsHandler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py index 11da3eca..bc7db09e 100644 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -119,7 +119,6 @@ def create(executionId, results, params, details, metadata): try: DomsCSVFormatter.__addConstants(csv_mem_file) DomsCSVFormatter.__addDynamicAttrs(csv_mem_file, executionId, results, params, details) - csv.writer(csv_mem_file).writerow([]) DomsCSVFormatter.__addMetadata(csv_mem_file, metadata) csv.writer(csv_mem_file).writerow([]) From 34b7b95c273ddad1119a1e5ca5f9c3899551ca08 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 1 Feb 2024 15:30:09 -0800 Subject: [PATCH 68/70] Moved changelog entries --- CHANGELOG.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ba371e0..94e2fa60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,11 +41,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-482: Updated Saildrone in situ endpoint in config file - SDAP-485: Improved behavior for retrying failed Cassandra inserts when saving matchup results. - SDAP-487: Improved result fetch speed for large matchup results by tweaking `doms.doms_data` schema to support querying by primary value id. -- SDAP-493: - - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - - Updated /job endpoint with details about number of primary and secondary tiles. -- SDAP-500: Improvements to SDAP Asynchronous Jobs -- SDAP-499: Added page number to default filename for matchup output - Support for deploying on k8s version 1.25: - Upgraded Cassandra Helm chart dependency version - Bumped default Cassandra protocol version 3 -> 4 in webapp and tools @@ -57,8 +52,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - SDAP-465: Removed `climatology` directory. 
- SDAP-501: Updated dependencies to remove `chardet` -- SDAP-493: - - Removed `resultSizeLimit` from /match_spark endpoint ### Fixed - SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters - SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark` From 6de825b3c83e1b15e261c6af70da010856961934 Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 1 Feb 2024 15:33:05 -0800 Subject: [PATCH 69/70] SDAP-472 changelog entries --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94e2fa60..84eced44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - SDAP-506: - Added STAC Catalog endpoint for matchup outputs +- SDAP-472: + - Support for Zarr backend (gridded data only) + - Dataset management endpoints for Zarr datasets ### Changed - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint - Updated /job endpoint with details about number of primary and secondary tiles. - SDAP-500: Improvements to SDAP Asynchronous Jobs - SDAP-499: Added page number to default filename for matchup output +- SDAP-472: Overhauled `data-access` to support multiple backends for simultaneous support of multiple ARD formats ### Deprecated ### Removed - SDAP-493: From 2aaf07b0d8170a76467abb80f62fdb26044d642a Mon Sep 17 00:00:00 2001 From: rileykk Date: Thu, 1 Feb 2024 15:34:20 -0800 Subject: [PATCH 70/70] SDAP-498 changelog entries --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84eced44..60cb6081 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SDAP-472: - Support for Zarr backend (gridded data only) - Dataset management endpoints for Zarr datasets +- SDAP-498: Support for satellite units & other dataset-level metadata ### Changed - SDAP-493: - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint