From 07312bf3b764e7a1b889d96498905504f9c8d5a1 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Thu, 16 Jul 2020 18:54:07 -0700 Subject: [PATCH 01/26] updated helm chart for zookeeper --- helm/templates/granule-ingester.yml | 4 ++-- helm/templates/history-pvc.yml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index 2ce03b68..ba4794a2 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -27,8 +27,8 @@ spec: value: {{ .Values.rabbitmq.fullnameOverride }} - name: CASSANDRA_CONTACT_POINTS value: sdap-cassandra - - name: SOLR_HOST_AND_PORT - value: http://sdap-solr:8983 + - name: ZK_HOST_AND_PORT + value: zk-cs:2181 resources: requests: cpu: {{ .Values.ingestion.granuleIngester.cpu }} diff --git a/helm/templates/history-pvc.yml b/helm/templates/history-pvc.yml index 3ecabe9d..ed18f767 100644 --- a/helm/templates/history-pvc.yml +++ b/helm/templates/history-pvc.yml @@ -2,6 +2,8 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: name: history-volume-claim + annotations: + helm.sh/resource-policy: "keep" spec: accessModes: - ReadWriteOnce From 73c9e41e60c508254a55601698ca5f7ec027388b Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 20 Jul 2020 17:03:23 -0700 Subject: [PATCH 02/26] use solr and zk helm charts --- docker/solr/cloud-init/create-collection.py | 4 +- helm/requirements.yaml | 5 + helm/templates/granule-ingester.yml | 3 +- helm/templates/solr-create-collection.yml | 34 +++++ helm/templates/solr.yml | 129 ------------------ helm/templates/zookeeper.yml | 144 -------------------- helm/values.yaml | 36 +++-- 7 files changed, 65 insertions(+), 290 deletions(-) create mode 100644 helm/templates/solr-create-collection.yml delete mode 100644 helm/templates/solr.yml delete mode 100644 helm/templates/zookeeper.yml diff --git a/docker/solr/cloud-init/create-collection.py b/docker/solr/cloud-init/create-collection.py index eb77d001..9fce9f05 100755 --- a/docker/solr/cloud-init/create-collection.py +++ b/docker/solr/cloud-init/create-collection.py @@ -142,5 +142,5 @@ def get_cluster_status(): # We're done, do nothing forever. 
logging.info("Done.") -while True: - time.sleep(987654321) +# while True: +# time.sleep(987654321) diff --git a/helm/requirements.yaml b/helm/requirements.yaml index 7970f294..57a7e13e 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -7,5 +7,10 @@ dependencies: version: 7.1.0 repository: https://charts.bitnami.com/bitnami condition: ingestion.enabled + - name: solr + version: 1.5.2 + repository: http://storage.googleapis.com/kubernetes-charts-incubator + condition: ingestion.enabled + diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index ba4794a2..078e744a 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -17,6 +17,7 @@ spec: spec: containers: - image: {{ .Values.ingestion.granuleIngester.image }} + imagePullPolicy: IfNotPresent name: granule-ingester env: - name: RABBITMQ_USERNAME @@ -28,7 +29,7 @@ spec: - name: CASSANDRA_CONTACT_POINTS value: sdap-cassandra - name: ZK_HOST_AND_PORT - value: zk-cs:2181 + value: {{ .Release.Namespace }}-zookeeper:2181 resources: requests: cpu: {{ .Values.ingestion.granuleIngester.cpu }} diff --git a/helm/templates/solr-create-collection.yml b/helm/templates/solr-create-collection.yml new file mode 100644 index 00000000..6886cdd8 --- /dev/null +++ b/helm/templates/solr-create-collection.yml @@ -0,0 +1,34 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: solr-create-collection +spec: +# selector: +# matchLabels: +# app: solr-create-collection # has to match .spec.template.metadata.labels +# replicas: 1 + template: + metadata: + labels: + app: solr-create-collection + spec: + containers: + - name: solr-create-collection + imagePullPolicy: Always + image: nexusjpl/solr-cloud-init:1.0.1 + resources: + requests: + memory: "1Gi" + cpu: "0.25" + env: + - name: MINIMUM_NODES + value: "{{ .Values.solr.replicaCount }}" + - name: SOLR_HOST + value: "{{ .Release.Namespace }}-solr-svc" + - name: SDAP_SOLR_URL + value: "http://$(SOLR_HOST):8983/solr/" + - name: SDAP_ZK_SOLR + value: "{{ .Release.Namespace }}-zookeeper:2181/solr" + - name: CREATE_COLLECTION_PARAMS + value: "name=nexustiles&numShards=$(MINIMUM_NODES)&waitForFinalState=true" + restartPolicy: OnFailure diff --git a/helm/templates/solr.yml b/helm/templates/solr.yml deleted file mode 100644 index c8d0f9b0..00000000 --- a/helm/templates/solr.yml +++ /dev/null @@ -1,129 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: sdap-solr -spec: - ports: - - port: 8983 - clusterIP: None - selector: - app: sdap-solr - ---- - -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: solr-set -spec: - selector: - matchLabels: - app: sdap-solr # has to match .spec.template.metadata.labels - serviceName: "sdap-solr" - replicas: {{.Values.solr.replicas }} # by default is 1 - podManagementPolicy: Parallel - template: - metadata: - labels: - app: sdap-solr # has to match .spec.selector.matchLabels - spec: - terminationGracePeriodSeconds: 10 - {{ if .Values.solr.tolerations }} - tolerations: -{{ .Values.solr.tolerations | toYaml | indent 6 }} - {{ end }} - {{ if .Values.solr.nodeSelector }} - nodeSelector: -{{ .Values.solr.nodeSelector | toYaml | indent 8 }} - {{ end }} - affinity: - podAntiAffinity: - # Prefer spreading over all hosts - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: "app" - operator: In - values: - - sdap-solr - topologyKey: "kubernetes.io/hostname" - securityContext: - runAsUser: 8983 - fsGroup: 8983 - containers: - 
- name: solr-create-collection - imagePullPolicy: Always - image: nexusjpl/solr-cloud-init:1.0.0-rc1 - resources: - requests: - memory: "1Gi" - cpu: "0.25" - env: - - name: MINIMUM_NODES - value: "2" # MINIMUM_NODES should be the same as spec.replicas - - name: SOLR_HOST - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: SDAP_SOLR_URL - value: http://$(SOLR_HOST):8983/solr/ - - name: SDAP_ZK_SOLR - value: "zk-hs:2181/solr" - - name: CREATE_COLLECTION_PARAMS - value: "name=nexustiles&collection.configName=nexustiles&numShards=$(MINIMUM_NODES)&waitForFinalState=true" - - name: solr-cloud - imagePullPolicy: Always - image: nexusjpl/solr-cloud:1.0.0-rc1 - resources: - requests: - memory: {{ .Values.solr.requests.memory }} - cpu: {{ .Values.solr.requests.cpu }} - limits: - memory: {{ .Values.solr.limits.memory }} - cpu: {{ .Values.solr.limits.cpu }} - env: - - name: SOLR_HEAP - value: {{ .Values.solr.heap }} - - name: SOLR_HOST - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: SDAP_ZK_SERVICE_HOST - value: "zk-hs" - ports: - - containerPort: 8983 - name: http - volumeMounts: - - name: solr-data - mountPath: /opt/solr/server/solr/ - readinessProbe: - exec: - command: - - solr - - healthcheck - - -c - - nexustiles - - -z - - zk-hs:2181/solr - initialDelaySeconds: 10 - timeoutSeconds: 5 - livenessProbe: - exec: - command: - - solr - - assert - - -s - - http://localhost:8983/solr/ - initialDelaySeconds: 10 - timeoutSeconds: 5 - volumeClaimTemplates: - - metadata: - name: solr-data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: {{ .Values.storageClass }} - resources: - requests: - storage: {{ .Values.solr.storage }} diff --git a/helm/templates/zookeeper.yml b/helm/templates/zookeeper.yml deleted file mode 100644 index bdc39258..00000000 --- a/helm/templates/zookeeper.yml +++ /dev/null @@ -1,144 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: zk-hs - labels: - app: zk -spec: - ports: - - port: 2888 - name: server - - port: 3888 - name: leader-election - clusterIP: None - selector: - app: zk ---- -apiVersion: v1 -kind: Service -metadata: - name: zk-cs - labels: - app: zk -spec: - ports: - - port: 2181 - name: client - selector: - app: zk ---- -apiVersion: policy/v1beta1 -kind: PodDisruptionBudget -metadata: - name: zk-pdb -spec: - selector: - matchLabels: - app: zk - maxUnavailable: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zk -spec: - selector: - matchLabels: - app: zk - serviceName: zk-hs - replicas: {{ .Values.zookeeper.replicas }} - updateStrategy: - type: RollingUpdate - podManagementPolicy: Parallel - template: - metadata: - labels: - app: zk - spec: - {{ if .Values.zookeeper.tolerations }} - tolerations: -{{ .Values.zookeeper.tolerations | toYaml | indent 6 }} - {{ end }} - {{ if .Values.zookeeper.nodeSelector }} - nodeSelector: -{{ .Values.zookeeper.nodeSelector | toYaml | indent 8 }} - {{ end }} - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: "app" - operator: In - values: - - zk - topologyKey: "kubernetes.io/hostname" - containers: - - name: kubernetes-zookeeper - imagePullPolicy: Always - image: "k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10" - resources: - requests: - memory: {{ .Values.zookeeper.memory }} - cpu: {{ .Values.zookeeper.cpu }} - ports: - - containerPort: 2181 - name: client - - containerPort: 2888 - name: server - - containerPort: 3888 - name: leader-election - command: - - sh - - -c - - 
"start-zookeeper \ - --servers={{ .Values.zookeeper.replicas }} \ - --data_dir=/var/lib/zookeeper/data \ - --data_log_dir=/var/lib/zookeeper/data/log \ - --conf_dir=/opt/zookeeper/conf \ - --client_port=2181 \ - --election_port=3888 \ - --server_port=2888 \ - --tick_time=2000 \ - --init_limit=10 \ - --sync_limit=5 \ - --heap=512M \ - --max_client_cnxns=60 \ - --snap_retain_count=3 \ - --purge_interval=12 \ - --max_session_timeout=40000 \ - --min_session_timeout=4000 \ - --log_level=INFO" - readinessProbe: - exec: - command: - - sh - - -c - - "zookeeper-ready 2181" - initialDelaySeconds: 10 - timeoutSeconds: 5 - livenessProbe: - exec: - command: - - sh - - -c - - "zookeeper-ready 2181" - initialDelaySeconds: 10 - timeoutSeconds: 5 - volumeMounts: - - name: zkdatadir - mountPath: /var/lib/zookeeper - securityContext: - runAsUser: 1000 - fsGroup: 1000 - volumeClaimTemplates: - - metadata: - name: zkdatadir - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: {{ .Values.storageClass }} - resources: - requests: - storage: {{ .Values.zookeeper.storage }} diff --git a/helm/values.yaml b/helm/values.yaml index c012e6e1..5ad3b5b4 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -110,21 +110,29 @@ cassandra: memory: 3Gi solr: - replicas: 2 + replicaCount: 3 storage: 10Gi - heap: 4g - requests: - memory: 5Gi - cpu: 1 - limits: - memory: 5Gi - cpu: 1 - -zookeeper: - replicas: 3 - memory: 1Gi - cpu: 0.5 - storage: 8Gi + volumeClaimTemplates: + storageClassName: hostpath + storageSize: 10Gi + resources: + requests: + memory: 2Gi + cpu: 1 + limits: + memory: 2Gi + cpu: 1 + zookeeper: + replicaCount: 3 + persistence: + storageClass: hostpath + resources: + limits: + memory: 1Gi + cpu: 0.5 + requests: + memory: 1Gi + cpu: 0.5 ingressEnabled: false From a74b22de34cb368655e10c2876b0ea6a64f165a6 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 20 Jul 2020 17:20:43 -0700 Subject: [PATCH 03/26] change .Release.Namespace to .Release.Name --- helm/templates/granule-ingester.yml | 2 +- helm/templates/solr-create-collection.yml | 4 ++-- helm/values.yaml | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index 078e744a..16cac4df 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -29,7 +29,7 @@ spec: - name: CASSANDRA_CONTACT_POINTS value: sdap-cassandra - name: ZK_HOST_AND_PORT - value: {{ .Release.Namespace }}-zookeeper:2181 + value: {{ .Release.Name }}-zookeeper:2181 resources: requests: cpu: {{ .Values.ingestion.granuleIngester.cpu }} diff --git a/helm/templates/solr-create-collection.yml b/helm/templates/solr-create-collection.yml index 6886cdd8..7db5b97e 100644 --- a/helm/templates/solr-create-collection.yml +++ b/helm/templates/solr-create-collection.yml @@ -24,11 +24,11 @@ spec: - name: MINIMUM_NODES value: "{{ .Values.solr.replicaCount }}" - name: SOLR_HOST - value: "{{ .Release.Namespace }}-solr-svc" + value: "{{ .Release.Name }}-solr-svc" - name: SDAP_SOLR_URL value: "http://$(SOLR_HOST):8983/solr/" - name: SDAP_ZK_SOLR - value: "{{ .Release.Namespace }}-zookeeper:2181/solr" + value: "{{ .Release.Name }}-zookeeper:2181/solr" - name: CREATE_COLLECTION_PARAMS value: "name=nexustiles&numShards=$(MINIMUM_NODES)&waitForFinalState=true" restartPolicy: OnFailure diff --git a/helm/values.yaml b/helm/values.yaml index 5ad3b5b4..b04dffd9 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -111,7 +111,6 @@ cassandra: solr: replicaCount: 3 - storage: 10Gi 
volumeClaimTemplates: storageClassName: hostpath storageSize: 10Gi From cb1efbaa9f03e310cd9bdba63d5d2ea178b8f648 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 20 Jul 2020 17:33:05 -0700 Subject: [PATCH 04/26] add rabbitmq storageclass --- helm/values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm/values.yaml b/helm/values.yaml index b04dffd9..aa03c0aa 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -157,6 +157,8 @@ nginx-ingress: rabbitmq: ## fullnameOverride sets the name of the RabbitMQ service ## with which the ingestion components will communicate. + persistence: + storageClass: hostpath fullnameOverride: rabbitmq replicaCount: 1 auth: From 45d087de74ca4f188200f6c5ac48ddb5bb0e5dfd Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Tue, 21 Jul 2020 12:03:03 -0700 Subject: [PATCH 05/26] fix rbac --- helm/templates/config-operator-rbac.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/helm/templates/config-operator-rbac.yml b/helm/templates/config-operator-rbac.yml index 54064d51..6626b0ba 100644 --- a/helm/templates/config-operator-rbac.yml +++ b/helm/templates/config-operator-rbac.yml @@ -6,7 +6,7 @@ metadata: --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: config-operator-role-binding roleRef: @@ -16,4 +16,6 @@ roleRef: subjects: - kind: ServiceAccount name: config-operator + namespace: {{ .Release.Namespace }} + From d41e0d7bc7c069e3b7c86c19c683b755192330aa Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Tue, 21 Jul 2020 16:51:48 -0700 Subject: [PATCH 06/26] add max_concurrency --- helm/templates/granule-ingester.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index 16cac4df..2a716f17 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -30,6 +30,10 @@ spec: value: sdap-cassandra - name: ZK_HOST_AND_PORT value: {{ .Release.Name }}-zookeeper:2181 + {{ if .Values.ingestion.granuleIngester.maxConcurrency }} + - name: MAX_CONCURRENCY + value: "{{ .Values.ingestion.granuleIngester.maxConcurrency }}" + {{ end }} resources: requests: cpu: {{ .Values.ingestion.granuleIngester.cpu }} From b17632ba402a6c7f05a446fa6cf12731a719209d Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 22 Jul 2020 11:40:53 -0700 Subject: [PATCH 07/26] add solr_host arg --- helm/templates/webapp.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index d77496f1..9f72dbaf 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -11,6 +11,8 @@ spec: image: {{ .Values.webapp.distributed.image }} imagePullPolicy: Always mainApplicationFile: local:///incubator-sdap-nexus/analysis/webservice/webapp.py + arguments: + - "--solr-host={{ .Release.Name }}-solr-svc" sparkVersion: "2.4.4" restartPolicy: type: OnFailure From 36e9aa84acf1207c96057e342509df46e8274c51 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 22 Jul 2020 11:43:21 -0700 Subject: [PATCH 08/26] add solr port --- helm/templates/webapp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index 9f72dbaf..e363ab75 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -12,7 +12,7 @@ spec: imagePullPolicy: Always mainApplicationFile: local:///incubator-sdap-nexus/analysis/webservice/webapp.py arguments: - - "--solr-host={{ .Release.Name }}-solr-svc" + - "--solr-host={{ 
.Release.Name }}-solr-svc:8983" sparkVersion: "2.4.4" restartPolicy: type: OnFailure From 153a5700354dd8d4c323b6178750dd35cb348ecd Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 22 Jul 2020 11:51:56 -0700 Subject: [PATCH 09/26] always deploy solr --- helm/requirements.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/helm/requirements.yaml b/helm/requirements.yaml index 57a7e13e..ebcd9c0d 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -10,7 +10,6 @@ dependencies: - name: solr version: 1.5.2 repository: http://storage.googleapis.com/kubernetes-charts-incubator - condition: ingestion.enabled From 1bcd3fca885757ac28063eab34866d24b418cd1d Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 22 Jul 2020 16:10:53 -0700 Subject: [PATCH 10/26] read cli args for cass and solr hosts --- .gitignore | 1 - analysis/setup.py | 3 +- .../tests/algorithms_spark/Matchup_test.py | 321 --------- analysis/tests/algorithms_spark/__init__.py | 16 - .../algorithms/doms/BaseDomsHandler.py | 635 ------------------ .../algorithms/doms/DatasetListQuery.py | 116 ---- .../algorithms/doms/MatchupQuery.py | 452 ------------- .../algorithms/doms/MetadataQuery.py | 65 -- .../algorithms/doms/ResultsPlotQuery.py | 55 -- .../algorithms/doms/ResultsRetrieval.py | 49 -- .../algorithms/doms/ResultsStorage.py | 286 -------- .../webservice/algorithms/doms/StatsQuery.py | 63 -- .../webservice/algorithms/doms/ValuesQuery.py | 72 -- .../webservice/algorithms/doms/__init__.py | 34 - analysis/webservice/algorithms/doms/config.py | 109 --- .../webservice/algorithms/doms/datafetch.py | 47 -- .../algorithms/doms/fetchedgeimpl.py | 217 ------ analysis/webservice/algorithms/doms/geo.py | 129 ---- .../algorithms/doms/histogramplot.py | 127 ---- .../algorithms/doms/insitusubset.py | 263 -------- .../webservice/algorithms/doms/mapplot.py | 175 ----- .../webservice/algorithms/doms/scatterplot.py | 118 ---- .../webservice/algorithms/doms/subsetter.py | 260 ------- analysis/webservice/algorithms/doms/values.py | 72 -- .../algorithms/doms/workerthread.py | 61 -- .../webservice/algorithms_spark/__init__.py | 6 - analysis/webservice/config/web.ini | 2 +- helm/templates/webapp.yml | 2 +- tools/doms/README.md | 66 -- tools/doms/doms_reader.py | 144 ---- 30 files changed, 3 insertions(+), 3963 deletions(-) delete mode 100644 analysis/tests/algorithms_spark/Matchup_test.py delete mode 100644 analysis/tests/algorithms_spark/__init__.py delete mode 100644 analysis/webservice/algorithms/doms/BaseDomsHandler.py delete mode 100644 analysis/webservice/algorithms/doms/DatasetListQuery.py delete mode 100644 analysis/webservice/algorithms/doms/MatchupQuery.py delete mode 100644 analysis/webservice/algorithms/doms/MetadataQuery.py delete mode 100644 analysis/webservice/algorithms/doms/ResultsPlotQuery.py delete mode 100644 analysis/webservice/algorithms/doms/ResultsRetrieval.py delete mode 100644 analysis/webservice/algorithms/doms/ResultsStorage.py delete mode 100644 analysis/webservice/algorithms/doms/StatsQuery.py delete mode 100644 analysis/webservice/algorithms/doms/ValuesQuery.py delete mode 100644 analysis/webservice/algorithms/doms/__init__.py delete mode 100644 analysis/webservice/algorithms/doms/config.py delete mode 100644 analysis/webservice/algorithms/doms/datafetch.py delete mode 100644 analysis/webservice/algorithms/doms/fetchedgeimpl.py delete mode 100644 analysis/webservice/algorithms/doms/geo.py delete mode 100644 analysis/webservice/algorithms/doms/histogramplot.py delete mode 100644 
analysis/webservice/algorithms/doms/insitusubset.py delete mode 100644 analysis/webservice/algorithms/doms/mapplot.py delete mode 100644 analysis/webservice/algorithms/doms/scatterplot.py delete mode 100644 analysis/webservice/algorithms/doms/subsetter.py delete mode 100644 analysis/webservice/algorithms/doms/values.py delete mode 100644 analysis/webservice/algorithms/doms/workerthread.py delete mode 100644 tools/doms/README.md delete mode 100644 tools/doms/doms_reader.py diff --git a/.gitignore b/.gitignore index 3e296266..4e4cf6ec 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,5 @@ *.code-workspace *.idea *.DS_Store -analysis/webservice/algorithms/doms/domsconfig.ini data-access/nexustiles/config/datastores.ini venv/ diff --git a/analysis/setup.py b/analysis/setup.py index 62a68916..9a449ceb 100644 --- a/analysis/setup.py +++ b/analysis/setup.py @@ -50,8 +50,7 @@ # 'webservice.nexus_tornado.request.renderers' #], package_data={ - 'webservice': ['config/web.ini', 'config/algorithms.ini'], - 'webservice.algorithms.doms': ['domsconfig.ini.default'] + 'webservice': ['config/web.ini', 'config/algorithms.ini'] }, data_files=[ ('static', ['static/index.html']) diff --git a/analysis/tests/algorithms_spark/Matchup_test.py b/analysis/tests/algorithms_spark/Matchup_test.py deleted file mode 100644 index 5dee17ca..00000000 --- a/analysis/tests/algorithms_spark/Matchup_test.py +++ /dev/null @@ -1,321 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import pickle -import random -import timeit -import unittest - -from webservice.algorithms_spark.Matchup import * - - -class TestMatch_Points(unittest.TestCase): - def test_one_point_match_exact(self): - primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) - matchup = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=2) - - primary_points = [primary] - matchup_points = [matchup] - - matches = list(match_points_generator(primary_points, matchup_points, 0)) - - self.assertEquals(1, len(matches)) - - p_match_point, match = matches[0] - - self.assertEqual(primary, p_match_point) - self.assertEqual(matchup, match) - - def test_one_point_match_within_tolerance_150km(self): - primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) - matchup = DomsPoint(longitude=1.0, latitude=3.0, time=1000, depth=5.0, data_id=2) - - primary_points = [primary] - matchup_points = [matchup] - - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km - - self.assertEquals(1, len(matches)) - - p_match_point, match = matches[0] - - self.assertEqual(primary, p_match_point) - self.assertEqual(matchup, match) - - def test_one_point_match_within_tolerance_200m(self): - primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) - matchup = DomsPoint(longitude=1.001, latitude=2.0, time=1000, depth=5.0, data_id=2) - - primary_points = [primary] - matchup_points = [matchup] - - matches = list(match_points_generator(primary_points, matchup_points, 200)) # tolerance 200 m - - self.assertEquals(1, len(matches)) - - p_match_point, match = matches[0] - - self.assertEqual(primary, p_match_point) - self.assertEqual(matchup, match) - - def test_one_point_not_match_tolerance_150km(self): - primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) - matchup = DomsPoint(longitude=1.0, latitude=4.0, time=1000, depth=5.0, data_id=2) - - primary_points = [primary] - matchup_points = [matchup] - - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km - - self.assertEquals(0, len(matches)) - - def test_one_point_not_match_tolerance_100m(self): - primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) - matchup = DomsPoint(longitude=1.001, latitude=2.0, time=1000, depth=5.0, data_id=2) - - primary_points = [primary] - matchup_points = [matchup] - - matches = list(match_points_generator(primary_points, matchup_points, 100)) # tolerance 100 m - - self.assertEquals(0, len(matches)) - - def test_multiple_point_match(self): - primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) - primary_points = [primary] - - matchup_points = [ - DomsPoint(longitude=1.0, latitude=3.0, time=1000, depth=10.0, data_id=2), - DomsPoint(longitude=2.0, latitude=2.0, time=1000, depth=0.0, data_id=3), - DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0, data_id=4) - ] - - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km - - self.assertEquals(3, len(matches)) - - self.assertSetEqual({primary}, {x[0] for x in matches}) - - list_of_matches = [x[1] for x in matches] - - self.assertEquals(3, len(list_of_matches)) - self.assertItemsEqual(matchup_points, list_of_matches) - - def test_multiple_point_match_multiple_times(self): - primary_points = [ - DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1), - DomsPoint(longitude=1.5, 
latitude=1.5, time=1000, depth=5.0, data_id=2) - ] - - matchup_points = [ - DomsPoint(longitude=1.0, latitude=3.0, time=1000, depth=10.0, data_id=3), - DomsPoint(longitude=2.0, latitude=2.0, time=1000, depth=0.0, data_id=4), - DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0, data_id=5) - ] - - matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km - - self.assertEquals(5, len(matches)) - - self.assertSetEqual({p for p in primary_points}, {x[0] for x in matches}) - - # First primary point matches all 3 secondary - self.assertEquals(3, [x[0] for x in matches].count(primary_points[0])) - self.assertItemsEqual(matchup_points, [x[1] for x in matches if x[0] == primary_points[0]]) - - # Second primary point matches only last 2 secondary - self.assertEquals(2, [x[0] for x in matches].count(primary_points[1])) - self.assertItemsEqual(matchup_points[1:], [x[1] for x in matches if x[0] == primary_points[1]]) - - def test_one_of_many_primary_matches_one_of_many_matchup(self): - primary_points = [ - DomsPoint(longitude=-33.76764, latitude=30.42946, time=1351553994, data_id=1), - DomsPoint(longitude=-33.75731, latitude=29.86216, time=1351554004, data_id=2) - ] - - matchup_points = [ - DomsPoint(longitude=-33.762, latitude=28.877, time=1351521432, depth=3.973, data_id=3), - DomsPoint(longitude=-34.916, latitude=28.879, time=1351521770, depth=2.9798, data_id=4), - DomsPoint(longitude=-31.121, latitude=31.256, time=1351519892, depth=4.07, data_id=5) - ] - - matches = list(match_points_generator(primary_points, matchup_points, 110000)) # tolerance 110 km - - self.assertEquals(1, len(matches)) - - self.assertSetEqual({p for p in primary_points if p.data_id == 2}, {x[0] for x in matches}) - - # First primary point matches none - self.assertEquals(0, [x[0] for x in matches].count(primary_points[0])) - - # Second primary point matches only first secondary - self.assertEquals(1, [x[0] for x in matches].count(primary_points[1])) - self.assertItemsEqual(matchup_points[0:1], [x[1] for x in matches if x[0] == primary_points[1]]) - - @unittest.skip("This test is just for timing, doesn't actually assert anything.") - def test_time_many_primary_many_matchup(self): - import logging - import sys - logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) - log = logging.getLogger(__name__) - # Generate 160000 DomsPoints distributed equally in a box from -2.0 lat/lon to 2.0 lat/lon - log.info("Generating primary points") - x = np.arange(-2.0, 2.0, 0.01) - y = np.arange(-2.0, 2.0, 0.01) - primary_points = [DomsPoint(longitude=xy[0], latitude=xy[1], time=1000, depth=5.0, data_id=i) for i, xy in - enumerate(np.array(np.meshgrid(x, y)).T.reshape(-1, 2))] - - # Generate 2000 DomsPoints distributed randomly in a box from -2.0 lat/lon to 2.0 lat/lon - log.info("Generating matchup points") - matchup_points = [ - DomsPoint(longitude=random.uniform(-2.0, 2.0), latitude=random.uniform(-2.0, 2.0), time=1000, depth=5.0, - data_id=i) for i in xrange(0, 2000)] - - log.info("Starting matchup") - log.info("Best of repeat(3, 2) matchups: %s seconds" % min( - timeit.repeat(lambda: list(match_points_generator(primary_points, matchup_points, 1500)), repeat=3, - number=2))) - - -class TestDOMSPoint(unittest.TestCase): - def test_is_pickleable(self): - edge_point = json.loads("""{ -"id": "argo-profiles-5903995(46, 0)", -"time": "2012-10-15T14:24:04Z", -"point": "-33.467 29.728", 
-"sea_water_temperature": 24.5629997253, -"sea_water_temperature_depth": 2.9796258642, -"wind_speed": null, -"sea_water_salinity": null, -"sea_water_salinity_depth": null, -"platform": 4, -"device": 3, -"fileurl": "ftp://podaac-ftp.jpl.nasa.gov/allData/insitu/L2/spurs1/argo/argo-profiles-5903995.nc" -}""") - point = DomsPoint.from_edge_point(edge_point) - self.assertIsNotNone(pickle.dumps(point)) - - -def check_all(): - return check_solr() and check_cass() and check_edge() - - -def check_solr(): - # TODO eventually this might do something. - return False - - -def check_cass(): - # TODO eventually this might do something. - return False - - -def check_edge(): - # TODO eventually this might do something. - return False - - -@unittest.skipUnless(check_all(), - "These tests require local instances of Solr, Cassandra, and Edge to be running.") -class TestMatchup(unittest.TestCase): - def setUp(self): - from os import environ - environ['PYSPARK_DRIVER_PYTHON'] = '/Users/greguska/anaconda/envs/nexus-analysis/bin/python2.7' - environ['PYSPARK_PYTHON'] = '/Users/greguska/anaconda/envs/nexus-analysis/bin/python2.7' - environ['SPARK_HOME'] = '/Users/greguska/sandbox/spark-2.0.0-bin-hadoop2.7' - - def test_mur_match(self): - from shapely.wkt import loads - from nexustiles.nexustiles import NexusTileService - - polygon = loads("POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))") - primary_ds = "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1" - matchup_ds = "spurs" - parameter = "sst" - start_time = 1350259200 # 2012-10-15T00:00:00Z - end_time = 1350345600 # 2012-10-16T00:00:00Z - time_tolerance = 86400 - depth_tolerance = 5.0 - radius_tolerance = 1500.0 - platforms = "1,2,3,4,5,6,7,8,9" - - tile_service = NexusTileService() - tile_ids = [tile.tile_id for tile in - tile_service.find_tiles_in_polygon(polygon, primary_ds, start_time, end_time, fetch_data=False, - fl='id')] - result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds, matchup_ds, parameter, time_tolerance, - depth_tolerance, radius_tolerance, platforms) - for k, v in result.iteritems(): - print "primary: %s\n\tmatches:\n\t\t%s" % ( - "lon: %s, lat: %s, time: %s, sst: %s" % (k.longitude, k.latitude, k.time, k.sst), - '\n\t\t'.join( - ["lon: %s, lat: %s, time: %s, sst: %s" % (i.longitude, i.latitude, i.time, i.sst) for i in v])) - - def test_smap_match(self): - from shapely.wkt import loads - from nexustiles.nexustiles import NexusTileService - - polygon = loads("POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))") - primary_ds = "SMAP_L2B_SSS" - matchup_ds = "spurs" - parameter = "sss" - start_time = 1350259200 # 2012-10-15T00:00:00Z - end_time = 1350345600 # 2012-10-16T00:00:00Z - time_tolerance = 86400 - depth_tolerance = 5.0 - radius_tolerance = 1500.0 - platforms = "1,2,3,4,5,6,7,8,9" - - tile_service = NexusTileService() - tile_ids = [tile.tile_id for tile in - tile_service.find_tiles_in_polygon(polygon, primary_ds, start_time, end_time, fetch_data=False, - fl='id')] - result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds, matchup_ds, parameter, time_tolerance, - depth_tolerance, radius_tolerance, platforms) - for k, v in result.iteritems(): - print "primary: %s\n\tmatches:\n\t\t%s" % ( - "lon: %s, lat: %s, time: %s, sst: %s" % (k.longitude, k.latitude, k.time, k.sst), - '\n\t\t'.join( - ["lon: %s, lat: %s, time: %s, sst: %s" % (i.longitude, i.latitude, i.time, i.sst) for i in v])) - - def test_ascatb_match(self): - from shapely.wkt import loads - from 
nexustiles.nexustiles import NexusTileService - - polygon = loads("POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))") - primary_ds = "ASCATB-L2-Coastal" - matchup_ds = "spurs" - parameter = "wind" - start_time = 1351468800 # 2012-10-29T00:00:00Z - end_time = 1351555200 # 2012-10-30T00:00:00Z - time_tolerance = 86400 - depth_tolerance = 5.0 - radius_tolerance = 110000.0 # 110 km - platforms = "1,2,3,4,5,6,7,8,9" - - tile_service = NexusTileService() - tile_ids = [tile.tile_id for tile in - tile_service.find_tiles_in_polygon(polygon, primary_ds, start_time, end_time, fetch_data=False, - fl='id')] - result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds, matchup_ds, parameter, time_tolerance, - depth_tolerance, radius_tolerance, platforms) - for k, v in result.iteritems(): - print "primary: %s\n\tmatches:\n\t\t%s" % ( - "lon: %s, lat: %s, time: %s, wind u,v: %s,%s" % (k.longitude, k.latitude, k.time, k.wind_u, k.wind_v), - '\n\t\t'.join( - ["lon: %s, lat: %s, time: %s, wind u,v: %s,%s" % ( - i.longitude, i.latitude, i.time, i.wind_u, i.wind_v) for i in v])) diff --git a/analysis/tests/algorithms_spark/__init__.py b/analysis/tests/algorithms_spark/__init__.py deleted file mode 100644 index 07073680..00000000 --- a/analysis/tests/algorithms_spark/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py deleted file mode 100644 index d07f929e..00000000 --- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py +++ /dev/null @@ -1,635 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import StringIO -import os -import csv -import json -from datetime import datetime -import time -from decimal import Decimal - -import numpy as np -from pytz import timezone, UTC - -import config -import geo -from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler -from webservice.webmodel import NexusResults - -EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - -try: - from osgeo import gdal - from osgeo.gdalnumeric import * -except ImportError: - import gdal - from gdalnumeric import * - -from netCDF4 import Dataset -import netCDF4 -import tempfile - - -class BaseDomsQueryCalcHandler(BaseHandler): - def __init__(self): - BaseHandler.__init__(self) - - def getDataSourceByName(self, source): - for s in config.ENDPOINTS: - if s["name"] == source: - return s - return None - - def _does_datasource_exist(self, ds): - for endpoint in config.ENDPOINTS: - if endpoint["name"] == ds: - return True - return False - - -class DomsEncoder(json.JSONEncoder): - def __init__(self, **args): - json.JSONEncoder.__init__(self, **args) - - def default(self, obj): - # print 'MyEncoder.default() called' - # print type(obj) - if obj == np.nan: - return None # hard code string for now - elif isinstance(obj, datetime): - return long((obj - EPOCH).total_seconds()) - elif isinstance(obj, Decimal): - return str(obj) - else: - return json.JSONEncoder.default(self, obj) - - -class DomsQueryResults(NexusResults): - def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None, - executionId=None, status_code=200): - NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions, - status_code=status_code) - self.__args = args - self.__bounds = bounds - self.__count = count - self.__details = details - self.__executionId = str(executionId) - - def toJson(self): - bounds = self.__bounds.toMap() if self.__bounds is not None else {} - return json.dumps( - {"executionId": self.__executionId, "data": self.results(), "params": self.__args, "bounds": bounds, - "count": self.__count, "details": self.__details}, indent=4, cls=DomsEncoder) - - def toCSV(self): - return DomsCSVFormatter.create(self.__executionId, self.results(), self.__args, self.__details) - - def toNetCDF(self): - return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details) - - -class DomsCSVFormatter: - @staticmethod - def create(executionId, results, params, details): - - csv_mem_file = StringIO.StringIO() - try: - DomsCSVFormatter.__addConstants(csv_mem_file) - DomsCSVFormatter.__addDynamicAttrs(csv_mem_file, executionId, results, params, details) - csv.writer(csv_mem_file).writerow([]) - - DomsCSVFormatter.__packValues(csv_mem_file, results, params) - - csv_out = csv_mem_file.getvalue() - finally: - csv_mem_file.close() - - return csv_out - - @staticmethod - def __packValues(csv_mem_file, results, params): - - writer = csv.writer(csv_mem_file) - - headers = [ - # Primary - "id", "source", "lon (degrees_east)", "lat (degrees_north)", "time", "platform", - "sea_surface_salinity (1e-3)", "sea_surface_temperature (degree_C)", "wind_speed (m s-1)", "wind_direction", - "wind_u (m s-1)", "wind_v (m s-1)", - # Match - "id", "source", "lon (degrees_east)", "lat (degrees_north)", "time", "platform", - "depth (m)", "sea_water_salinity (1e-3)", - "sea_water_temperature (degree_C)", "wind_speed (m s-1)", - "wind_direction", "wind_u (m s-1)", "wind_v (m s-1)" - ] - - writer.writerow(headers) - 
- # - # Only include the depth variable related to the match-up parameter. If the match-up parameter - # is not sss or sst then do not include any depth data, just fill values. - # - if params["parameter"] == "sss": - depth = "sea_water_salinity_depth" - elif params["parameter"] == "sst": - depth = "sea_water_temperature_depth" - else: - depth = "NO_DEPTH" - - for primaryValue in results: - for matchup in primaryValue["matches"]: - row = [ - # Primary - primaryValue["id"], primaryValue["source"], str(primaryValue["x"]), str(primaryValue["y"]), - primaryValue["time"].strftime(ISO_8601), primaryValue["platform"], - primaryValue.get("sea_water_salinity", ""), primaryValue.get("sea_water_temperature", ""), - primaryValue.get("wind_speed", ""), primaryValue.get("wind_direction", ""), - primaryValue.get("wind_u", ""), primaryValue.get("wind_v", ""), - - # Matchup - matchup["id"], matchup["source"], matchup["x"], matchup["y"], - matchup["time"].strftime(ISO_8601), matchup["platform"], - matchup.get(depth, ""), matchup.get("sea_water_salinity", ""), - matchup.get("sea_water_temperature", ""), - matchup.get("wind_speed", ""), matchup.get("wind_direction", ""), - matchup.get("wind_u", ""), matchup.get("wind_v", ""), - ] - writer.writerow(row) - - @staticmethod - def __addConstants(csvfile): - - global_attrs = [ - {"Global Attribute": "product_version", "Value": "1.0"}, - {"Global Attribute": "Conventions", "Value": "CF-1.6, ACDD-1.3"}, - {"Global Attribute": "title", "Value": "DOMS satellite-insitu machup output file"}, - {"Global Attribute": "history", - "Value": "Processing_Version = V1.0, Software_Name = DOMS, Software_Version = 1.03"}, - {"Global Attribute": "institution", "Value": "JPL, FSU, NCAR"}, - {"Global Attribute": "source", "Value": "doms.jpl.nasa.gov"}, - {"Global Attribute": "standard_name_vocabulary", - "Value": "CF Standard Name Table v27, BODC controlled vocabulary"}, - {"Global Attribute": "cdm_data_type", "Value": "Point/Profile, Swath/Grid"}, - {"Global Attribute": "processing_level", "Value": "4"}, - {"Global Attribute": "project", "Value": "Distributed Oceanographic Matchup System (DOMS)"}, - {"Global Attribute": "keywords_vocabulary", - "Value": "NASA Global Change Master Directory (GCMD) Science Keywords"}, - # TODO What should the keywords be? - {"Global Attribute": "keywords", "Value": "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, " - "NASA/JPL/PODAAC, FSU/COAPS, UCAR/NCAR, SALINITY, " - "SEA SURFACE TEMPERATURE, SURFACE WINDS"}, - {"Global Attribute": "creator_name", "Value": "NASA PO.DAAC"}, - {"Global Attribute": "creator_email", "Value": "podaac@podaac.jpl.nasa.gov"}, - {"Global Attribute": "creator_url", "Value": "https://podaac.jpl.nasa.gov/"}, - {"Global Attribute": "publisher_name", "Value": "NASA PO.DAAC"}, - {"Global Attribute": "publisher_email", "Value": "podaac@podaac.jpl.nasa.gov"}, - {"Global Attribute": "publisher_url", "Value": "https://podaac.jpl.nasa.gov"}, - {"Global Attribute": "acknowledgment", "Value": "DOMS is a NASA/AIST-funded project. 
NRA NNH14ZDA001N."}, - ] - - writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys())) - - writer.writerows(global_attrs) - - @staticmethod - def __addDynamicAttrs(csvfile, executionId, results, params, details): - - platforms = set() - for primaryValue in results: - platforms.add(primaryValue['platform']) - for match in primaryValue['matches']: - platforms.add(match['platform']) - - # insituDatasets = params["matchup"].split(",") - insituDatasets = params["matchup"] - insituLinks = set() - for insitu in insituDatasets: - insituLinks.add(config.METADATA_LINKS[insitu]) - - - global_attrs = [ - {"Global Attribute": "Platform", "Value": ', '.join(platforms)}, - {"Global Attribute": "time_coverage_start", - "Value": params["startTime"].strftime(ISO_8601)}, - {"Global Attribute": "time_coverage_end", - "Value": params["endTime"].strftime(ISO_8601)}, - {"Global Attribute": "time_coverage_resolution", "Value": "point"}, - - {"Global Attribute": "geospatial_lon_min", "Value": params["bbox"].split(',')[0]}, - {"Global Attribute": "geospatial_lat_min", "Value": params["bbox"].split(',')[1]}, - {"Global Attribute": "geospatial_lon_max", "Value": params["bbox"].split(',')[2]}, - {"Global Attribute": "geospatial_lat_max", "Value": params["bbox"].split(',')[3]}, - {"Global Attribute": "geospatial_lat_resolution", "Value": "point"}, - {"Global Attribute": "geospatial_lon_resolution", "Value": "point"}, - {"Global Attribute": "geospatial_lat_units", "Value": "degrees_north"}, - {"Global Attribute": "geospatial_lon_units", "Value": "degrees_east"}, - - {"Global Attribute": "geospatial_vertical_min", "Value": params["depthMin"]}, - {"Global Attribute": "geospatial_vertical_max", "Value": params["depthMax"]}, - {"Global Attribute": "geospatial_vertical_units", "Value": "m"}, - {"Global Attribute": "geospatial_vertical_resolution", "Value": "point"}, - {"Global Attribute": "geospatial_vertical_positive", "Value": "down"}, - - {"Global Attribute": "DOMS_matchID", "Value": executionId}, - {"Global Attribute": "DOMS_TimeWindow", "Value": params["timeTolerance"] / 60 / 60}, - {"Global Attribute": "DOMS_TimeWindow_Units", "Value": "hours"}, - - {"Global Attribute": "DOMS_platforms", "Value": params["platforms"]}, - {"Global Attribute": "DOMS_SearchRadius", "Value": params["radiusTolerance"]}, - {"Global Attribute": "DOMS_SearchRadius_Units", "Value": "m"}, - - {"Global Attribute": "DOMS_DatasetMetadata", "Value": ', '.join(insituLinks)}, - {"Global Attribute": "DOMS_primary", "Value": params["primary"]}, - {"Global Attribute": "DOMS_match_up", "Value": params["matchup"]}, - {"Global Attribute": "DOMS_ParameterPrimary", "Value": params.get("parameter", "")}, - - {"Global Attribute": "DOMS_time_to_complete", "Value": details["timeToComplete"]}, - {"Global Attribute": "DOMS_time_to_complete_units", "Value": "seconds"}, - {"Global Attribute": "DOMS_num_matchup_matched", "Value": details["numInSituMatched"]}, - {"Global Attribute": "DOMS_num_primary_matched", "Value": details["numGriddedMatched"]}, - - {"Global Attribute": "date_modified", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)}, - {"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)}, - - {"Global Attribute": "URI_Matchup", "Value": "http://{webservice}/domsresults?id=" + executionId + "&output=CSV"}, - ] - - writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys())) - - writer.writerows(global_attrs) - - -class DomsNetCDFFormatter: - @staticmethod - def 
create(executionId, results, params, details): - - t = tempfile.mkstemp(prefix="doms_", suffix=".nc") - tempFileName = t[1] - - dataset = Dataset(tempFileName, "w", format="NETCDF4") - dataset.DOMS_matchID = executionId - DomsNetCDFFormatter.__addNetCDFConstants(dataset) - - dataset.date_modified = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601) - dataset.date_created = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601) - dataset.time_coverage_start = params["startTime"].strftime(ISO_8601) - dataset.time_coverage_end = params["endTime"].strftime(ISO_8601) - dataset.time_coverage_resolution = "point" - dataset.DOMS_match_up = params["matchup"] - dataset.DOMS_num_matchup_matched = details["numInSituMatched"] - dataset.DOMS_num_primary_matched = details["numGriddedMatched"] - - bbox = geo.BoundingBox(asString=params["bbox"]) - dataset.geospatial_lat_max = bbox.north - dataset.geospatial_lat_min = bbox.south - dataset.geospatial_lon_max = bbox.east - dataset.geospatial_lon_min = bbox.west - dataset.geospatial_lat_resolution = "point" - dataset.geospatial_lon_resolution = "point" - dataset.geospatial_lat_units = "degrees_north" - dataset.geospatial_lon_units = "degrees_east" - dataset.geospatial_vertical_min = float(params["depthMin"]) - dataset.geospatial_vertical_max = float(params["depthMax"]) - dataset.geospatial_vertical_units = "m" - dataset.geospatial_vertical_resolution = "point" - dataset.geospatial_vertical_positive = "down" - - dataset.DOMS_TimeWindow = params["timeTolerance"] / 60 / 60 - dataset.DOMS_TimeWindow_Units = "hours" - dataset.DOMS_SearchRadius = float(params["radiusTolerance"]) - dataset.DOMS_SearchRadius_Units = "m" - # dataset.URI_Subset = "http://webservice subsetting query request" - dataset.URI_Matchup = "http://{webservice}/domsresults?id=" + executionId + "&output=NETCDF" - dataset.DOMS_ParameterPrimary = params["parameter"] if "parameter" in params else "" - dataset.DOMS_platforms = params["platforms"] - dataset.DOMS_primary = params["primary"] - dataset.DOMS_time_to_complete = details["timeToComplete"] - dataset.DOMS_time_to_complete_units = "seconds" - - insituDatasets = params["matchup"] - insituLinks = set() - for insitu in insituDatasets: - insituLinks.add(config.METADATA_LINKS[insitu]) - dataset.DOMS_DatasetMetadata = ', '.join(insituLinks) - - platforms = set() - for primaryValue in results: - platforms.add(primaryValue['platform']) - for match in primaryValue['matches']: - platforms.add(match['platform']) - dataset.platform = ', '.join(platforms) - - satellite_group_name = "SatelliteData" - insitu_group_name = "InsituData" - - #Create Satellite group, variables, and attributes - satelliteGroup = dataset.createGroup(satellite_group_name) - satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"]) - - # Create InSitu group, variables, and attributes - insituGroup = dataset.createGroup(insitu_group_name) - insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"]) - - # Add data to Insitu and Satellite groups, generate array of match ID pairs - matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter) - dataset.createDimension("MatchedRecords", size=None) - dataset.createDimension("MatchedGroups", size=2) - matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups")) - matchArray[:] = matches - - dataset.close() - f = open(tempFileName, "rb") - data = f.read() - f.close() - os.unlink(tempFileName) - return data - - @staticmethod - def 
__addNetCDFConstants(dataset): - dataset.product_version = "1.0" - dataset.Conventions = "CF-1.6, ACDD-1.3" - dataset.title = "DOMS satellite-insitu machup output file" - dataset.history = "Processing_Version = V1.0, Software_Name = DOMS, Software_Version = 1.03" - dataset.institution = "JPL, FSU, NCAR" - dataset.source = "doms.jpl.nasa.gov" - dataset.standard_name_vocabulary = "CF Standard Name Table v27", "BODC controlled vocabulary" - dataset.cdm_data_type = "Point/Profile, Swath/Grid" - dataset.processing_level = "4" - dataset.project = "Distributed Oceanographic Matchup System (DOMS)" - dataset.keywords_vocabulary = "NASA Global Change Master Directory (GCMD) Science Keywords" - dataset.keywords = "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, NASA/JPL/PODAAC, " \ - "FSU/COAPS, UCAR/NCAR, SALINITY, SEA SURFACE TEMPERATURE, SURFACE WINDS" - dataset.creator_name = "NASA PO.DAAC" - dataset.creator_email = "podaac@podaac.jpl.nasa.gov" - dataset.creator_url = "https://podaac.jpl.nasa.gov/" - dataset.publisher_name = "NASA PO.DAAC" - dataset.publisher_email = "podaac@podaac.jpl.nasa.gov" - dataset.publisher_url = "https://podaac.jpl.nasa.gov" - dataset.acknowledgment = "DOMS is a NASA/AIST-funded project. NRA NNH14ZDA001N." - - @staticmethod - def __writeResults(results, satelliteWriter, insituWriter): - ids = {} - matches = [] - insituIndex = 0 - - # - # Loop through all of the results, add each satellite data point to the array - # - for r in range(0, len(results)): - result = results[r] - satelliteWriter.addData(result) - - # Add each match only if it is not already in the array of in situ points - for match in result["matches"]: - if match["id"] not in ids: - ids[match["id"]] = insituIndex - insituIndex += 1 - insituWriter.addData(match) - - # Append an index pait of (satellite, in situ) to the array of matches - matches.append((r, ids[match["id"]])) - - # Add data/write to the netCDF file - satelliteWriter.writeGroup() - insituWriter.writeGroup() - - return matches - - -class DomsNetCDFValueWriter: - def __init__(self, group, matchup_parameter): - group.createDimension("dim", size=None) - self.group = group - - self.lat = [] - self.lon = [] - self.time = [] - self.sea_water_salinity = [] - self.wind_speed = [] - self.wind_u = [] - self.wind_v = [] - self.wind_direction = [] - self.sea_water_temperature = [] - self.depth = [] - - self.satellite_group_name = "SatelliteData" - self.insitu_group_name = "InsituData" - - # - # Only include the depth variable related to the match-up parameter. If the match-up parameter is - # not sss or sst then do not include any depth data, just fill values. 
- # - if matchup_parameter == "sss": - self.matchup_depth = "sea_water_salinity_depth" - elif matchup_parameter == "sst": - self.matchup_depth = "sea_water_temperature_depth" - else: - self.matchup_depth = "NO_DEPTH" - - def addData(self, value): - self.lat.append(value.get("y", None)) - self.lon.append(value.get("x", None)) - self.time.append(time.mktime(value.get("time").timetuple())) - self.sea_water_salinity.append(value.get("sea_water_salinity", None)) - self.wind_speed.append(value.get("wind_speed", None)) - self.wind_u.append(value.get("wind_u", None)) - self.wind_v.append(value.get("wind_v", None)) - self.wind_direction.append(value.get("wind_direction", None)) - self.sea_water_temperature.append(value.get("sea_water_temperature", None)) - self.depth.append(value.get(self.matchup_depth, None)) - - def writeGroup(self): - # - # Create variables, enrich with attributes, and add data - # - lonVar = self.group.createVariable("lon", "f4", ("dim",), fill_value=-32767.0) - latVar = self.group.createVariable("lat", "f4", ("dim",), fill_value=-32767.0) - timeVar = self.group.createVariable("time", "f4", ("dim",), fill_value=-32767.0) - - self.__enrichLon(lonVar, min(self.lon), max(self.lon)) - self.__enrichLat(latVar, min(self.lat), max(self.lat)) - self.__enrichTime(timeVar) - - latVar[:] = self.lat - lonVar[:] = self.lon - timeVar[:] = self.time - - if self.sea_water_salinity.count(None) != len(self.sea_water_salinity): - if self.group.name == self.satellite_group_name: - sssVar = self.group.createVariable("SeaSurfaceSalinity", "f4", ("dim",), fill_value=-32767.0) - self.__enrichSSSMeasurements(sssVar, min(self.sea_water_salinity), max(self.sea_water_salinity)) - else: # group.name == self.insitu_group_name - sssVar = self.group.createVariable("SeaWaterSalinity", "f4", ("dim",), fill_value=-32767.0) - self.__enrichSWSMeasurements(sssVar, min(self.sea_water_salinity), max(self.sea_water_salinity)) - sssVar[:] = self.sea_water_salinity - - if self.wind_speed.count(None) != len(self.wind_speed): - windSpeedVar = self.group.createVariable("WindSpeed", "f4", ("dim",), fill_value=-32767.0) - self.__enrichWindSpeed(windSpeedVar, self.__calcMin(self.wind_speed), max(self.wind_speed)) - windSpeedVar[:] = self.wind_speed - - if self.wind_u.count(None) != len(self.wind_u): - windUVar = self.group.createVariable("WindU", "f4", ("dim",), fill_value=-32767.0) - windUVar[:] = self.wind_u - self.__enrichWindU(windUVar, self.__calcMin(self.wind_u), max(self.wind_u)) - - if self.wind_v.count(None) != len(self.wind_v): - windVVar = self.group.createVariable("WindV", "f4", ("dim",), fill_value=-32767.0) - windVVar[:] = self.wind_v - self.__enrichWindV(windVVar, self.__calcMin(self.wind_v), max(self.wind_v)) - - if self.wind_direction.count(None) != len(self.wind_direction): - windDirVar = self.group.createVariable("WindDirection", "f4", ("dim",), fill_value=-32767.0) - windDirVar[:] = self.wind_direction - self.__enrichWindDir(windDirVar) - - if self.sea_water_temperature.count(None) != len(self.sea_water_temperature): - if self.group.name == self.satellite_group_name: - tempVar = self.group.createVariable("SeaSurfaceTemp", "f4", ("dim",), fill_value=-32767.0) - self.__enrichSurfaceTemp(tempVar, self.__calcMin(self.sea_water_temperature), max(self.sea_water_temperature)) - else: - tempVar = self.group.createVariable("SeaWaterTemp", "f4", ("dim",), fill_value=-32767.0) - self.__enrichWaterTemp(tempVar, self.__calcMin(self.sea_water_temperature), max(self.sea_water_temperature)) - tempVar[:] = 
self.sea_water_temperature - - if self.group.name == self.insitu_group_name: - depthVar = self.group.createVariable("Depth", "f4", ("dim",), fill_value=-32767.0) - - if self.depth.count(None) != len(self.depth): - self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(self.depth)) - depthVar[:] = self.depth - else: - # If depth has no data, set all values to 0 - tempDepth = [0 for x in range(len(self.depth))] - depthVar[:] = tempDepth - - # - # Lists may include 'None" values, to calc min these must be filtered out - # - @staticmethod - def __calcMin(var): - return min(x for x in var if x is not None) - - - # - # Add attributes to each variable - # - @staticmethod - def __enrichLon(var, var_min, var_max): - var.long_name = "Longitude" - var.standard_name = "longitude" - var.axis = "X" - var.units = "degrees_east" - var.valid_min = var_min - var.valid_max = var_max - - @staticmethod - def __enrichLat(var, var_min, var_max): - var.long_name = "Latitude" - var.standard_name = "latitude" - var.axis = "Y" - var.units = "degrees_north" - var.valid_min = var_min - var.valid_max = var_max - - @staticmethod - def __enrichTime(var): - var.long_name = "Time" - var.standard_name = "time" - var.axis = "T" - var.units = "seconds since 1970-01-01 00:00:00 0:00" - - @staticmethod - def __enrichSSSMeasurements(var, var_min, var_max): - var.long_name = "Sea surface salinity" - var.standard_name = "sea_surface_salinity" - var.units = "1e-3" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat time" - - @staticmethod - def __enrichSWSMeasurements(var, var_min, var_max): - var.long_name = "Sea water salinity" - var.standard_name = "sea_water_salinity" - var.units = "1e-3" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat depth time" - - @staticmethod - def __enrichDepth(var, var_min, var_max): - var.valid_min = var_min - var.valid_max = var_max - var.units = "m" - var.long_name = "Depth" - var.standard_name = "depth" - var.axis = "Z" - var.positive = "Down" - - @staticmethod - def __enrichWindSpeed(var, var_min, var_max): - var.long_name = "Wind speed" - var.standard_name = "wind_speed" - var.units = "m s-1" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat depth time" - - @staticmethod - def __enrichWindU(var, var_min, var_max): - var.long_name = "Eastward wind" - var.standard_name = "eastward_wind" - var.units = "m s-1" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat depth time" - - @staticmethod - def __enrichWindV(var, var_min, var_max): - var.long_name = "Northward wind" - var.standard_name = "northward_wind" - var.units = "m s-1" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat depth time" - - @staticmethod - def __enrichWaterTemp(var, var_min, var_max): - var.long_name = "Sea water temperature" - var.standard_name = "sea_water_temperature" - var.units = "degree_C" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat depth time" - - @staticmethod - def __enrichSurfaceTemp(var, var_min, var_max): - var.long_name = "Sea surface temperature" - var.standard_name = "sea_surface_temperature" - var.units = "degree_C" - var.valid_min = var_min - var.valid_max = var_max - var.coordinates = "lon lat time" - - @staticmethod - def __enrichWindDir(var): - var.long_name = "Wind from direction" - var.standard_name = "wind_from_direction" - var.units = "degree" - var.coordinates = "lon lat depth time" diff --git 
a/analysis/webservice/algorithms/doms/DatasetListQuery.py b/analysis/webservice/algorithms/doms/DatasetListQuery.py deleted file mode 100644 index ac7f2634..00000000 --- a/analysis/webservice/algorithms/doms/DatasetListQuery.py +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import traceback - -import requests - -import BaseDomsHandler -import config -import values -from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler -from webservice.NexusHandler import nexus_handler -from webservice.webmodel import cached - - -@nexus_handler -class DomsDatasetListQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS Dataset Listing" - path = "/domslist" - description = "" - params = {} - singleton = True - - def __init__(self): - BaseHandler.__init__(self) - - def getFacetsForInsituSource(self, source): - url = source["url"] - - params = { - "facet": "true", - "stats": "true", - "startIndex": 0, - "itemsPerPage": 0 - } - try: - r = requests.get(url, params=params) - results = json.loads(r.text) - - depths = None - if "stats_fields" in results and "depth" in results["stats_fields"]: - depths = results["stats_fields"]["depth"] - - for facet in results["facets"]: - field = facet["field"] - for value in facet["values"]: - value["value"] = values.getDescByListNameAndId(field, int(value["value"])) - - return depths, results["facets"] - except: # KMG: Don't eat the exception. Add better handling... 
- traceback.print_exc() - return None, None - - def getMetadataUrlForDataset(self, dataset): - datasetSpec = config.getEndpointByName(dataset) - if datasetSpec is not None: - return datasetSpec["metadataUrl"] - else: - - # KMG: NOT a good hack - if dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1" or dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_CLIM": - dataset = "MUR-JPL-L4-GLOB-v4.1" - elif dataset == "SMAP_L2B_SSS": - dataset = "JPL_SMAP-SSS_L2_EVAL-V2" - elif dataset == "AVHRR_OI_L4_GHRSST_NCEI" or dataset == "AVHRR_OI_L4_GHRSST_NCEI_CLIM": - dataset = "AVHRR_OI-NCEI-L4-GLOB-v2.0" - - return "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=%s&format=umm-json" % dataset - - def getMetadataForSource(self, dataset): - try: - r = requests.get(self.getMetadataUrlForDataset(dataset)) - results = json.loads(r.text) - return results - except: - return None - - @cached(ttl=(60 * 60 * 1000)) # 1 hour cached - def calc(self, computeOptions, **args): - - satellitesList = self._get_tile_service().get_dataseries_list(simple=True) - - insituList = [] - - for satellite in satellitesList: - satellite["metadata"] = self.getMetadataForSource(satellite["shortName"]) - - for insitu in config.ENDPOINTS: - depths, facets = self.getFacetsForInsituSource(insitu) - insituList.append({ - "name": insitu["name"], - "endpoint": insitu["url"], - "metadata": self.getMetadataForSource(insitu["name"]), - "depths": depths, - "facets": facets - }) - - values = { - "satellite": satellitesList, - "insitu": insituList - } - - return BaseDomsHandler.DomsQueryResults(results=values) diff --git a/analysis/webservice/algorithms/doms/MatchupQuery.py b/analysis/webservice/algorithms/doms/MatchupQuery.py deleted file mode 100644 index 57a08340..00000000 --- a/analysis/webservice/algorithms/doms/MatchupQuery.py +++ /dev/null @@ -1,452 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import math -import uuid -from datetime import datetime - -import numpy as np -import utm -from nexustiles.model.nexusmodel import get_approximate_value_for_lat_lon -from scipy import spatial - -import BaseDomsHandler -import ResultsStorage -import datafetch -import fetchedgeimpl -import geo -import workerthread -from webservice.NexusHandler import nexus_handler - - -@nexus_handler -class CombinedDomsMatchupQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "Experimental Combined DOMS In-Situ Matchup" - path = "/domsmatchup" - description = "" - params = {} - singleton = True - - def __init__(self): - BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) - - def fetchData(self, endpoints, startTime, endTime, bbox, depth_min, depth_max, platforms): - - boundsConstrainer = geo.BoundsConstrainer(asString=bbox) - threads = [] - for endpoint in endpoints: - thread = workerthread.WorkerThread(datafetch.fetchData, - params=(endpoint, startTime, endTime, bbox, depth_min, depth_max)) - threads.append(thread) - workerthread.wait(threads, startFirst=True, poll=0.01) - - data2 = [] - for thread in threads: - data, bounds = thread.results - data2 += data - boundsConstrainer.testOtherConstrainer(bounds) - - return data2, boundsConstrainer - - def __parseDatetime(self, dtString): - dt = datetime.strptime(dtString, "%Y-%m-%dT%H:%M:%SZ") - epoch = datetime.utcfromtimestamp(0) - time = (dt - epoch).total_seconds() * 1000.0 - return time - - def calc(self, computeOptions, **args): - primary = computeOptions.get_argument("primary", None) - matchup = computeOptions.get_argument("matchup", None) - startTime = computeOptions.get_argument("s", None) - endTime = computeOptions.get_argument("e", None) - bbox = computeOptions.get_argument("b", None) - timeTolerance = computeOptions.get_float_arg("tt") - depth_min = computeOptions.get_float_arg("depthMin", default=None) - depth_max = computeOptions.get_float_arg("depthMax", default=None) - radiusTolerance = computeOptions.get_float_arg("rt") - platforms = computeOptions.get_argument("platforms", None) - - if primary is None or len(primary) == 0: - raise Exception("No primary dataset specified") - - if matchup is None or len(matchup) == 0: - raise Exception("No matchup datasets specified") - - start = self._now() - - primarySpec = self.getDataSourceByName(primary) - if primarySpec is None: - raise Exception("Specified primary dataset not found using identifier '%s'" % primary) - - primaryData, bounds = self.fetchData([primarySpec], startTime, endTime, bbox, depth_min, depth_max, platforms) - - primaryContext = MatchupContext(primaryData) - - matchupIds = matchup.split(",") - - for matchupId in matchupIds: - matchupSpec = self.getDataSourceByName(matchupId) - - if matchupSpec is not None: # Then it's in the in-situ configuration - proc = InsituDatasetProcessor(primaryContext, matchupSpec, startTime, endTime, bbox, depth_min, - depth_max, - platforms, timeTolerance, radiusTolerance) - proc.start() - else: # We assume it to be a Nexus tiled dataset - - ''' - Single Threaded at the moment... 
- ''' - daysinrange = self._get_tile_service().find_days_in_range_asc(bounds.south, bounds.north, bounds.west, - bounds.east, matchupId, - self.__parseDatetime(startTime) / 1000, - self.__parseDatetime(endTime) / 1000) - - tilesByDay = {} - for dayTimestamp in daysinrange: - ds1_nexus_tiles = self._get_tile_service().get_tiles_bounded_by_box_at_time(bounds.south, bounds.north, - bounds.west, bounds.east, - matchupId, dayTimestamp) - - # print "***", type(ds1_nexus_tiles) - # print ds1_nexus_tiles[0].__dict__ - tilesByDay[dayTimestamp] = ds1_nexus_tiles - - primaryContext.processGridded(tilesByDay, matchupId, radiusTolerance, timeTolerance) - - matches, numMatches = primaryContext.getFinal(len(matchupIds)) - - end = self._now() - - args = { - "primary": primary, - "matchup": matchupIds, - "startTime": startTime, - "endTime": endTime, - "bbox": bbox, - "timeTolerance": timeTolerance, - "depthMin": depth_min, - "depthMax": depth_max, - "radiusTolerance": radiusTolerance, - "platforms": platforms - } - - details = { - "timeToComplete": (end - start), - "numInSituRecords": primaryContext.insituCount, - "numInSituMatched": primaryContext.insituMatches, - "numGriddedChecked": primaryContext.griddedCount, - "numGriddedMatched": primaryContext.griddedMatched - } - - with ResultsStorage.ResultsStorage() as resultsStorage: - execution_id = resultsStorage.insertResults(results=matches, params=args, stats=details, startTime=start, - completeTime=end, userEmail="") - - return BaseDomsHandler.DomsQueryResults(results=matches, args=args, details=details, bounds=None, count=None, - computeOptions=None, executionId=execution_id) - - -class MatchupContextMap: - def __init__(self): - pass - - def add(self, context): - pass - - def delete(self, context): - pass - - -class MatchupContext: - def __init__(self, primaryData): - self.id = str(uuid.uuid4()) - - self.griddedCount = 0 - self.griddedMatched = 0 - - self.insituCount = len(primaryData) - self.insituMatches = 0 - - self.primary = primaryData - for r in self.primary: - r["matches"] = [] - - self.data = [] - for s in primaryData: - u = utm.from_latlon(s["y"], s["x"]) - v = (u[0], u[1], 0.0) - self.data.append(v) - - if len(self.data) > 0: - self.tree = spatial.KDTree(self.data) - else: - self.tree = None - - def getFinal(self, minMatchesToInclude): - - matched = [] - ttlMatches = 0 - for m in self.primary: - if len(m["matches"]) >= minMatchesToInclude: - matched.append(m) - ttlMatches += len(m["matches"]) - - return matched, ttlMatches - - def processGridded(self, tilesByDay, source, xyTolerance, timeTolerance): - for r in self.primary: - foundSatNodes = self.__getSatNodeForLatLonAndTime(tilesByDay, source, r["y"], r["x"], r["time"], - xyTolerance) - self.griddedCount += 1 - self.griddedMatched += len(foundSatNodes) - r["matches"].extend(foundSatNodes) - - def processInSitu(self, records, xyTolerance, timeTolerance): - if self.tree is not None: - for s in records: - self.insituCount += 1 - u = utm.from_latlon(s["y"], s["x"]) - coords = np.array([u[0], u[1], 0]) - ball = self.tree.query_ball_point(coords, xyTolerance) - - self.insituMatches += len(ball) - - for i in ball: - match = self.primary[i] - if abs(match["time"] - s["time"]) <= (timeTolerance * 1000.0): - match["matches"].append(s) - - def __getValueForLatLon(self, chunks, lat, lon, arrayName="data"): - value = get_approximate_value_for_lat_lon(chunks, lat, lon, arrayName) - return value - - def __checkNumber(self, value): - if isinstance(value, float) and (math.isnan(value) or value == np.nan): - 
value = None - elif value is not None: - value = float(value) - return value - - def __buildSwathIndexes(self, chunk): - latlons = [] - utms = [] - indexes = [] - for i in range(0, len(chunk.latitudes)): - _lat = chunk.latitudes[i] - if isinstance(_lat, np.ma.core.MaskedConstant): - continue - for j in range(0, len(chunk.longitudes)): - _lon = chunk.longitudes[j] - if isinstance(_lon, np.ma.core.MaskedConstant): - continue - - value = self.__getChunkValueAtIndex(chunk, (i, j)) - if isinstance(value, float) and (math.isnan(value) or value == np.nan): - continue - - u = utm.from_latlon(_lat, _lon) - v = (u[0], u[1], 0.0) - latlons.append((_lat, _lon)) - utms.append(v) - indexes.append((i, j)) - - tree = None - if len(latlons) > 0: - tree = spatial.KDTree(utms) - - chunk.swathIndexing = { - "tree": tree, - "latlons": latlons, - "indexes": indexes - } - - def __getChunkIndexesForLatLon(self, chunk, lat, lon, xyTolerance): - foundIndexes = [] - foundLatLons = [] - - if "swathIndexing" not in chunk.__dict__: - self.__buildSwathIndexes(chunk) - - tree = chunk.swathIndexing["tree"] - if tree is not None: - indexes = chunk.swathIndexing["indexes"] - latlons = chunk.swathIndexing["latlons"] - u = utm.from_latlon(lat, lon) - coords = np.array([u[0], u[1], 0]) - ball = tree.query_ball_point(coords, xyTolerance) - for i in ball: - foundIndexes.append(indexes[i]) - foundLatLons.append(latlons[i]) - return foundIndexes, foundLatLons - - def __getChunkValueAtIndex(self, chunk, index, arrayName=None): - - if arrayName is None or arrayName == "data": - data_val = chunk.data[0][index[0]][index[1]] - else: - data_val = chunk.meta_data[arrayName][0][index[0]][index[1]] - return data_val.item() if (data_val is not np.ma.masked) and data_val.size == 1 else float('Nan') - - def __getSatNodeForLatLonAndTime(self, chunksByDay, source, lat, lon, searchTime, xyTolerance): - timeDiff = 86400 * 365 * 1000 - foundNodes = [] - - for ts in chunksByDay: - chunks = chunksByDay[ts] - if abs((ts * 1000) - searchTime) < timeDiff: - for chunk in chunks: - indexes, latlons = self.__getChunkIndexesForLatLon(chunk, lat, lon, xyTolerance) - - # for index in indexes: - for i in range(0, len(indexes)): - index = indexes[i] - latlon = latlons[i] - sst = None - sss = None - windSpeed = None - windDirection = None - windU = None - windV = None - - value = self.__getChunkValueAtIndex(chunk, index) - - if isinstance(value, float) and (math.isnan(value) or value == np.nan): - continue - - if "GHRSST" in source: - sst = value - elif "ASCATB" in source: - windU = value - elif "SSS" in source: # SMAP - sss = value - - if len(chunks) > 0 and "wind_dir" in chunks[0].meta_data: - windDirection = self.__checkNumber(self.__getChunkValueAtIndex(chunk, index, "wind_dir")) - if len(chunks) > 0 and "wind_v" in chunks[0].meta_data: - windV = self.__checkNumber(self.__getChunkValueAtIndex(chunk, index, "wind_v")) - if len(chunks) > 0 and "wind_speed" in chunks[0].meta_data: - windSpeed = self.__checkNumber(self.__getChunkValueAtIndex(chunk, index, "wind_speed")) - - foundNode = { - "sea_water_temperature": sst, - "sea_water_salinity": sss, - "wind_speed": windSpeed, - "wind_direction": windDirection, - "wind_u": windU, - "wind_v": windV, - "time": ts, - "x": self.__checkNumber(latlon[1]), - "y": self.__checkNumber(latlon[0]), - "depth": 0, - "sea_water_temperature_depth": 0, - "source": source, - "id": "%s:%s:%s" % (ts, lat, lon) - } - - foundNodes.append(foundNode) - timeDiff = abs(ts - searchTime) - - return foundNodes - - def 
__getSatNodeForLatLonAndTime__(self, chunksByDay, source, lat, lon, searchTime): - - timeDiff = 86400 * 365 * 1000 - foundNodes = [] - - for ts in chunksByDay: - chunks = chunksByDay[ts] - # print chunks - # ts = calendar.timegm(chunks.start.utctimetuple()) * 1000 - if abs((ts * 1000) - searchTime) < timeDiff: - value = self.__getValueForLatLon(chunks, lat, lon, arrayName="data") - value = self.__checkNumber(value) - - # _Really_ don't like doing it this way... - - sst = None - sss = None - windSpeed = None - windDirection = None - windU = None - windV = None - - if "GHRSST" in source: - sst = value - - if "ASCATB" in source: - windU = value - - if len(chunks) > 0 and "wind_dir" in chunks[0].meta_data: - windDirection = self.__checkNumber(self.__getValueForLatLon(chunks, lat, lon, arrayName="wind_dir")) - if len(chunks) > 0 and "wind_v" in chunks[0].meta_data: - windV = self.__checkNumber(self.__getValueForLatLon(chunks, lat, lon, arrayName="wind_v")) - if len(chunks) > 0 and "wind_speed" in chunks[0].meta_data: - windSpeed = self.__checkNumber(self.__getValueForLatLon(chunks, lat, lon, arrayName="wind_speed")) - - foundNode = { - "sea_water_temperature": sst, - "sea_water_salinity": sss, - "wind_speed": windSpeed, - "wind_direction": windDirection, - "wind_uv": { - "u": windU, - "v": windV - }, - "time": ts, - "x": lon, - "y": lat, - "depth": 0, - "sea_water_temperature_depth": 0, - "source": source, - "id": "%s:%s:%s" % (ts, lat, lon) - } - - isValidNode = True - if "ASCATB" in source and windSpeed is None: - isValidNode = None - - if isValidNode: - foundNodes.append(foundNode) - timeDiff = abs(ts - searchTime) - - return foundNodes - - -class InsituDatasetProcessor: - def __init__(self, primary, datasource, startTime, endTime, bbox, depth_min, depth_max, platforms, timeTolerance, - radiusTolerance): - self.primary = primary - self.datasource = datasource - self.startTime = startTime - self.endTime = endTime - self.bbox = bbox - self.depth_min = depth_min - self.depth_max = depth_max - self.platforms = platforms - self.timeTolerance = timeTolerance - self.radiusTolerance = radiusTolerance - - def start(self): - def callback(pageData): - self.primary.processInSitu(pageData, self.radiusTolerance, self.timeTolerance) - - fetchedgeimpl.fetch(self.datasource, self.startTime, self.endTime, self.bbox, self.depth_min, self.depth_max, - self.platforms, pageCallback=callback) - - -class InsituPageProcessor: - def __init__(self): - pass diff --git a/analysis/webservice/algorithms/doms/MetadataQuery.py b/analysis/webservice/algorithms/doms/MetadataQuery.py deleted file mode 100644 index aa24d910..00000000 --- a/analysis/webservice/algorithms/doms/MetadataQuery.py +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json - -import requests - -import BaseDomsHandler -import config -from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler -from webservice.NexusHandler import nexus_handler -from webservice.webmodel import DatasetNotFoundException - - -@nexus_handler -class DomsMetadataQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS Metadata Listing" - path = "/domsmetadata" - description = "" - params = {} - singleton = True - - def __init__(self): - BaseHandler.__init__(self) - - def calc(self, computeOptions, **args): - - dataset = computeOptions.get_argument("dataset", None) - if dataset is None or len(dataset) == 0: - raise Exception("'dataset' parameter not specified") - - metadataUrl = self.__getUrlForDataset(dataset) - - try: - r = requests.get(metadataUrl) - results = json.loads(r.text) - return BaseDomsHandler.DomsQueryResults(results=results) - except: - raise DatasetNotFoundException("Dataset '%s' not found") - - def __getUrlForDataset(self, dataset): - datasetSpec = config.getEndpointByName(dataset) - if datasetSpec is not None: - return datasetSpec["metadataUrl"] - else: - - # KMG: NOT a good hack - if dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1" or dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_CLIM": - dataset = "MUR-JPL-L4-GLOB-v4.1" - elif dataset == "SMAP_L2B_SSS": - dataset = "JPL_SMAP-SSS_L2_EVAL-V2" - - return "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=%s&format=umm-json" % dataset diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py deleted file mode 100644 index 1b48d14f..00000000 --- a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py +++ /dev/null @@ -1,55 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import BaseDomsHandler
-import histogramplot
-import mapplot
-import scatterplot
-from webservice.NexusHandler import nexus_handler
-
-
-class PlotTypes:
-    SCATTER = "scatter"
-    MAP = "map"
-    HISTOGRAM = "histogram"
-
-
-@nexus_handler
-class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
-    name = "DOMS Results Plotting"
-    path = "/domsplot"
-    description = ""
-    params = {}
-    singleton = True
-
-    def __init__(self):
-        BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self)
-
-    def calc(self, computeOptions, **args):
-        id = computeOptions.get_argument("id", None)
-        parameter = computeOptions.get_argument('parameter', 'sst')
-
-        plotType = computeOptions.get_argument("type", PlotTypes.SCATTER)
-
-        normAndCurve = computeOptions.get_boolean_arg("normandcurve", False)
-
-        if plotType == PlotTypes.SCATTER:
-            return scatterplot.createScatterPlot(id, parameter)
-        elif plotType == PlotTypes.MAP:
-            return mapplot.createMapPlot(id, parameter)
-        elif plotType == PlotTypes.HISTOGRAM:
-            return histogramplot.createHistogramPlot(id, parameter, normAndCurve)
-        else:
-            raise Exception("Unsupported plot type '%s' specified." % plotType)
diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
deleted file mode 100644
index 93358e91..00000000
--- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import uuid - -import BaseDomsHandler -import ResultsStorage -from webservice.NexusHandler import nexus_handler -from webservice.webmodel import NexusProcessingException - - -@nexus_handler -class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS Resultset Retrieval" - path = "/domsresults" - description = "" - params = {} - singleton = True - - def __init__(self): - BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) - - def calc(self, computeOptions, **args): - execution_id = computeOptions.get_argument("id", None) - - try: - execution_id = uuid.UUID(execution_id) - except: - raise NexusProcessingException(reason="'id' argument must be a valid uuid", code=400) - - simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) - - with ResultsStorage.ResultsRetrieval() as storage: - params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results) - - return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=None, - computeOptions=None, executionId=execution_id) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py deleted file mode 100644 index 03bbd099..00000000 --- a/analysis/webservice/algorithms/doms/ResultsStorage.py +++ /dev/null @@ -1,286 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - - -import ConfigParser -import logging -import uuid -from datetime import datetime - -import pkg_resources -from cassandra.cluster import Cluster -from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy -from cassandra.query import BatchStatement -from pytz import UTC - - -class AbstractResultsContainer: - def __init__(self): - self._log = logging.getLogger(__name__) - self._log.info("Creating DOMS Results Storage Instance") - - self._session = None - - def __enter__(self): - domsconfig = ConfigParser.RawConfigParser() - domsconfig.readfp(pkg_resources.resource_stream(__name__, "domsconfig.ini"), filename='domsconfig.ini') - - cassHost = domsconfig.get("cassandra", "host") - cassKeyspace = domsconfig.get("cassandra", "keyspace") - cassDatacenter = domsconfig.get("cassandra", "local_datacenter") - cassVersion = int(domsconfig.get("cassandra", "protocol_version")) - - dc_policy = DCAwareRoundRobinPolicy(cassDatacenter) - token_policy = TokenAwarePolicy(dc_policy) - - self._cluster = Cluster([host for host in cassHost.split(',')], load_balancing_policy=token_policy, - protocol_version=cassVersion) - - self._session = self._cluster.connect(cassKeyspace) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self._cluster.shutdown() - - def _parseDatetime(self, dtString): - dt = datetime.strptime(dtString, "%Y-%m-%dT%H:%M:%SZ") - epoch = datetime.utcfromtimestamp(0) - time = (dt - epoch).total_seconds() * 1000.0 - return int(time) - - -class ResultsStorage(AbstractResultsContainer): - def __init__(self): - AbstractResultsContainer.__init__(self) - - def insertResults(self, results, params, stats, startTime, completeTime, userEmail, execution_id=None): - if isinstance(execution_id, basestring): - execution_id = uuid.UUID(execution_id) - - execution_id = self.insertExecution(execution_id, startTime, completeTime, userEmail) - self.__insertParams(execution_id, params) - self.__insertStats(execution_id, stats) - self.__insertResults(execution_id, results) - return execution_id - - def insertExecution(self, execution_id, startTime, completeTime, userEmail): - if execution_id is None: - execution_id = uuid.uuid4() - - cql = "INSERT INTO doms_executions (id, time_started, time_completed, user_email) VALUES (%s, %s, %s, %s)" - self._session.execute(cql, (execution_id, startTime, completeTime, userEmail)) - return execution_id - - def __insertParams(self, execution_id, params): - cql = """INSERT INTO doms_params - (execution_id, primary_dataset, matchup_datasets, depth_min, depth_max, time_tolerance, radius_tolerance, start_time, end_time, platforms, bounding_box, parameter) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - """ - self._session.execute(cql, (execution_id, - params["primary"], - ",".join(params["matchup"]) if type(params["matchup"]) == list else params[ - "matchup"], - params["depthMin"] if "depthMin" in params.keys() else None, - params["depthMax"] if "depthMax" in params.keys() else None, - int(params["timeTolerance"]), - params["radiusTolerance"], - params["startTime"], - params["endTime"], - params["platforms"], - params["bbox"], - params["parameter"] - )) - - def __insertStats(self, execution_id, stats): - cql = """ - INSERT INTO doms_execution_stats - (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete) - VALUES - (%s, %s, %s, %s, %s, %s) - """ - self._session.execute(cql, ( - execution_id, - stats["numGriddedMatched"], - stats["numGriddedChecked"], - stats["numInSituMatched"], 
- stats["numInSituRecords"], - stats["timeToComplete"] - )) - - def __insertResults(self, execution_id, results): - - cql = """ - INSERT INTO doms_data - (id, execution_id, value_id, primary_value_id, x, y, source_dataset, measurement_time, platform, device, measurement_values, is_primary) - VALUES - (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - insertStatement = self._session.prepare(cql) - batch = BatchStatement() - - for result in results: - self.__insertResult(execution_id, None, result, batch, insertStatement) - - self._session.execute(batch) - - def __insertResult(self, execution_id, primaryId, result, batch, insertStatement): - - dataMap = self.__buildDataMap(result) - result_id = uuid.uuid4() - batch.add(insertStatement, ( - result_id, - execution_id, - result["id"], - primaryId, - result["x"], - result["y"], - result["source"], - result["time"], - result["platform"] if "platform" in result else None, - result["device"] if "device" in result else None, - dataMap, - 1 if primaryId is None else 0 - ) - ) - - n = 0 - if "matches" in result: - for match in result["matches"]: - self.__insertResult(execution_id, result["id"], match, batch, insertStatement) - n += 1 - if n >= 20: - if primaryId is None: - self.__commitBatch(batch) - n = 0 - - if primaryId is None: - self.__commitBatch(batch) - - def __commitBatch(self, batch): - self._session.execute(batch) - batch.clear() - - def __buildDataMap(self, result): - dataMap = {} - for name in result: - value = result[name] - if name not in ["id", "x", "y", "source", "time", "platform", "device", "point", "matches"] and type( - value) in [float, int]: - dataMap[name] = value - return dataMap - - -class ResultsRetrieval(AbstractResultsContainer): - def __init__(self): - AbstractResultsContainer.__init__(self) - - def retrieveResults(self, execution_id, trim_data=False): - if isinstance(execution_id, basestring): - execution_id = uuid.UUID(execution_id) - - params = self.__retrieveParams(execution_id) - stats = self.__retrieveStats(execution_id) - data = self.__retrieveData(execution_id, trim_data=trim_data) - return params, stats, data - - def __retrieveData(self, id, trim_data=False): - dataMap = self.__retrievePrimaryData(id, trim_data=trim_data) - self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data) - data = [dataMap[name] for name in dataMap] - return data - - def __enrichPrimaryDataWithMatches(self, id, dataMap, trim_data=False): - cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = false" - rows = self._session.execute(cql, (id,)) - - for row in rows: - entry = self.__rowToDataEntry(row, trim_data=trim_data) - if row.primary_value_id in dataMap: - if not "matches" in dataMap[row.primary_value_id]: - dataMap[row.primary_value_id]["matches"] = [] - dataMap[row.primary_value_id]["matches"].append(entry) - else: - print row - - def __retrievePrimaryData(self, id, trim_data=False): - cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true" - rows = self._session.execute(cql, (id,)) - - dataMap = {} - for row in rows: - entry = self.__rowToDataEntry(row, trim_data=trim_data) - dataMap[row.value_id] = entry - return dataMap - - def __rowToDataEntry(self, row, trim_data=False): - if trim_data: - entry = { - "x": float(row.x), - "y": float(row.y), - "source": row.source_dataset, - "time": row.measurement_time.replace(tzinfo=UTC) - } - else: - entry = { - "id": row.value_id, - "x": float(row.x), - "y": float(row.y), - "source": row.source_dataset, - "device": row.device, - "platform": 
row.platform, - "time": row.measurement_time.replace(tzinfo=UTC) - } - for key in row.measurement_values: - value = float(row.measurement_values[key]) - entry[key] = value - return entry - - def __retrieveStats(self, id): - cql = "SELECT * FROM doms_execution_stats where execution_id = %s limit 1" - rows = self._session.execute(cql, (id,)) - for row in rows: - stats = { - "numGriddedMatched": row.num_gridded_matched, - "numGriddedChecked": row.num_gridded_checked, - "numInSituMatched": row.num_insitu_matched, - "numInSituChecked": row.num_insitu_checked, - "timeToComplete": row.time_to_complete - } - return stats - - raise Exception("Execution not found with id '%s'" % id) - - def __retrieveParams(self, id): - cql = "SELECT * FROM doms_params where execution_id = %s limit 1" - rows = self._session.execute(cql, (id,)) - for row in rows: - params = { - "primary": row.primary_dataset, - "matchup": row.matchup_datasets.split(","), - "depthMin": row.depth_min, - "depthMax": row.depth_max, - "timeTolerance": row.time_tolerance, - "radiusTolerance": row.radius_tolerance, - "startTime": row.start_time.replace(tzinfo=UTC), - "endTime": row.end_time.replace(tzinfo=UTC), - "platforms": row.platforms, - "bbox": row.bounding_box, - "parameter": row.parameter - } - return params - - raise Exception("Execution not found with id '%s'" % id) diff --git a/analysis/webservice/algorithms/doms/StatsQuery.py b/analysis/webservice/algorithms/doms/StatsQuery.py deleted file mode 100644 index f5ac7651..00000000 --- a/analysis/webservice/algorithms/doms/StatsQuery.py +++ /dev/null @@ -1,63 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import BaseDomsHandler
-import datafetch
-from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler
-from webservice.NexusHandler import nexus_handler
-
-
-@nexus_handler
-class DomsStatsQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
-    name = "DOMS In-Situ Stats Lookup"
-    path = "/domsstats"
-    description = ""
-    params = {}
-    singleton = True
-
-    def __init__(self):
-        BaseHandler.__init__(self)
-
-    def calc(self, computeOptions, **args):
-        source = computeOptions.get_argument("source", None)
-        startTime = computeOptions.get_argument("s", None)
-        endTime = computeOptions.get_argument("e", None)
-        bbox = computeOptions.get_argument("b", None)
-        timeTolerance = computeOptions.get_float_arg("tt")
-        depth_min = computeOptions.get_float_arg("depthMin", default=None)
-        depth_max = computeOptions.get_float_arg("depthMax", default=None)
-        radiusTolerance = computeOptions.get_float_arg("rt")
-        platforms = computeOptions.get_argument("platforms", None)
-
-        source1 = self.getDataSourceByName(source)
-        if source1 is None:
-            raise Exception("Source '%s' not found" % source)
-
-        count, bounds = datafetch.getCount(source1, startTime, endTime, bbox, depth_min, depth_max, platforms)
-
-        args = {
-            "source": source,
-            "startTime": startTime,
-            "endTime": endTime,
-            "bbox": bbox,
-            "timeTolerance": timeTolerance,
-            "depthMin": depth_min,
-            "depthMax": depth_max,
-            "radiusTolerance": radiusTolerance,
-            "platforms": platforms
-        }
-
-        return BaseDomsHandler.DomsQueryResults(results={}, args=args, details={}, bounds=bounds, count=count,
-                                                computeOptions=None)
diff --git a/analysis/webservice/algorithms/doms/ValuesQuery.py b/analysis/webservice/algorithms/doms/ValuesQuery.py
deleted file mode 100644
index d766c7bb..00000000
--- a/analysis/webservice/algorithms/doms/ValuesQuery.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -from datetime import datetime - -from pytz import timezone - -import BaseDomsHandler -import datafetch -from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler -from webservice.NexusHandler import nexus_handler - -EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) - - -@nexus_handler -class DomsValuesQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS In-Situ Value Lookup" - path = "/domsvalues" - description = "" - params = {} - singleton = True - - def __init__(self): - BaseHandler.__init__(self) - - def calc(self, computeOptions, **args): - source = computeOptions.get_argument("source", None) - startTime = computeOptions.get_start_datetime() - endTime = computeOptions.get_end_datetime() - bbox = computeOptions.get_argument("b", None) - timeTolerance = computeOptions.get_float_arg("tt") - depth_min = computeOptions.get_float_arg("depthMin", default=None) - depth_max = computeOptions.get_float_arg("depthMax", default=None) - radiusTolerance = computeOptions.get_float_arg("rt") - platforms = computeOptions.get_argument("platforms", "") - - source1 = self.getDataSourceByName(source) - if source1 is None: - raise Exception("Source '%s' not found" % source) - - values, bounds = datafetch.getValues(source1, startTime.strftime('%Y-%m-%dT%H:%M:%SZ'), - endTime.strftime('%Y-%m-%dT%H:%M:%SZ'), bbox, depth_min, depth_max, - platforms, placeholders=True) - count = len(values) - - args = { - "source": source, - "startTime": startTime, - "endTime": endTime, - "bbox": bbox, - "timeTolerance": timeTolerance, - "depthMin": depth_min, - "depthMax": depth_max, - "radiusTolerance": radiusTolerance, - "platforms": platforms - } - - return BaseDomsHandler.DomsQueryResults(results=values, args=args, bounds=bounds, details={}, count=count, - computeOptions=None) diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py deleted file mode 100644 index d5a8e247..00000000 --- a/analysis/webservice/algorithms/doms/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import BaseDomsHandler -import DatasetListQuery -import DomsInitialization -import MatchupQuery -import MetadataQuery -import ResultsPlotQuery -import ResultsRetrieval -import ResultsStorage -import StatsQuery -import ValuesQuery -import config -import datafetch -import fetchedgeimpl -import geo -import insitusubset -import subsetter -import values -import workerthread diff --git a/analysis/webservice/algorithms/doms/config.py b/analysis/webservice/algorithms/doms/config.py deleted file mode 100644 index ff492e86..00000000 --- a/analysis/webservice/algorithms/doms/config.py +++ /dev/null @@ -1,109 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ENDPOINTS = [ - { - "name": "samos", - "url": "http://doms.coaps.fsu.edu:8890/ws/search/samos", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 1000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SAMOS&format=umm-json" - }, - { - "name": "spurs", - "url": "https://doms.jpl.nasa.gov/ws/search/spurs", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 25000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-1&format=umm-json" - }, - { - "name": "icoads", - "url": "http://rda-data.ucar.edu:8890/ws/search/icoads", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 1000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=ICOADS&format=umm-json" - }, - { - "name": "spurs2", - "url": "https://doms.jpl.nasa.gov/ws/search/spurs2", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 25000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-2&format=umm-json" - } -] - -METADATA_LINKS = { - "samos": "http://samos.coaps.fsu.edu/html/nav.php?s=2", - "icoads": "https://rda.ucar.edu/datasets/ds548.1/", - "spurs": "https://podaac.jpl.nasa.gov/spurs" -} - -import os - -try: - env = os.environ['ENV'] - if env == 'dev': - ENDPOINTS = [ - { - "name": "samos", - "url": "http://doms.coaps.fsu.edu:8890/ws/search/samos", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 1000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SAMOS&format=umm-json" - }, - { - "name": "spurs", - "url": "http://127.0.0.1:8890/ws/search/spurs", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 25000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-1&format=umm-json" - }, - { - "name": "icoads", - "url": "http://rda-data.ucar.edu:8890/ws/search/icoads", - "fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 1000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=ICOADS&format=umm-json" - }, - { - "name": "spurs2", - "url": "https://doms.jpl.nasa.gov/ws/search/spurs2", - 
"fetchParallel": True, - "fetchThreads": 8, - "itemsPerPage": 25000, - "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-2&format=umm-json" - } - ] - METADATA_LINKS = { - "samos": "http://samos.coaps.fsu.edu/html/nav.php?s=2", - "icoads": "https://rda.ucar.edu/datasets/ds548.1/", - "spurs": "https://podaac.jpl.nasa.gov/spurs" - } -except KeyError: - pass - - -def getEndpointByName(name): - for endpoint in ENDPOINTS: - if endpoint["name"].upper() == name.upper(): - return endpoint - return None diff --git a/analysis/webservice/algorithms/doms/datafetch.py b/analysis/webservice/algorithms/doms/datafetch.py deleted file mode 100644 index 3fc3917e..00000000 --- a/analysis/webservice/algorithms/doms/datafetch.py +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import fetchedgeimpl - - -def getCount(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): - return fetchedgeimpl.getCount(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) - - -def __fetchSingleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): - return fetchedgeimpl.fetch(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) - - -def __fetchMultipleDataSource(endpoints, startTime, endTime, bbox, depth_min, depth_max, platforms=None): - data = [] - for endpoint in endpoints: - dataSingleSource = __fetchSingleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) - data = data + dataSingleSource - return data - - -def fetchData(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): - if type(endpoint) == list: - return __fetchMultipleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) - else: - return __fetchSingleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) - - -def getValues(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None, placeholders=False): - return fetchedgeimpl.getValues(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms, placeholders) - - -if __name__ == "__main__": - pass diff --git a/analysis/webservice/algorithms/doms/fetchedgeimpl.py b/analysis/webservice/algorithms/doms/fetchedgeimpl.py deleted file mode 100644 index 70cf14e3..00000000 --- a/analysis/webservice/algorithms/doms/fetchedgeimpl.py +++ /dev/null @@ -1,217 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import traceback -from datetime import datetime -from multiprocessing.pool import ThreadPool - -import requests - -import geo -import values -from webservice.webmodel import NexusProcessingException - - -def __parseDatetime(dtString): - dt = datetime.strptime(dtString, "%Y-%m-%dT%H:%M:%SZ") - epoch = datetime.utcfromtimestamp(0) - time = (dt - epoch).total_seconds() * 1000.0 - return time - - -def __parseLocation(locString): - if "Point" in locString: - locString = locString[6:-1] - - if "," in locString: - latitude = float(locString.split(",")[0]) - longitude = float(locString.split(",")[1]) - else: - latitude = float(locString.split(" ")[1]) - longitude = float(locString.split(" ")[0]) - - return (latitude, longitude) - - -def __resultRawToUsable(resultdict): - resultdict["time"] = __parseDatetime(resultdict["time"]) - latitude, longitude = __parseLocation(resultdict["point"]) - - resultdict["x"] = longitude - resultdict["y"] = latitude - - if "id" not in resultdict and "metadata" in resultdict: - resultdict["id"] = resultdict["metadata"] - - resultdict["id"] = "id-%s" % resultdict["id"] - - if "device" in resultdict: - resultdict["device"] = values.getDeviceById(resultdict["device"]) - - if "platform" in resultdict: - resultdict["platform"] = values.getPlatformById(resultdict["platform"]) - - if "mission" in resultdict: - resultdict["mission"] = values.getMissionById(resultdict["mission"]) - - if "sea_surface_temperature" in resultdict: - resultdict["sea_water_temperature"] = resultdict["sea_surface_temperature"] - del resultdict["sea_surface_temperature"] - - return resultdict - - -def __fetchJson(url, params, trycount=1, maxtries=5): - if trycount > maxtries: - raise Exception("Maximum retries attempted.") - if trycount > 1: - print "Retry #", trycount - r = requests.get(url, params=params, timeout=500.000) - - print r.url - - if r.status_code != 200: - return __fetchJson(url, params, trycount + 1, maxtries) - try: - results = json.loads(r.text) - return results - except: - return __fetchJson(url, params, trycount + 1, maxtries) - - -def __doQuery(endpoint, startTime, endTime, bbox, depth_min=None, depth_max=None, itemsPerPage=10, startIndex=0, - platforms=None, - pageCallback=None): - params = {"startTime": startTime, "endTime": endTime, "bbox": bbox, "itemsPerPage": itemsPerPage, - "startIndex": startIndex, "stats": "true"} - - if depth_min is not None: - params['minDepth'] = depth_min - if depth_max is not None: - params['maxDepth'] = depth_max - - if platforms is not None: - params["platform"] = platforms.split(",") - - resultsRaw = __fetchJson(endpoint["url"], params) - boundsConstrainer = geo.BoundsConstrainer(north=-90, south=90, west=180, east=-180) - - if resultsRaw["totalResults"] == 0 or len(resultsRaw["results"]) == 0: # Double-sanity check - return [], resultsRaw["totalResults"], startIndex, itemsPerPage, boundsConstrainer - - try: - results = [] - for resultdict in resultsRaw["results"]: - result = __resultRawToUsable(resultdict) - result["source"] = endpoint["name"] - boundsConstrainer.testCoords(north=result["y"], south=result["y"], 
west=result["x"], east=result["x"]) - results.append(result) - - if "stats_fields" in resultsRaw and len(resultsRaw["results"]) == 0: - stats = resultsRaw["stats_fields"] - if "lat" in stats and "lon" in stats: - boundsConstrainer.testCoords(north=stats['lat']['max'], south=stats['lat']['min'], - west=stats['lon']['min'], east=stats['lon']['max']) - - if pageCallback is not None: - pageCallback(results) - - ''' - If pageCallback was supplied, we assume this call to be asynchronous. Otherwise combine all the results data and return it. - ''' - if pageCallback is None: - return results, int(resultsRaw["totalResults"]), int(resultsRaw["startIndex"]), int( - resultsRaw["itemsPerPage"]), boundsConstrainer - else: - return [], int(resultsRaw["totalResults"]), int(resultsRaw["startIndex"]), int( - resultsRaw["itemsPerPage"]), boundsConstrainer - except: - print "Invalid or missing JSON in response." - traceback.print_exc() - raise NexusProcessingException(reason="Invalid or missing JSON in response.") - # return [], 0, startIndex, itemsPerPage, boundsConstrainer - - -def getCount(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): - startIndex = 0 - pageResults, totalResults, pageStartIndex, itemsPerPageR, boundsConstrainer = __doQuery(endpoint, startTime, - endTime, bbox, - depth_min, depth_max, 0, - startIndex, platforms) - return totalResults, boundsConstrainer - - -def fetch(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None, pageCallback=None): - results = [] - startIndex = 0 - - mainBoundsConstrainer = geo.BoundsConstrainer(north=-90, south=90, west=180, east=-180) - - # First isn't parellel so we can get the ttl results, forced items per page, etc... - pageResults, totalResults, pageStartIndex, itemsPerPageR, boundsConstrainer = __doQuery(endpoint, startTime, - endTime, bbox, - depth_min, depth_max, - endpoint["itemsPerPage"], - startIndex, platforms, - pageCallback) - results = results + pageResults - mainBoundsConstrainer.testOtherConstrainer(boundsConstrainer) - - pool = ThreadPool(processes=endpoint["fetchThreads"]) - mpResults = [pool.apply_async(__doQuery, args=( - endpoint, startTime, endTime, bbox, depth_min, depth_max, itemsPerPageR, x, platforms, pageCallback)) for x in - range(len(pageResults), totalResults, itemsPerPageR)] - pool.close() - pool.join() - - ''' - If pageCallback was supplied, we assume this call to be asynchronous. Otherwise combine all the results data and return it. 
- ''' - if pageCallback is None: - mpResults = [p.get() for p in mpResults] - for mpResult in mpResults: - results = results + mpResult[0] - mainBoundsConstrainer.testOtherConstrainer(mpResult[4]) - - return results, mainBoundsConstrainer - - -def getValues(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None, placeholders=False): - results, boundsConstrainer = fetch(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) - - if placeholders: - trimmedResults = [] - for item in results: - depth = None - if "depth" in item: - depth = item["depth"] - if "sea_water_temperature_depth" in item: - depth = item["sea_water_temperature_depth"] - - trimmedItem = { - "x": item["x"], - "y": item["y"], - "source": item["source"], - "time": item["time"], - "device": item["device"] if "device" in item else None, - "platform": item["platform"], - "depth": depth - } - trimmedResults.append(trimmedItem) - - results = trimmedResults - - return results, boundsConstrainer diff --git a/analysis/webservice/algorithms/doms/geo.py b/analysis/webservice/algorithms/doms/geo.py deleted file mode 100644 index 3323f571..00000000 --- a/analysis/webservice/algorithms/doms/geo.py +++ /dev/null @@ -1,129 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math - -MEAN_RADIUS_EARTH_METERS = 6371010.0 -EQUATORIAL_RADIUS_EARTH_METERS = 6378140.0 -POLAR_RADIUS_EARTH_METERS = 6356752.0 -FLATTENING_EARTH = 298.257223563 -MEAN_RADIUS_EARTH_MILES = 3958.8 - - -class DistanceUnit(object): - METERS = 0 - MILES = 1 - - -# Haversine implementation for great-circle distances between two points -def haversine(x0, y0, x1, y1, units=DistanceUnit.METERS): - if units == DistanceUnit.METERS: - R = MEAN_RADIUS_EARTH_METERS - elif units == DistanceUnit.MILES: - R = MEAN_RADIUS_EARTH_MILES - else: - raise Exception("Invalid units specified") - x0r = x0 * (math.pi / 180.0) # To radians - x1r = x1 * (math.pi / 180.0) # To radians - xd = (x1 - x0) * (math.pi / 180.0) - yd = (y1 - y0) * (math.pi / 180.0) - - a = math.sin(xd / 2.0) * math.sin(xd / 2.0) + \ - math.cos(x0r) * math.cos(x1r) * \ - math.sin(yd / 2.0) * math.sin(yd / 2.0) - c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1.0 - a)) - d = R * c - return d - - -# Equirectangular approximation for when performance is key. 
Better at smaller distances -def equirectangularApprox(x0, y0, x1, y1): - R = 6371000.0 # Meters - x0r = x0 * (math.pi / 180.0) # To radians - x1r = x1 * (math.pi / 180.0) - y0r = y0 * (math.pi / 180.0) - y1r = y1 * (math.pi / 180.0) - - x = (y1r - y0r) * math.cos((x0r + x1r) / 2.0) - y = x1r - x0r - d = math.sqrt(x * x + y * y) * R - return d - - -class BoundingBox(object): - - def __init__(self, north=None, south=None, west=None, east=None, asString=None): - if asString is not None: - bboxParts = asString.split(",") - self.west = float(bboxParts[0]) - self.south = float(bboxParts[1]) - self.east = float(bboxParts[2]) - self.north = float(bboxParts[3]) - else: - self.north = north - self.south = south - self.west = west - self.east = east - - def toString(self): - return "%s,%s,%s,%s" % (self.west, self.south, self.east, self.north) - - def toMap(self): - return { - "xmin": self.west, - "xmax": self.east, - "ymin": self.south, - "ymax": self.north - } - - -''' - Constrains, does not expand. -''' - - -class BoundsConstrainer(BoundingBox): - - def __init__(self, north=None, south=None, west=None, east=None, asString=None): - BoundingBox.__init__(self, north, south, west, east, asString) - - def testNorth(self, v): - if v is None: - return - self.north = max([self.north, v]) - - def testSouth(self, v): - if v is None: - return - self.south = min([self.south, v]) - - def testEast(self, v): - if v is None: - return - self.east = max([self.east, v]) - - def testWest(self, v): - if v is None: - return - self.west = min([self.west, v]) - - def testCoords(self, north=None, south=None, west=None, east=None): - self.testNorth(north) - self.testSouth(south) - self.testWest(west) - self.testEast(east) - - def testOtherConstrainer(self, other): - self.testCoords(north=other.north, south=other.south, west=other.west, east=other.east) diff --git a/analysis/webservice/algorithms/doms/histogramplot.py b/analysis/webservice/algorithms/doms/histogramplot.py deleted file mode 100644 index 1e06b66b..00000000 --- a/analysis/webservice/algorithms/doms/histogramplot.py +++ /dev/null @@ -1,127 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import string -from cStringIO import StringIO -from multiprocessing import Process, Manager - -import matplotlib -import matplotlib.mlab as mlab -import matplotlib.pyplot as plt -import numpy as np - -import BaseDomsHandler -import ResultsStorage - -if not matplotlib.get_backend(): - matplotlib.use('Agg') - -PARAMETER_TO_FIELD = { - "sst": "sea_water_temperature", - "sss": "sea_water_salinity" -} - -PARAMETER_TO_UNITS = { - "sst": "($^\circ$C)", - "sss": "(g/L)" -} - - -class DomsHistogramPlotQueryResults(BaseDomsHandler.DomsQueryResults): - - def __init__(self, x, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, - computeOptions=None, executionId=None, plot=None): - BaseDomsHandler.DomsQueryResults.__init__(self, results=x, args=args, details=details, bounds=bounds, - count=count, computeOptions=computeOptions, executionId=executionId) - self.__primary = primary - self.__secondary = secondary - self.__x = x - self.__parameter = parameter - self.__plot = plot - - def toImage(self): - return self.__plot - - -def render(d, x, primary, secondary, parameter, norm_and_curve=False): - fig, ax = plt.subplots() - fig.suptitle(string.upper("%s vs. %s" % (primary, secondary)), fontsize=14, fontweight='bold') - - n, bins, patches = plt.hist(x, 50, normed=norm_and_curve, facecolor='green', alpha=0.75) - - if norm_and_curve: - mean = np.mean(x) - variance = np.var(x) - sigma = np.sqrt(variance) - y = mlab.normpdf(bins, mean, sigma) - l = plt.plot(bins, y, 'r--', linewidth=1) - - ax.set_title('n = %d' % len(x)) - - units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS["sst"] - ax.set_xlabel("%s - %s %s" % (primary, secondary, units)) - - if norm_and_curve: - ax.set_ylabel("Probability per unit difference") - else: - ax.set_ylabel("Frequency") - - plt.grid(True) - - sio = StringIO() - plt.savefig(sio, format='png') - d['plot'] = sio.getvalue() - - -def renderAsync(x, primary, secondary, parameter, norm_and_curve): - manager = Manager() - d = manager.dict() - p = Process(target=render, args=(d, x, primary, secondary, parameter, norm_and_curve)) - p.start() - p.join() - return d['plot'] - - -def createHistogramPlot(id, parameter, norm_and_curve=False): - with ResultsStorage.ResultsRetrieval() as storage: - params, stats, data = storage.retrieveResults(id) - - primary = params["primary"] - secondary = params["matchup"][0] - - x = createHistTable(data, secondary, parameter) - - plot = renderAsync(x, primary, secondary, parameter, norm_and_curve) - - r = DomsHistogramPlotQueryResults(x=x, parameter=parameter, primary=primary, secondary=secondary, - args=params, details=stats, - bounds=None, count=None, computeOptions=None, executionId=id, plot=plot) - return r - - -def createHistTable(results, secondary, parameter): - x = [] - - field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else PARAMETER_TO_FIELD["sst"] - - for entry in results: - for match in entry["matches"]: - if match["source"] == secondary: - if field in entry and field in match: - a = entry[field] - b = match[field] - x.append((a - b)) - - return x diff --git a/analysis/webservice/algorithms/doms/insitusubset.py b/analysis/webservice/algorithms/doms/insitusubset.py deleted file mode 100644 index 7f60e997..00000000 --- a/analysis/webservice/algorithms/doms/insitusubset.py +++ /dev/null @@ -1,263 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import StringIO -import csv -import json -import logging -from datetime import datetime - -import requests - -import BaseDomsHandler -from webservice.NexusHandler import nexus_handler -from webservice.algorithms.doms import config as edge_endpoints -from webservice.webmodel import NexusProcessingException, NoDataException - -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - - -@nexus_handler -class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS In Situ Subsetter" - path = "/domsinsitusubset" - description = "Subset a DOMS in situ source given the search domain." - - params = [ - { - "name": "source", - "type": "comma-delimited string", - "description": "The in situ Dataset to be sub-setted", - "required": "true", - "sample": "spurs" - }, - { - "name": "parameter", - "type": "string", - "description": "The parameter of interest. One of 'sst', 'sss', 'wind'", - "required": "false", - "default": "All", - "sample": "sss" - }, - { - "name": "startTime", - "type": "string", - "description": "Starting time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH", - "required": "true", - "sample": "2013-10-21T00:00:00Z" - }, - { - "name": "endTime", - "type": "string", - "description": "Ending time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH", - "required": "true", - "sample": "2013-10-31T23:59:59Z" - }, - { - "name": "b", - "type": "comma-delimited float", - "description": "Minimum (Western) Longitude, Minimum (Southern) Latitude, " - "Maximum (Eastern) Longitude, Maximum (Northern) Latitude", - "required": "true", - "sample": "-30,15,-45,30" - }, - { - "name": "depthMin", - "type": "float", - "description": "Minimum depth of measurements. Must be less than depthMax", - "required": "false", - "default": "No limit", - "sample": "0" - }, - { - "name": "depthMax", - "type": "float", - "description": "Maximum depth of measurements. Must be greater than depthMin", - "required": "false", - "default": "No limit", - "sample": "5" - }, - { - "name": "platforms", - "type": "comma-delimited integer", - "description": "Platforms to include for subset consideration", - "required": "false", - "default": "All", - "sample": "1,2,3,4,5,6,7,8,9" - }, - { - "name": "output", - "type": "string", - "description": "Output type. 
Only 'CSV' or 'JSON' is currently supported", - "required": "false", - "default": "JSON", - "sample": "CSV" - } - ] - singleton = True - - def __init__(self): - BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) - self.log = logging.getLogger(__name__) - - def parse_arguments(self, request): - # Parse input arguments - self.log.debug("Parsing arguments") - - source_name = request.get_argument('source', None) - if source_name is None or source_name.strip() == '': - raise NexusProcessingException(reason="'source' argument is required", code=400) - - parameter_s = request.get_argument('parameter', None) - if parameter_s not in ['sst', 'sss', 'wind', None]: - raise NexusProcessingException( - reason="Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'." % parameter_s, code=400) - - try: - start_time = request.get_start_datetime() - start_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") - except: - raise NexusProcessingException( - reason="'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", - code=400) - try: - end_time = request.get_end_datetime() - end_time = end_time.strftime("%Y-%m-%dT%H:%M:%SZ") - except: - raise NexusProcessingException( - reason="'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", - code=400) - - if start_time > end_time: - raise NexusProcessingException( - reason="The starting time must be before the ending time. Received startTime: %s, endTime: %s" % ( - request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)), - code=400) - - try: - bounding_polygon = request.get_bounding_polygon() - except: - raise NexusProcessingException( - reason="'b' argument is required. 
Must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude", - code=400) - - depth_min = request.get_decimal_arg('depthMin', default=None) - depth_max = request.get_decimal_arg('depthMax', default=None) - - if depth_min is not None and depth_max is not None and depth_min >= depth_max: - raise NexusProcessingException( - reason="Depth Min should be less than Depth Max", code=400) - - platforms = request.get_argument('platforms', None) - if platforms is not None: - try: - p_validation = platforms.split(',') - p_validation = [int(p) for p in p_validation] - del p_validation - except: - raise NexusProcessingException(reason="platforms must be a comma-delimited list of integers", code=400) - - return source_name, parameter_s, start_time, end_time, bounding_polygon, depth_min, depth_max, platforms - - def calc(self, request, **args): - - source_name, parameter_s, start_time, end_time, bounding_polygon, \ - depth_min, depth_max, platforms = self.parse_arguments(request) - - with requests.session() as edge_session: - edge_results = query_edge(source_name, parameter_s, start_time, end_time, - ','.join([str(bound) for bound in bounding_polygon.bounds]), - platforms, depth_min, depth_max, edge_session)['results'] - - if len(edge_results) == 0: - raise NoDataException - return InSituSubsetResult(results=edge_results) - - -class InSituSubsetResult(object): - def __init__(self, results): - self.results = results - - def toJson(self): - return json.dumps(self.results, indent=4) - - def toCSV(self): - fieldnames = sorted(next(iter(self.results)).keys()) - - csv_mem_file = StringIO.StringIO() - try: - writer = csv.DictWriter(csv_mem_file, fieldnames=fieldnames) - - writer.writeheader() - writer.writerows(self.results) - csv_out = csv_mem_file.getvalue() - finally: - csv_mem_file.close() - - return csv_out - - -def query_edge(dataset, variable, startTime, endTime, bbox, platform, depth_min, depth_max, session, itemsPerPage=1000, - startIndex=0, stats=True): - log = logging.getLogger('webservice.algorithms.doms.insitusubset.query_edge') - try: - startTime = datetime.utcfromtimestamp(startTime).strftime('%Y-%m-%dT%H:%M:%SZ') - except TypeError: - # Assume we were passed a properly formatted string - pass - - try: - endTime = datetime.utcfromtimestamp(endTime).strftime('%Y-%m-%dT%H:%M:%SZ') - except TypeError: - # Assume we were passed a properly formatted string - pass - - try: - platform = platform.split(',') - except AttributeError: - # Assume we were passed a list - pass - - params = {"startTime": startTime, - "endTime": endTime, - "bbox": bbox, - "minDepth": depth_min, - "maxDepth": depth_max, - "itemsPerPage": itemsPerPage, "startIndex": startIndex, "stats": str(stats).lower()} - - if variable: - params['variable'] = variable - if platform: - params['platform'] = platform - - edge_request = session.get(edge_endpoints.getEndpointByName(dataset)['url'], params=params) - - edge_request.raise_for_status() - edge_response = json.loads(edge_request.text) - - # Get all edge results - next_page_url = edge_response.get('next', None) - while next_page_url is not None: - log.debug("requesting %s" % next_page_url) - edge_page_request = session.get(next_page_url) - - edge_page_request.raise_for_status() - edge_page_response = json.loads(edge_page_request.text) - - edge_response['results'].extend(edge_page_response['results']) - - next_page_url = edge_page_response.get('next', None) - - return edge_response diff --git 
a/analysis/webservice/algorithms/doms/mapplot.py b/analysis/webservice/algorithms/doms/mapplot.py deleted file mode 100644 index 3af85d30..00000000 --- a/analysis/webservice/algorithms/doms/mapplot.py +++ /dev/null @@ -1,175 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import string -from cStringIO import StringIO -from multiprocessing import Process, Manager - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from mpl_toolkits.basemap import Basemap - -import BaseDomsHandler -import ResultsStorage - -if not matplotlib.get_backend(): - matplotlib.use('Agg') - -PARAMETER_TO_FIELD = { - "sst": "sea_water_temperature", - "sss": "sea_water_salinity" -} - -PARAMETER_TO_UNITS = { - "sst": "($^\circ$ C)", - "sss": "(g/L)" -} - - -def __square(minLon, maxLon, minLat, maxLat): - if maxLat - minLat > maxLon - minLon: - a = ((maxLat - minLat) - (maxLon - minLon)) / 2.0 - minLon -= a - maxLon += a - elif maxLon - minLon > maxLat - minLat: - a = ((maxLon - minLon) - (maxLat - minLat)) / 2.0 - minLat -= a - maxLat += a - - return minLon, maxLon, minLat, maxLat - - -def render(d, lats, lons, z, primary, secondary, parameter): - fig = plt.figure() - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) - - ax.set_title(string.upper("%s vs. 
%s" % (primary, secondary))) - # ax.set_ylabel('Latitude') - # ax.set_xlabel('Longitude') - - minLatA = np.min(lats) - maxLatA = np.max(lats) - minLonA = np.min(lons) - maxLonA = np.max(lons) - - minLat = minLatA - (abs(maxLatA - minLatA) * 0.1) - maxLat = maxLatA + (abs(maxLatA - minLatA) * 0.1) - - minLon = minLonA - (abs(maxLonA - minLonA) * 0.1) - maxLon = maxLonA + (abs(maxLonA - minLonA) * 0.1) - - minLon, maxLon, minLat, maxLat = __square(minLon, maxLon, minLat, maxLat) - - # m = Basemap(projection='mill', llcrnrlon=-180,llcrnrlat=-80,urcrnrlon=180,urcrnrlat=80,resolution='l') - m = Basemap(projection='mill', llcrnrlon=minLon, llcrnrlat=minLat, urcrnrlon=maxLon, urcrnrlat=maxLat, - resolution='l') - - m.drawparallels(np.arange(minLat, maxLat, (maxLat - minLat) / 5.0), labels=[1, 0, 0, 0], fontsize=10) - m.drawmeridians(np.arange(minLon, maxLon, (maxLon - minLon) / 5.0), labels=[0, 0, 0, 1], fontsize=10) - - m.drawcoastlines() - m.drawmapboundary(fill_color='#99ffff') - m.fillcontinents(color='#cc9966', lake_color='#99ffff') - - # lats, lons = np.meshgrid(lats, lons) - - masked_array = np.ma.array(z, mask=np.isnan(z)) - z = masked_array - - values = np.zeros(len(z)) - for i in range(0, len(z)): - values[i] = ((z[i] - np.min(z)) / (np.max(z) - np.min(z)) * 20.0) + 10 - - x, y = m(lons, lats) - - im1 = m.scatter(x, y, values) - - im1.set_array(z) - cb = m.colorbar(im1) - - units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS["sst"] - cb.set_label("Difference %s" % units) - - sio = StringIO() - plt.savefig(sio, format='png') - plot = sio.getvalue() - if d is not None: - d['plot'] = plot - return plot - - -class DomsMapPlotQueryResults(BaseDomsHandler.DomsQueryResults): - def __init__(self, lats, lons, z, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, - computeOptions=None, executionId=None, plot=None): - BaseDomsHandler.DomsQueryResults.__init__(self, results={"lats": lats, "lons": lons, "values": z}, args=args, - details=details, bounds=bounds, count=count, - computeOptions=computeOptions, executionId=executionId) - self.__lats = lats - self.__lons = lons - self.__z = np.array(z) - self.__parameter = parameter - self.__primary = primary - self.__secondary = secondary - self.__plot = plot - - def toImage(self): - return self.__plot - - -def renderAsync(x, y, z, primary, secondary, parameter): - manager = Manager() - d = manager.dict() - p = Process(target=render, args=(d, x, y, z, primary, secondary, parameter)) - p.start() - p.join() - return d['plot'] - - -def createMapPlot(id, parameter): - with ResultsStorage.ResultsRetrieval() as storage: - params, stats, data = storage.retrieveResults(id) - - primary = params["primary"] - secondary = params["matchup"][0] - - lats = [] - lons = [] - z = [] - - field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else PARAMETER_TO_FIELD["sst"] - - for entry in data: - for match in entry["matches"]: - if match["source"] == secondary: - - if field in entry and field in match: - a = entry[field] - b = match[field] - z.append((a - b)) - z.append((a - b)) - else: - z.append(1.0) - z.append(1.0) - lats.append(entry["y"]) - lons.append(entry["x"]) - lats.append(match["y"]) - lons.append(match["x"]) - - plot = renderAsync(lats, lons, z, primary, secondary, parameter) - r = DomsMapPlotQueryResults(lats=lats, lons=lons, z=z, parameter=parameter, primary=primary, secondary=secondary, - args=params, - details=stats, bounds=None, count=None, computeOptions=None, 
executionId=id, plot=plot) - return r diff --git a/analysis/webservice/algorithms/doms/scatterplot.py b/analysis/webservice/algorithms/doms/scatterplot.py deleted file mode 100644 index 2ff57ee9..00000000 --- a/analysis/webservice/algorithms/doms/scatterplot.py +++ /dev/null @@ -1,118 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import string -from cStringIO import StringIO -from multiprocessing import Process, Manager - -import matplotlib -import matplotlib.pyplot as plt - -import BaseDomsHandler -import ResultsStorage - -if not matplotlib.get_backend(): - matplotlib.use('Agg') - -PARAMETER_TO_FIELD = { - "sst": "sea_water_temperature", - "sss": "sea_water_salinity" -} - -PARAMETER_TO_UNITS = { - "sst": "($^\circ$ C)", - "sss": "(g/L)" -} - - -def render(d, x, y, z, primary, secondary, parameter): - fig, ax = plt.subplots() - - ax.set_title(string.upper("%s vs. %s" % (primary, secondary))) - - units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS[ - "sst"] - ax.set_ylabel("%s %s" % (secondary, units)) - ax.set_xlabel("%s %s" % (primary, units)) - - ax.scatter(x, y) - - sio = StringIO() - plt.savefig(sio, format='png') - d['plot'] = sio.getvalue() - - -class DomsScatterPlotQueryResults(BaseDomsHandler.DomsQueryResults): - - def __init__(self, x, y, z, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, - computeOptions=None, executionId=None, plot=None): - BaseDomsHandler.DomsQueryResults.__init__(self, results=[x, y], args=args, details=details, bounds=bounds, - count=count, computeOptions=computeOptions, executionId=executionId) - self.__primary = primary - self.__secondary = secondary - self.__x = x - self.__y = y - self.__z = z - self.__parameter = parameter - self.__plot = plot - - def toImage(self): - return self.__plot - - -def renderAsync(x, y, z, primary, secondary, parameter): - manager = Manager() - d = manager.dict() - p = Process(target=render, args=(d, x, y, z, primary, secondary, parameter)) - p.start() - p.join() - return d['plot'] - - -def createScatterPlot(id, parameter): - with ResultsStorage.ResultsRetrieval() as storage: - params, stats, data = storage.retrieveResults(id) - - primary = params["primary"] - secondary = params["matchup"][0] - - x, y, z = createScatterTable(data, secondary, parameter) - - plot = renderAsync(x, y, z, primary, secondary, parameter) - - r = DomsScatterPlotQueryResults(x=x, y=y, z=z, parameter=parameter, primary=primary, secondary=secondary, - args=params, details=stats, - bounds=None, count=None, computeOptions=None, executionId=id, plot=plot) - return r - - -def createScatterTable(results, secondary, parameter): - x = [] - y = [] - z = [] - - field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else 
PARAMETER_TO_FIELD["sst"] - - for entry in results: - for match in entry["matches"]: - if match["source"] == secondary: - if field in entry and field in match: - a = entry[field] - b = match[field] - x.append(a) - y.append(b) - z.append(a - b) - - return x, y, z diff --git a/analysis/webservice/algorithms/doms/subsetter.py b/analysis/webservice/algorithms/doms/subsetter.py deleted file mode 100644 index 67a2276f..00000000 --- a/analysis/webservice/algorithms/doms/subsetter.py +++ /dev/null @@ -1,260 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import tempfile -import zipfile -from datetime import datetime - -import requests - -import BaseDomsHandler -from webservice.NexusHandler import nexus_handler -from webservice.webmodel import NexusProcessingException - -ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' - - -def is_blank(my_string): - return not (my_string and my_string.strip() != '') - - -@nexus_handler -class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): - name = "DOMS Subsetter" - path = "/domssubset" - description = "Subset DOMS sources given the search domain" - - params = { - "dataset": { - "name": "NEXUS Dataset", - "type": "string", - "description": "The NEXUS dataset. Optional but at least one of 'dataset' or 'insitu' are required" - }, - "insitu": { - "name": "In Situ sources", - "type": "comma-delimited string", - "description": "The in situ source(s). Optional but at least one of 'dataset' or 'insitu' are required" - }, - "parameter": { - "name": "Data Parameter", - "type": "string", - "description": "The parameter of interest. One of 'sst', 'sss', 'wind'. Required" - }, - "startTime": { - "name": "Start Time", - "type": "string", - "description": "Starting time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH. Required" - }, - "endTime": { - "name": "End Time", - "type": "string", - "description": "Ending time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH. Required" - }, - "b": { - "name": "Bounding box", - "type": "comma-delimited float", - "description": "Minimum (Western) Longitude, Minimum (Southern) Latitude, " - "Maximum (Eastern) Longitude, Maximum (Northern) Latitude. Required" - }, - "depthMin": { - "name": "Minimum Depth", - "type": "float", - "description": "Minimum depth of measurements. Must be less than depthMax. Optional" - }, - "depthMax": { - "name": "Maximum Depth", - "type": "float", - "description": "Maximum depth of measurements. Must be greater than depthMin. Optional" - }, - "platforms": { - "name": "Platforms", - "type": "comma-delimited integer", - "description": "Platforms to include for subset consideration. Optional" - }, - "output": { - "name": "Output", - "type": "string", - "description": "Output type. Only 'ZIP' is currently supported. 
Required" - } - } - singleton = True - - def __init__(self): - BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) - self.log = logging.getLogger(__name__) - - def parse_arguments(self, request): - # Parse input arguments - self.log.debug("Parsing arguments") - - primary_ds_name = request.get_argument('dataset', None) - matchup_ds_names = request.get_argument('insitu', None) - - if is_blank(primary_ds_name) and is_blank(matchup_ds_names): - raise NexusProcessingException(reason="Either 'dataset', 'insitu', or both arguments are required", - code=400) - - if matchup_ds_names is not None: - try: - matchup_ds_names = matchup_ds_names.split(',') - except: - raise NexusProcessingException(reason="'insitu' argument should be a comma-seperated list", code=400) - - parameter_s = request.get_argument('parameter', None) - if parameter_s not in ['sst', 'sss', 'wind']: - raise NexusProcessingException( - reason="Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'." % parameter_s, code=400) - - try: - start_time = request.get_start_datetime() - start_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") - except: - raise NexusProcessingException( - reason="'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", - code=400) - try: - end_time = request.get_end_datetime() - end_time = end_time.strftime("%Y-%m-%dT%H:%M:%SZ") - except: - raise NexusProcessingException( - reason="'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", - code=400) - - if start_time > end_time: - raise NexusProcessingException( - reason="The starting time must be before the ending time. Received startTime: %s, endTime: %s" % ( - request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)), - code=400) - - try: - bounding_polygon = request.get_bounding_polygon() - except: - raise NexusProcessingException( - reason="'b' argument is required. 
Must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude", - code=400) - - depth_min = request.get_decimal_arg('depthMin', default=None) - depth_max = request.get_decimal_arg('depthMax', default=None) - - if depth_min is not None and depth_max is not None and depth_min >= depth_max: - raise NexusProcessingException( - reason="Depth Min should be less than Depth Max", code=400) - - platforms = request.get_argument('platforms', None) - if platforms is not None: - try: - p_validation = platforms.split(',') - p_validation = [int(p) for p in p_validation] - del p_validation - except: - raise NexusProcessingException(reason="platforms must be a comma-delimited list of integers", code=400) - - return primary_ds_name, matchup_ds_names, parameter_s, start_time, end_time, \ - bounding_polygon, depth_min, depth_max, platforms - - def calc(self, request, **args): - - primary_ds_name, matchup_ds_names, parameter_s, start_time, end_time, \ - bounding_polygon, depth_min, depth_max, platforms = self.parse_arguments(request) - - primary_url = "https://doms.jpl.nasa.gov/datainbounds" - primary_params = { - 'ds': primary_ds_name, - 'parameter': parameter_s, - 'b': ','.join([str(bound) for bound in bounding_polygon.bounds]), - 'startTime': start_time, - 'endTime': end_time, - 'output': "CSV" - } - - matchup_url = "https://doms.jpl.nasa.gov/domsinsitusubset" - matchup_params = { - 'source': None, - 'parameter': parameter_s, - 'startTime': start_time, - 'endTime': end_time, - 'b': ','.join([str(bound) for bound in bounding_polygon.bounds]), - 'depthMin': depth_min, - 'depthMax': depth_max, - 'platforms': platforms, - 'output': 'CSV' - } - - primary_temp_file_path = None - matchup_downloads = None - - with requests.session() as session: - - if not is_blank(primary_ds_name): - # Download primary - primary_temp_file, primary_temp_file_path = tempfile.mkstemp(suffix='.csv') - download_file(primary_url, primary_temp_file_path, session, params=primary_params) - - if len(matchup_ds_names) > 0: - # Download matchup - matchup_downloads = {} - for matchup_ds in matchup_ds_names: - matchup_downloads[matchup_ds] = tempfile.mkstemp(suffix='.csv') - matchup_params['source'] = matchup_ds - download_file(matchup_url, matchup_downloads[matchup_ds][1], session, params=matchup_params) - - # Zip downloads - date_range = "%s-%s" % (datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y%m%d"), - datetime.strptime(end_time, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y%m%d")) - bounds = '%.4fW_%.4fS_%.4fE_%.4fN' % bounding_polygon.bounds - zip_dir = tempfile.mkdtemp() - zip_path = '%s/subset.%s.%s.zip' % (zip_dir, date_range, bounds) - with zipfile.ZipFile(zip_path, 'w') as my_zip: - if primary_temp_file_path: - my_zip.write(primary_temp_file_path, arcname='%s.%s.%s.csv' % (primary_ds_name, date_range, bounds)) - if matchup_downloads: - for matchup_ds, download in matchup_downloads.iteritems(): - my_zip.write(download[1], arcname='%s.%s.%s.csv' % (matchup_ds, date_range, bounds)) - - # Clean up - if primary_temp_file_path: - os.remove(primary_temp_file_path) - if matchup_downloads: - for matchup_ds, download in matchup_downloads.iteritems(): - os.remove(download[1]) - - return SubsetResult(zip_path) - - -class SubsetResult(object): - def __init__(self, zip_path): - self.zip_path = zip_path - - def toJson(self): - raise NotImplementedError - - def toZip(self): - with open(self.zip_path, 'rb') as zip_file: - zip_contents = 
zip_file.read() - - return zip_contents - - def cleanup(self): - os.remove(self.zip_path) - - -def download_file(url, filepath, session, params=None): - r = session.get(url, params=params, stream=True) - with open(filepath, 'wb') as f: - for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks - f.write(chunk) diff --git a/analysis/webservice/algorithms/doms/values.py b/analysis/webservice/algorithms/doms/values.py deleted file mode 100644 index c47d4503..00000000 --- a/analysis/webservice/algorithms/doms/values.py +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -PLATFORMS = [ - {"id": 1, "desc": "ship"}, - {"id": 2, "desc": "moored surface buoy"}, - {"id": 3, "desc": "drifting surface float"}, - {"id": 4, "desc": "drifting subsurface profiling float"}, - {"id": 5, "desc": "autonomous underwater vehicle"}, - {"id": 6, "desc": "offshore structure"}, - {"id": 7, "desc": "coastal structure"}, - {"id": 8, "desc": "towed unmanned submersible"}, - {"id": 9, "desc": "orbiting satellite"} -] - -DEVICES = [ - {"id": 1, "desc": "bathythermographs"}, - {"id": 2, "desc": "discrete water samplers"}, - {"id": 3, "desc": "CTD"}, - {"id": 4, "desc": "Current profilers / acousticDopplerCurrentProfiler"}, - {"id": 5, "desc": "radiometers"}, - {"id": 6, "desc": "scatterometers"} -] - -MISSIONS = [ - {"id": 1, "desc": "SAMOS"}, - {"id": 2, "desc": "ICOADS"}, - {"id": 3, "desc": "Aquarius"}, - {"id": 4, "desc": "SPURS1"} -] - - -def getDescById(list, id): - for item in list: - if item["id"] == id: - return item["desc"] - return id - - -def getPlatformById(id): - return getDescById(PLATFORMS, id) - - -def getDeviceById(id): - return getDescById(DEVICES, id) - - -def getMissionById(id): - return getDescById(MISSIONS, id) - - -def getDescByListNameAndId(listName, id): - if listName.upper() == "PLATFORM": - return getPlatformById(id) - elif listName.upper() == "DEVICE": - return getDeviceById(id) - elif listName.upper() == "MISSION": - return getMissionById(id) - else: - raise Exception("Invalid list name specified ('%s')" % listName) diff --git a/analysis/webservice/algorithms/doms/workerthread.py b/analysis/webservice/algorithms/doms/workerthread.py deleted file mode 100644 index 7639c006..00000000 --- a/analysis/webservice/algorithms/doms/workerthread.py +++ /dev/null @@ -1,61 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import threading - - -class WorkerThread(threading.Thread): - - def __init__(self, method, params): - threading.Thread.__init__(self) - self.method = method - self.params = params - self.completed = False - self.results = None - - def run(self): - self.results = self.method(*self.params) - self.completed = True - - -def __areAllComplete(threads): - for thread in threads: - if not thread.completed: - return False - - return True - - -def wait(threads, startFirst=False, poll=0.5): - if startFirst: - for thread in threads: - thread.start() - - while not __areAllComplete(threads): - threading._sleep(poll) - - -def foo(param1, param2): - print param1, param2 - return "c" - - -if __name__ == "__main__": - - thread = WorkerThread(foo, params=("a", "b")) - thread.start() - while not thread.completed: - threading._sleep(0.5) - print thread.results diff --git a/analysis/webservice/algorithms_spark/__init__.py b/analysis/webservice/algorithms_spark/__init__.py index d6ed83f5..a25c8d5c 100644 --- a/analysis/webservice/algorithms_spark/__init__.py +++ b/analysis/webservice/algorithms_spark/__init__.py @@ -20,7 +20,6 @@ import CorrMapSpark import DailyDifferenceAverageSpark import HofMoellerSpark -import Matchup import MaximaMinimaSpark import NexusCalcSparkHandler import TimeAvgMapSpark @@ -46,11 +45,6 @@ def module_exists(module_name): except ImportError: pass - try: - import Matchup - except ImportError: - pass - try: import TimeAvgMapSpark except ImportError: diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini index 2644ade2..a1ecb2c2 100644 --- a/analysis/webservice/config/web.ini +++ b/analysis/webservice/config/web.ini @@ -14,4 +14,4 @@ static_enabled=true static_dir=static [modules] -module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms \ No newline at end of file +module_dirs=webservice.algorithms,webservice.algorithms_spark \ No newline at end of file diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index e363ab75..4af0e381 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -9,7 +9,7 @@ spec: pythonVersion: "2" mode: cluster image: {{ .Values.webapp.distributed.image }} - imagePullPolicy: Always + imagePullPolicy: IfNotPresent mainApplicationFile: local:///incubator-sdap-nexus/analysis/webservice/webapp.py arguments: - "--solr-host={{ .Release.Name }}-solr-svc:8983" diff --git a/tools/doms/README.md b/tools/doms/README.md deleted file mode 100644 index c49fa4ab..00000000 --- a/tools/doms/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# doms_reader.py -The functions in doms_reader.py read a DOMS netCDF file into memory, assemble a list of matches of satellite and in situ data, and optionally output the matches to a CSV file. Each matched pair contains one satellite data record and one in situ data record. - -The DOMS netCDF files hold satellite data and in situ data in different groups (`SatelliteData` and `InsituData`). The `matchIDs` netCDF variable contains pairs of IDs (matches) which reference a satellite data record and an in situ data record in their respective groups. 
These records have a many-to-many relationship; one satellite record may match to many in situ records, and one in situ record may match to many satellite records. The `assemble_matches` function assembles the individual data records into pairs based on their `dim` group dimension IDs as paired in the `matchIDs` variable. - -## Requirements -This tool was developed and tested with Python 2.7.5 and 3.7.0a0. -Imported packages: -* argparse -* netcdf4 -* sys -* datetime -* csv -* collections -* logging - - -## Functions -### Function: `assemble_matches(filename)` -Read a DOMS netCDF file into memory and return a list of matches from the file. - -#### Parameters -- `filename` (str): the DOMS netCDF file name. - -#### Returns -- `matches` (list): List of matches. - -Each list element in `matches` is a dictionary organized as follows: - For match `m`, netCDF group `GROUP` ('SatelliteData' or 'InsituData'), and netCDF group variable `VARIABLE`: - -`matches[m][GROUP]['matchID']`: netCDF `MatchedRecords` dimension ID for the match -`matches[m][GROUP]['GROUPID']`: GROUP netCDF `dim` dimension ID for the record -`matches[m][GROUP][VARIABLE]`: variable value - -For example, to access the timestamps of the satellite data and the in situ data of the first match in the list, along with the `MatchedRecords` dimension ID and the groups' `dim` dimension ID: -```python -matches[0]['SatelliteData']['time'] -matches[0]['InsituData']['time'] -matches[0]['SatelliteData']['matchID'] -matches[0]['SatelliteData']['SatelliteDataID'] -matches[0]['InsituData']['InsituDataID'] -``` - - -### Function: `matches_to_csv(matches, csvfile)` -Write the DOMS matches to a CSV file. Include a header of column names which are based on the group and variable names from the netCDF file. - -#### Parameters: -- `matches` (list): the list of dictionaries containing the DOMS matches as returned from the `assemble_matches` function. -- `csvfile` (str): the name of the CSV output file. - -## Usage -For example, to read some DOMS netCDF file called `doms_file.nc`: -### Command line -The main function for `doms_reader.py` takes one `filename` parameter (`doms_file.nc` argument in this example) for the DOMS netCDF file to read, calls the `assemble_matches` function, then calls the `matches_to_csv` function to write the matches to a CSV file `doms_matches.csv`. -``` -python doms_reader.py doms_file.nc -``` -``` -python3 doms_reader.py doms_file.nc -``` -### Importing `assemble_matches` -```python -from doms_reader import assemble_matches -matches = assemble_matches('doms_file.nc') -``` diff --git a/tools/doms/doms_reader.py b/tools/doms/doms_reader.py deleted file mode 100644 index c8229c48..00000000 --- a/tools/doms/doms_reader.py +++ /dev/null @@ -1,144 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -from netCDF4 import Dataset, num2date -import sys -import datetime -import csv -from collections import OrderedDict -import logging - -LOGGER = logging.getLogger("doms_reader") - -def assemble_matches(filename): - """ - Read a DOMS netCDF file and return a list of matches. - - Parameters - ---------- - filename : str - The DOMS netCDF file name. - - Returns - ------- - matches : list - List of matches. Each list element is a dictionary. - For match m, netCDF group GROUP (SatelliteData or InsituData), and - group variable VARIABLE: - matches[m][GROUP]['matchID']: MatchedRecords dimension ID for the match - matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record - matches[m][GROUP][VARIABLE]: variable value - """ - - try: - # Open the netCDF file - with Dataset(filename, 'r') as doms_nc: - # Check that the number of groups is consistent w/ the MatchedGroups - # dimension - assert len(doms_nc.groups) == doms_nc.dimensions['MatchedGroups'].size,\ - ("Number of groups isn't the same as MatchedGroups dimension.") - - matches = [] - matched_records = doms_nc.dimensions['MatchedRecords'].size - - # Loop through the match IDs to assemble matches - for match in range(0, matched_records): - match_dict = OrderedDict() - # Grab the data from each platform (group) in the match - for group_num, group in enumerate(doms_nc.groups): - match_dict[group] = OrderedDict() - match_dict[group]['matchID'] = match - ID = doms_nc.variables['matchIDs'][match][group_num] - match_dict[group][group + 'ID'] = ID - for var in doms_nc.groups[group].variables.keys(): - match_dict[group][var] = doms_nc.groups[group][var][ID] - - # Create a UTC datetime field from timestamp - dt = num2date(match_dict[group]['time'], - doms_nc.groups[group]['time'].units) - match_dict[group]['datetime'] = dt - LOGGER.info(match_dict) - matches.append(match_dict) - - return matches - except (OSError, IOError) as err: - LOGGER.exception("Error reading netCDF file " + filename) - raise err - -def matches_to_csv(matches, csvfile): - """ - Write the DOMS matches to a CSV file. Include a header of column names - which are based on the group and variable names from the netCDF file. - - Parameters - ---------- - matches : list - The list of dictionaries containing the DOMS matches as returned from - assemble_matches. - csvfile : str - The name of the CSV output file. - """ - # Create a header for the CSV. Column names are GROUP_VARIABLE or - # GROUP_GROUPID. 
- header = [] - for key, value in matches[0].items(): - for otherkey in value.keys(): - header.append(key + "_" + otherkey) - - try: - # Write the CSV file - with open(csvfile, 'w') as output_file: - csv_writer = csv.writer(output_file) - csv_writer.writerow(header) - for match in matches: - row = [] - for group, data in match.items(): - for value in data.values(): - row.append(value) - csv_writer.writerow(row) - except (OSError, IOError) as err: - LOGGER.exception("Error writing CSV file " + csvfile) - raise err - -if __name__ == '__main__': - """ - Execution: - python doms_reader.py filename - OR - python3 doms_reader.py filename - """ - logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', - level=logging.INFO, - datefmt='%Y-%m-%d %H:%M:%S') - - p = argparse.ArgumentParser() - p.add_argument('filename', help='DOMS netCDF file to read') - args = p.parse_args() - - doms_matches = assemble_matches(args.filename) - - matches_to_csv(doms_matches, 'doms_matches.csv') - - - - - - - - - - - \ No newline at end of file From d1f20b72ee12c2e9b45f5de100a14444b4c04ff5 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 22 Jul 2020 16:37:09 -0700 Subject: [PATCH 11/26] pass cassandra host --- helm/templates/webapp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index 4af0e381..2963cce9 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -12,7 +12,7 @@ spec: imagePullPolicy: IfNotPresent mainApplicationFile: local:///incubator-sdap-nexus/analysis/webservice/webapp.py arguments: - - "--solr-host={{ .Release.Name }}-solr-svc:8983" + - "--cassandra-host=sdap-cassandra --solr-host={{ .Release.Name }}-solr-svc:8983" sparkVersion: "2.4.4" restartPolicy: type: OnFailure From 657924f45ac859dffc34fe3e3b39b346425d5845 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Thu, 23 Jul 2020 12:34:18 -0700 Subject: [PATCH 12/26] add support for cassandra username and password --- data-access/nexustiles/config/datastores.ini.default | 2 +- data-access/nexustiles/dao/CassandraProxy.py | 1 + data-access/tests/config/datastores.ini | 9 --------- 3 files changed, 2 insertions(+), 10 deletions(-) delete mode 100644 data-access/tests/config/datastores.ini diff --git a/data-access/nexustiles/config/datastores.ini.default b/data-access/nexustiles/config/datastores.ini.default index ed40068e..2faae536 100644 --- a/data-access/nexustiles/config/datastores.ini.default +++ b/data-access/nexustiles/config/datastores.ini.default @@ -1,5 +1,5 @@ [cassandra] -host=localhost +host=sdap-cassandra port=9042 keyspace=nexustiles local_datacenter=datacenter1 diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py index a8a4e6e6..3adaf15b 100644 --- a/data-access/nexustiles/dao/CassandraProxy.py +++ b/data-access/nexustiles/dao/CassandraProxy.py @@ -17,6 +17,7 @@ import uuid from ConfigParser import NoOptionError +from cassandra.auth import PlainTextAuthProvider import nexusproto.DataTile_pb2 as nexusproto import numpy as np from cassandra.auth import PlainTextAuthProvider diff --git a/data-access/tests/config/datastores.ini b/data-access/tests/config/datastores.ini deleted file mode 100644 index 194760cb..00000000 --- a/data-access/tests/config/datastores.ini +++ /dev/null @@ -1,9 +0,0 @@ -[cassandra] -host=127.0.0.1 -keyspace=nexustiles -local_datacenter=datacenter1 -protocol_version=3 - -[solr] -host=localhost:8983 -core=nexustiles \ No newline at end of file From 
a0bf3ec0bd75893bd42da8c2bac658e7b6ca266f Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 27 Jul 2020 10:29:33 -0700 Subject: [PATCH 13/26] cassandra helm chart included --- helm/requirements.yaml | 5 +- helm/templates/cassandra.yml | 107 -------------------- helm/templates/granule-ingester.yml | 4 + helm/templates/init-cassandra-configmap.yml | 13 +++ helm/templates/webapp.yml | 2 +- helm/values.yaml | 30 ++++-- 6 files changed, 40 insertions(+), 121 deletions(-) delete mode 100644 helm/templates/cassandra.yml create mode 100644 helm/templates/init-cassandra-configmap.yml diff --git a/helm/requirements.yaml b/helm/requirements.yaml index ebcd9c0d..3e038e71 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -10,6 +10,7 @@ dependencies: - name: solr version: 1.5.2 repository: http://storage.googleapis.com/kubernetes-charts-incubator - - + - name: cassandra + version: 5.5.3 + repository: https://charts.bitnami.com/bitnami diff --git a/helm/templates/cassandra.yml b/helm/templates/cassandra.yml deleted file mode 100644 index 6023e55e..00000000 --- a/helm/templates/cassandra.yml +++ /dev/null @@ -1,107 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: sdap-cassandra -spec: - clusterIP: None - ports: - - name: cql - port: 9042 - targetPort: cql - selector: - app: sdap-cassandra - ---- - -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: cassandra-set -spec: - serviceName: sdap-cassandra - replicas: {{ .Values.cassandra.replicas }} - selector: - matchLabels: - app: sdap-cassandra - template: - metadata: - labels: - app: sdap-cassandra - spec: - terminationGracePeriodSeconds: 120 - {{ if .Values.cassandra.tolerations }} - tolerations: -{{ .Values.cassandra.tolerations | toYaml | indent 6 }} - {{ end }} - {{ if .Values.cassandra.nodeSelector }} - nodeSelector: -{{ .Values.cassandra.nodeSelector | toYaml | indent 8 }} - {{ end }} - affinity: - podAntiAffinity: - # Prefer spreading over all hosts - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: "app" - operator: In - values: - - sdap-cassandra - topologyKey: "kubernetes.io/hostname" - containers: - - name: cassandra - image: nexusjpl/cassandra:1.0.0-rc1 - imagePullPolicy: Always - ports: - - containerPort: 7000 - name: intra-node - - containerPort: 7001 - name: tls-intra-node - - containerPort: 7199 - name: jmx - - containerPort: 9042 - name: cql - resources: - requests: - cpu: {{ .Values.cassandra.requests.cpu }} - memory: {{ .Values.cassandra.requests.memory }} - limits: - cpu: {{ .Values.cassandra.limits.cpu }} - memory: {{ .Values.cassandra.limits.memory }} - securityContext: - capabilities: - add: - - IPC_LOCK - lifecycle: - preStop: - exec: - command: - - /bin/sh - - -c - - nodetool drain - env: - - name: MAX_HEAP_SIZE - value: 2G - - name: HEAP_NEWSIZE - value: 200M - - name: CASSANDRA_SEEDS - value: "cassandra-set-0.sdap-cassandra" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - volumeMounts: - - name: cassandra-data - mountPath: /var/lib/cassandra - - volumeClaimTemplates: - - metadata: - name: cassandra-data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: {{ .Values.storageClass }} - resources: - requests: - storage: {{ .Values.cassandra.storage }} diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index 2a716f17..312e7dd1 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -28,6 +28,10 @@ spec: value: {{ 
.Values.rabbitmq.fullnameOverride }} - name: CASSANDRA_CONTACT_POINTS value: sdap-cassandra + - name: CASSANDRA_USERNAME + value: cassandra + - name: CASSANDRA_PASSWORD + value: cassandra - name: ZK_HOST_AND_PORT value: {{ .Release.Name }}-zookeeper:2181 {{ if .Values.ingestion.granuleIngester.maxConcurrency }} diff --git a/helm/templates/init-cassandra-configmap.yml b/helm/templates/init-cassandra-configmap.yml new file mode 100644 index 00000000..eadbc9c7 --- /dev/null +++ b/helm/templates/init-cassandra-configmap.yml @@ -0,0 +1,13 @@ +apiVersion: v1 +data: + init.cql: | + CREATE KEYSPACE IF NOT EXISTS nexustiles WITH REPLICATION = { 'class': 'SimpleStrategy', 'replication_factor': 1 }; + + CREATE TABLE IF NOT EXISTS nexustiles.sea_surface_temp ( + tile_id uuid PRIMARY KEY, + tile_blob blob + ); +kind: ConfigMap +metadata: + name: init-cassandra + namespace: sdap diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index 2963cce9..8efd3ece 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -12,7 +12,7 @@ spec: imagePullPolicy: IfNotPresent mainApplicationFile: local:///incubator-sdap-nexus/analysis/webservice/webapp.py arguments: - - "--cassandra-host=sdap-cassandra --solr-host={{ .Release.Name }}-solr-svc:8983" + - "--cassandra-host={{ .Release.Name }}-cassandra --cassandra-username=cassandra --cassandra-password=cassandra --solr-host={{ .Release.Name }}-solr-svc:8983" sparkVersion: "2.4.4" restartPolicy: type: OnFailure diff --git a/helm/values.yaml b/helm/values.yaml index aa03c0aa..c9b9cf13 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -99,16 +99,6 @@ ingestion: ## Store ingestion history in a solr database instead of a filesystem directory # url: http://history-solr -cassandra: - replicas: 2 - storage: 13Gi - requests: - cpu: 1 - memory: 3Gi - limits: - cpu: 1 - memory: 3Gi - solr: replicaCount: 3 volumeClaimTemplates: @@ -165,4 +155,22 @@ rabbitmq: username: guest password: guest ingress: - enabled: true \ No newline at end of file + enabled: true + +cassandra: + initDBConfigMap: init-cassandra + dbUser: + user: cassandra + password: cassandra + cluster: + replicaCount: 1 + persistence: + storageClass: hostpath + size: 8Gi + resources: + requests: + cpu: 1 + memory: 8Gi + limits: + cpu: 1 + memory: 8Gi From f2aad1346d5d8251d5ca339f576f89902dfa93a4 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 27 Jul 2020 13:18:08 -0700 Subject: [PATCH 14/26] fix arguments sent to spark driver, add logging in cassandraproxy --- data-access/nexustiles/dao/CassandraProxy.py | 3 +++ helm/templates/webapp.yml | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py index 3adaf15b..9a2d6ee6 100644 --- a/data-access/nexustiles/dao/CassandraProxy.py +++ b/data-access/nexustiles/dao/CassandraProxy.py @@ -162,6 +162,9 @@ def __init__(self, config): self.__cass_protocol_version = config.getint("cassandra", "protocol_version") self.__cass_dc_policy = config.get("cassandra", "dc_policy") + logger.info("Setting cassandra host to " + self.__cass_url) + logger.info("Setting cassandra username to " + self.__cass_username) + try: self.__cass_port = config.getint("cassandra", "port") except NoOptionError: diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index 8efd3ece..2921bb65 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -12,7 +12,10 @@ spec: imagePullPolicy: IfNotPresent mainApplicationFile: 
local:///incubator-sdap-nexus/analysis/webservice/webapp.py arguments: - - "--cassandra-host={{ .Release.Name }}-cassandra --cassandra-username=cassandra --cassandra-password=cassandra --solr-host={{ .Release.Name }}-solr-svc:8983" + - --cassandra-host={{ .Release.Name }}-cassandra + - --cassandra-username=cassandra + - --cassandra-password=cassandra + - --solr-host={{ .Release.Name }}-solr-svc:8983 sparkVersion: "2.4.4" restartPolicy: type: OnFailure From 0b1c3c484a3ba8dea273dce9abb334429abf5c0e Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 27 Jul 2020 16:34:37 -0700 Subject: [PATCH 15/26] fix namespace --- helm/templates/init-cassandra-configmap.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/templates/init-cassandra-configmap.yml b/helm/templates/init-cassandra-configmap.yml index eadbc9c7..3e7ed3cc 100644 --- a/helm/templates/init-cassandra-configmap.yml +++ b/helm/templates/init-cassandra-configmap.yml @@ -10,4 +10,4 @@ data: kind: ConfigMap metadata: name: init-cassandra - namespace: sdap + namespace: {{ .Release.Namespace }} From e728f27e9e93fe15057fd35d1f7b92c4204436c5 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 27 Jul 2020 17:45:50 -0700 Subject: [PATCH 16/26] fix cass url for granule ingester --- helm/templates/granule-ingester.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index 312e7dd1..29fbd569 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -27,7 +27,7 @@ spec: - name: RABBITMQ_HOST value: {{ .Values.rabbitmq.fullnameOverride }} - name: CASSANDRA_CONTACT_POINTS - value: sdap-cassandra + value: {{ .Release.Name }}-cassandra - name: CASSANDRA_USERNAME value: cassandra - name: CASSANDRA_PASSWORD From fc31fa7ae99902a108e81929dc14b407a6fea63c Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 29 Jul 2020 10:45:53 -0700 Subject: [PATCH 17/26] change solr-create-collection to a deployment --- docker/solr/cloud-init/create-collection.py | 4 ++-- helm/templates/solr-create-collection.yml | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/solr/cloud-init/create-collection.py b/docker/solr/cloud-init/create-collection.py index 9fce9f05..eb77d001 100755 --- a/docker/solr/cloud-init/create-collection.py +++ b/docker/solr/cloud-init/create-collection.py @@ -142,5 +142,5 @@ def get_cluster_status(): # We're done, do nothing forever. 
logging.info("Done.") -# while True: -# time.sleep(987654321) +while True: + time.sleep(987654321) diff --git a/helm/templates/solr-create-collection.yml b/helm/templates/solr-create-collection.yml index 7db5b97e..756c8ed3 100644 --- a/helm/templates/solr-create-collection.yml +++ b/helm/templates/solr-create-collection.yml @@ -1,12 +1,12 @@ -apiVersion: batch/v1 -kind: Job +apiVersion: apps/v1 +kind: Deployment metadata: name: solr-create-collection spec: -# selector: -# matchLabels: -# app: solr-create-collection # has to match .spec.template.metadata.labels -# replicas: 1 + selector: + matchLabels: + app: solr-create-collection # has to match .spec.template.metadata.labels + replicas: 1 template: metadata: labels: @@ -18,7 +18,7 @@ spec: image: nexusjpl/solr-cloud-init:1.0.1 resources: requests: - memory: "1Gi" + memory: "0.5Gi" cpu: "0.25" env: - name: MINIMUM_NODES @@ -31,4 +31,4 @@ spec: value: "{{ .Release.Name }}-zookeeper:2181/solr" - name: CREATE_COLLECTION_PARAMS value: "name=nexustiles&numShards=$(MINIMUM_NODES)&waitForFinalState=true" - restartPolicy: OnFailure + restartPolicy: Always From da60069b9a278ee03158c7fb77f22f99a97e2c30 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 29 Jul 2020 16:49:37 -0700 Subject: [PATCH 18/26] make solr history default --- helm/templates/collection-manager.yml | 8 ++++---- helm/values.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/helm/templates/collection-manager.yml b/helm/templates/collection-manager.yml index 6708b133..374eabdc 100644 --- a/helm/templates/collection-manager.yml +++ b/helm/templates/collection-manager.yml @@ -30,9 +30,9 @@ spec: value: {{ .Values.rabbitmq.fullnameOverride }} - name: COLLECTIONS_PATH value: {{ include "nexus.collectionsConfig.mountPath" . }}/collections.yml - {{- if $history.url }} + {{- if $history.solrEnabled }} - name: HISTORY_URL - value: {{ .Values.ingestion.history.url}} + value: http://{{ .Release.Name }}-solr-svc:8983 {{- else }} - name: HISTORY_PATH value: {{ include "nexus.history.mountPath" . }} @@ -46,7 +46,7 @@ spec: memory: {{ .Values.ingestion.collectionManager.memory }} volumeMounts: {{ include "nexus.ingestion.dataVolumeMount" . | indent 12 }} - {{- if not $history.url }} + {{- if not $history.solrEnabled }} - name: history-volume mountPath: {{ include "nexus.history.mountPath" . }} {{- end }} @@ -57,7 +57,7 @@ spec: - name: collections-config-volume configMap: name: {{ include "nexus.collectionsConfig.configmapName" . 
}} - {{- if not $history.url }} + {{- if not $history.solrEnabled }} - name: history-volume persistentVolumeClaim: claimName: history-volume-claim diff --git a/helm/values.yaml b/helm/values.yaml index c9b9cf13..9158cb02 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -97,7 +97,7 @@ ingestion: ## Defaults to a using a history directory, stored on a PVC using the storageClass defined in this file above history: ## Store ingestion history in a solr database instead of a filesystem directory - # url: http://history-solr + solrEnabled: true solr: replicaCount: 3 From 9f0c6cd8cbf06dce5fc583221a5a60c8b051cb73 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Mon, 3 Aug 2020 19:03:02 -0700 Subject: [PATCH 19/26] pr --- helm/templates/collection-manager.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/templates/collection-manager.yml b/helm/templates/collection-manager.yml index 374eabdc..255deb37 100644 --- a/helm/templates/collection-manager.yml +++ b/helm/templates/collection-manager.yml @@ -19,7 +19,7 @@ spec: spec: containers: - image: {{ .Values.ingestion.collectionManager.image }} - imagePullPolicy: Always + imagePullPolicy: IfNotPresent name: collection-manager env: - name: RABBITMQ_USERNAME From 481dd93e34ee091ba0807a85ef939af58487101b Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 5 Aug 2020 12:18:27 -0700 Subject: [PATCH 20/26] enable external solr/zk/cass hosts --- helm/requirements.yaml | 2 ++ helm/templates/_helpers.tpl | 7 +++++++ helm/templates/collection-manager.yml | 2 +- helm/templates/granule-ingester.yml | 2 +- helm/templates/solr-create-collection.yml | 8 ++++---- helm/templates/webapp.yml | 2 +- helm/values.yaml | 8 ++++++++ 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/helm/requirements.yaml b/helm/requirements.yaml index 3e038e71..725684cd 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -10,7 +10,9 @@ dependencies: - name: solr version: 1.5.2 repository: http://storage.googleapis.com/kubernetes-charts-incubator + condition: solr.enabled - name: cassandra version: 5.5.3 repository: https://charts.bitnami.com/bitnami + condition: cassandra.enabled diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index b697c179..a016b2fa 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -45,3 +45,10 @@ The data volume mount which is used in both the Collection Manager and the Granu mountPath: {{ .Values.ingestion.granules.mountPath }} {{- end -}} +{{- define "nexus.urls.solr" -}} +{{ .Values.external.solrHostAndPort | default (print "http://" .Release.Name "-solr-svc:8983") }} +{{- end -}} + +{{- define "nexus.urls.zookeeper" -}} +{{ .Values.external.zookeeperHostAndPort | default (print .Release.Name "-zookeeper:2181") }} +{{- end -}} \ No newline at end of file diff --git a/helm/templates/collection-manager.yml b/helm/templates/collection-manager.yml index 255deb37..e2815264 100644 --- a/helm/templates/collection-manager.yml +++ b/helm/templates/collection-manager.yml @@ -32,7 +32,7 @@ spec: value: {{ include "nexus.collectionsConfig.mountPath" . }}/collections.yml {{- if $history.solrEnabled }} - name: HISTORY_URL - value: http://{{ .Release.Name }}-solr-svc:8983 + value: {{ include "nexus.urls.solr" . }} {{- else }} - name: HISTORY_PATH value: {{ include "nexus.history.mountPath" . 
}} diff --git a/helm/templates/granule-ingester.yml b/helm/templates/granule-ingester.yml index 29fbd569..bb616ad6 100644 --- a/helm/templates/granule-ingester.yml +++ b/helm/templates/granule-ingester.yml @@ -33,7 +33,7 @@ spec: - name: CASSANDRA_PASSWORD value: cassandra - name: ZK_HOST_AND_PORT - value: {{ .Release.Name }}-zookeeper:2181 + value: {{ include "nexus.urls.zookeeper" . }} {{ if .Values.ingestion.granuleIngester.maxConcurrency }} - name: MAX_CONCURRENCY value: "{{ .Values.ingestion.granuleIngester.maxConcurrency }}" diff --git a/helm/templates/solr-create-collection.yml b/helm/templates/solr-create-collection.yml index 756c8ed3..7ecb2e3a 100644 --- a/helm/templates/solr-create-collection.yml +++ b/helm/templates/solr-create-collection.yml @@ -1,3 +1,4 @@ +{{ if .Values.solrInitEnabled }} apiVersion: apps/v1 kind: Deployment metadata: @@ -23,12 +24,11 @@ spec: env: - name: MINIMUM_NODES value: "{{ .Values.solr.replicaCount }}" - - name: SOLR_HOST - value: "{{ .Release.Name }}-solr-svc" - name: SDAP_SOLR_URL - value: "http://$(SOLR_HOST):8983/solr/" + value: {{ include "nexus.urls.solr" . }}/solr/ - name: SDAP_ZK_SOLR - value: "{{ .Release.Name }}-zookeeper:2181/solr" + value: {{ include "nexus.urls.zookeeper" . }}/solr - name: CREATE_COLLECTION_PARAMS value: "name=nexustiles&numShards=$(MINIMUM_NODES)&waitForFinalState=true" restartPolicy: Always +{{ end }} \ No newline at end of file diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml index 2921bb65..e4e2adf3 100644 --- a/helm/templates/webapp.yml +++ b/helm/templates/webapp.yml @@ -15,7 +15,7 @@ spec: - --cassandra-host={{ .Release.Name }}-cassandra - --cassandra-username=cassandra - --cassandra-password=cassandra - - --solr-host={{ .Release.Name }}-solr-svc:8983 + - --solr-host={{ include "nexus.urls.solr" . }} sparkVersion: "2.4.4" restartPolicy: type: OnFailure diff --git a/helm/values.yaml b/helm/values.yaml index 9158cb02..6cabd43e 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -99,7 +99,14 @@ ingestion: ## Store ingestion history in a solr database instead of a filesystem directory solrEnabled: true +external: + solrHostAndPort: + zookeeperHostAndPort: + +solrInitEnabled: true + solr: + enabled: true replicaCount: 3 volumeClaimTemplates: storageClassName: hostpath @@ -158,6 +165,7 @@ rabbitmq: enabled: true cassandra: + enabled: true initDBConfigMap: init-cassandra dbUser: user: cassandra From f9ad993dde0e075a27a3cc56e9c6eccd778a2a14 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 5 Aug 2020 12:31:25 -0700 Subject: [PATCH 21/26] rabbitmq.enabled --- helm/requirements.yaml | 2 +- helm/values.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/helm/requirements.yaml b/helm/requirements.yaml index 725684cd..78cc52ed 100644 --- a/helm/requirements.yaml +++ b/helm/requirements.yaml @@ -6,7 +6,7 @@ dependencies: - name: rabbitmq version: 7.1.0 repository: https://charts.bitnami.com/bitnami - condition: ingestion.enabled + condition: rabbitmq.enabled - name: solr version: 1.5.2 repository: http://storage.googleapis.com/kubernetes-charts-incubator diff --git a/helm/values.yaml b/helm/values.yaml index 6cabd43e..41b20bda 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -154,6 +154,7 @@ nginx-ingress: rabbitmq: ## fullnameOverride sets the name of the RabbitMQ service ## with which the ingestion components will communicate. 
+ enabled: true persistence: storageClass: hostpath fullnameOverride: rabbitmq From fb86896aa9e6cb7a53d6c3461992563065bb3245 Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 5 Aug 2020 15:24:10 -0700 Subject: [PATCH 22/26] revert doms --- .../algorithms/doms/BaseDomsHandler.py | 635 ++++++++++++++++++ .../algorithms/doms/DatasetListQuery.py | 116 ++++ .../algorithms/doms/MatchupQuery.py | 452 +++++++++++++ .../algorithms/doms/MetadataQuery.py | 65 ++ .../algorithms/doms/ResultsPlotQuery.py | 55 ++ .../algorithms/doms/ResultsRetrieval.py | 49 ++ .../algorithms/doms/ResultsStorage.py | 286 ++++++++ .../webservice/algorithms/doms/StatsQuery.py | 63 ++ .../webservice/algorithms/doms/ValuesQuery.py | 72 ++ .../webservice/algorithms/doms/__init__.py | 34 + analysis/webservice/algorithms/doms/config.py | 109 +++ .../webservice/algorithms/doms/datafetch.py | 47 ++ .../algorithms/doms/fetchedgeimpl.py | 217 ++++++ analysis/webservice/algorithms/doms/geo.py | 129 ++++ .../algorithms/doms/histogramplot.py | 127 ++++ .../algorithms/doms/insitusubset.py | 263 ++++++++ .../webservice/algorithms/doms/mapplot.py | 175 +++++ .../webservice/algorithms/doms/scatterplot.py | 118 ++++ .../webservice/algorithms/doms/subsetter.py | 260 +++++++ analysis/webservice/algorithms/doms/values.py | 72 ++ .../algorithms/doms/workerthread.py | 61 ++ 21 files changed, 3405 insertions(+) create mode 100644 analysis/webservice/algorithms/doms/BaseDomsHandler.py create mode 100644 analysis/webservice/algorithms/doms/DatasetListQuery.py create mode 100644 analysis/webservice/algorithms/doms/MatchupQuery.py create mode 100644 analysis/webservice/algorithms/doms/MetadataQuery.py create mode 100644 analysis/webservice/algorithms/doms/ResultsPlotQuery.py create mode 100644 analysis/webservice/algorithms/doms/ResultsRetrieval.py create mode 100644 analysis/webservice/algorithms/doms/ResultsStorage.py create mode 100644 analysis/webservice/algorithms/doms/StatsQuery.py create mode 100644 analysis/webservice/algorithms/doms/ValuesQuery.py create mode 100644 analysis/webservice/algorithms/doms/__init__.py create mode 100644 analysis/webservice/algorithms/doms/config.py create mode 100644 analysis/webservice/algorithms/doms/datafetch.py create mode 100644 analysis/webservice/algorithms/doms/fetchedgeimpl.py create mode 100644 analysis/webservice/algorithms/doms/geo.py create mode 100644 analysis/webservice/algorithms/doms/histogramplot.py create mode 100644 analysis/webservice/algorithms/doms/insitusubset.py create mode 100644 analysis/webservice/algorithms/doms/mapplot.py create mode 100644 analysis/webservice/algorithms/doms/scatterplot.py create mode 100644 analysis/webservice/algorithms/doms/subsetter.py create mode 100644 analysis/webservice/algorithms/doms/values.py create mode 100644 analysis/webservice/algorithms/doms/workerthread.py diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py new file mode 100644 index 00000000..d07f929e --- /dev/null +++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py @@ -0,0 +1,635 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import StringIO +import os +import csv +import json +from datetime import datetime +import time +from decimal import Decimal + +import numpy as np +from pytz import timezone, UTC + +import config +import geo +from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler +from webservice.webmodel import NexusResults + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + +try: + from osgeo import gdal + from osgeo.gdalnumeric import * +except ImportError: + import gdal + from gdalnumeric import * + +from netCDF4 import Dataset +import netCDF4 +import tempfile + + +class BaseDomsQueryCalcHandler(BaseHandler): + def __init__(self): + BaseHandler.__init__(self) + + def getDataSourceByName(self, source): + for s in config.ENDPOINTS: + if s["name"] == source: + return s + return None + + def _does_datasource_exist(self, ds): + for endpoint in config.ENDPOINTS: + if endpoint["name"] == ds: + return True + return False + + +class DomsEncoder(json.JSONEncoder): + def __init__(self, **args): + json.JSONEncoder.__init__(self, **args) + + def default(self, obj): + # print 'MyEncoder.default() called' + # print type(obj) + if obj == np.nan: + return None # hard code string for now + elif isinstance(obj, datetime): + return long((obj - EPOCH).total_seconds()) + elif isinstance(obj, Decimal): + return str(obj) + else: + return json.JSONEncoder.default(self, obj) + + +class DomsQueryResults(NexusResults): + def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None, + executionId=None, status_code=200): + NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions, + status_code=status_code) + self.__args = args + self.__bounds = bounds + self.__count = count + self.__details = details + self.__executionId = str(executionId) + + def toJson(self): + bounds = self.__bounds.toMap() if self.__bounds is not None else {} + return json.dumps( + {"executionId": self.__executionId, "data": self.results(), "params": self.__args, "bounds": bounds, + "count": self.__count, "details": self.__details}, indent=4, cls=DomsEncoder) + + def toCSV(self): + return DomsCSVFormatter.create(self.__executionId, self.results(), self.__args, self.__details) + + def toNetCDF(self): + return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details) + + +class DomsCSVFormatter: + @staticmethod + def create(executionId, results, params, details): + + csv_mem_file = StringIO.StringIO() + try: + DomsCSVFormatter.__addConstants(csv_mem_file) + DomsCSVFormatter.__addDynamicAttrs(csv_mem_file, executionId, results, params, details) + csv.writer(csv_mem_file).writerow([]) + + DomsCSVFormatter.__packValues(csv_mem_file, results, params) + + csv_out = csv_mem_file.getvalue() + finally: + csv_mem_file.close() + + return csv_out + + @staticmethod + def __packValues(csv_mem_file, results, params): + + writer = csv.writer(csv_mem_file) + + headers = [ + # Primary + "id", "source", "lon (degrees_east)", "lat (degrees_north)", "time", "platform", + 
"sea_surface_salinity (1e-3)", "sea_surface_temperature (degree_C)", "wind_speed (m s-1)", "wind_direction", + "wind_u (m s-1)", "wind_v (m s-1)", + # Match + "id", "source", "lon (degrees_east)", "lat (degrees_north)", "time", "platform", + "depth (m)", "sea_water_salinity (1e-3)", + "sea_water_temperature (degree_C)", "wind_speed (m s-1)", + "wind_direction", "wind_u (m s-1)", "wind_v (m s-1)" + ] + + writer.writerow(headers) + + # + # Only include the depth variable related to the match-up parameter. If the match-up parameter + # is not sss or sst then do not include any depth data, just fill values. + # + if params["parameter"] == "sss": + depth = "sea_water_salinity_depth" + elif params["parameter"] == "sst": + depth = "sea_water_temperature_depth" + else: + depth = "NO_DEPTH" + + for primaryValue in results: + for matchup in primaryValue["matches"]: + row = [ + # Primary + primaryValue["id"], primaryValue["source"], str(primaryValue["x"]), str(primaryValue["y"]), + primaryValue["time"].strftime(ISO_8601), primaryValue["platform"], + primaryValue.get("sea_water_salinity", ""), primaryValue.get("sea_water_temperature", ""), + primaryValue.get("wind_speed", ""), primaryValue.get("wind_direction", ""), + primaryValue.get("wind_u", ""), primaryValue.get("wind_v", ""), + + # Matchup + matchup["id"], matchup["source"], matchup["x"], matchup["y"], + matchup["time"].strftime(ISO_8601), matchup["platform"], + matchup.get(depth, ""), matchup.get("sea_water_salinity", ""), + matchup.get("sea_water_temperature", ""), + matchup.get("wind_speed", ""), matchup.get("wind_direction", ""), + matchup.get("wind_u", ""), matchup.get("wind_v", ""), + ] + writer.writerow(row) + + @staticmethod + def __addConstants(csvfile): + + global_attrs = [ + {"Global Attribute": "product_version", "Value": "1.0"}, + {"Global Attribute": "Conventions", "Value": "CF-1.6, ACDD-1.3"}, + {"Global Attribute": "title", "Value": "DOMS satellite-insitu machup output file"}, + {"Global Attribute": "history", + "Value": "Processing_Version = V1.0, Software_Name = DOMS, Software_Version = 1.03"}, + {"Global Attribute": "institution", "Value": "JPL, FSU, NCAR"}, + {"Global Attribute": "source", "Value": "doms.jpl.nasa.gov"}, + {"Global Attribute": "standard_name_vocabulary", + "Value": "CF Standard Name Table v27, BODC controlled vocabulary"}, + {"Global Attribute": "cdm_data_type", "Value": "Point/Profile, Swath/Grid"}, + {"Global Attribute": "processing_level", "Value": "4"}, + {"Global Attribute": "project", "Value": "Distributed Oceanographic Matchup System (DOMS)"}, + {"Global Attribute": "keywords_vocabulary", + "Value": "NASA Global Change Master Directory (GCMD) Science Keywords"}, + # TODO What should the keywords be? + {"Global Attribute": "keywords", "Value": "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, " + "NASA/JPL/PODAAC, FSU/COAPS, UCAR/NCAR, SALINITY, " + "SEA SURFACE TEMPERATURE, SURFACE WINDS"}, + {"Global Attribute": "creator_name", "Value": "NASA PO.DAAC"}, + {"Global Attribute": "creator_email", "Value": "podaac@podaac.jpl.nasa.gov"}, + {"Global Attribute": "creator_url", "Value": "https://podaac.jpl.nasa.gov/"}, + {"Global Attribute": "publisher_name", "Value": "NASA PO.DAAC"}, + {"Global Attribute": "publisher_email", "Value": "podaac@podaac.jpl.nasa.gov"}, + {"Global Attribute": "publisher_url", "Value": "https://podaac.jpl.nasa.gov"}, + {"Global Attribute": "acknowledgment", "Value": "DOMS is a NASA/AIST-funded project. 
NRA NNH14ZDA001N."}, + ] + + writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys())) + + writer.writerows(global_attrs) + + @staticmethod + def __addDynamicAttrs(csvfile, executionId, results, params, details): + + platforms = set() + for primaryValue in results: + platforms.add(primaryValue['platform']) + for match in primaryValue['matches']: + platforms.add(match['platform']) + + # insituDatasets = params["matchup"].split(",") + insituDatasets = params["matchup"] + insituLinks = set() + for insitu in insituDatasets: + insituLinks.add(config.METADATA_LINKS[insitu]) + + + global_attrs = [ + {"Global Attribute": "Platform", "Value": ', '.join(platforms)}, + {"Global Attribute": "time_coverage_start", + "Value": params["startTime"].strftime(ISO_8601)}, + {"Global Attribute": "time_coverage_end", + "Value": params["endTime"].strftime(ISO_8601)}, + {"Global Attribute": "time_coverage_resolution", "Value": "point"}, + + {"Global Attribute": "geospatial_lon_min", "Value": params["bbox"].split(',')[0]}, + {"Global Attribute": "geospatial_lat_min", "Value": params["bbox"].split(',')[1]}, + {"Global Attribute": "geospatial_lon_max", "Value": params["bbox"].split(',')[2]}, + {"Global Attribute": "geospatial_lat_max", "Value": params["bbox"].split(',')[3]}, + {"Global Attribute": "geospatial_lat_resolution", "Value": "point"}, + {"Global Attribute": "geospatial_lon_resolution", "Value": "point"}, + {"Global Attribute": "geospatial_lat_units", "Value": "degrees_north"}, + {"Global Attribute": "geospatial_lon_units", "Value": "degrees_east"}, + + {"Global Attribute": "geospatial_vertical_min", "Value": params["depthMin"]}, + {"Global Attribute": "geospatial_vertical_max", "Value": params["depthMax"]}, + {"Global Attribute": "geospatial_vertical_units", "Value": "m"}, + {"Global Attribute": "geospatial_vertical_resolution", "Value": "point"}, + {"Global Attribute": "geospatial_vertical_positive", "Value": "down"}, + + {"Global Attribute": "DOMS_matchID", "Value": executionId}, + {"Global Attribute": "DOMS_TimeWindow", "Value": params["timeTolerance"] / 60 / 60}, + {"Global Attribute": "DOMS_TimeWindow_Units", "Value": "hours"}, + + {"Global Attribute": "DOMS_platforms", "Value": params["platforms"]}, + {"Global Attribute": "DOMS_SearchRadius", "Value": params["radiusTolerance"]}, + {"Global Attribute": "DOMS_SearchRadius_Units", "Value": "m"}, + + {"Global Attribute": "DOMS_DatasetMetadata", "Value": ', '.join(insituLinks)}, + {"Global Attribute": "DOMS_primary", "Value": params["primary"]}, + {"Global Attribute": "DOMS_match_up", "Value": params["matchup"]}, + {"Global Attribute": "DOMS_ParameterPrimary", "Value": params.get("parameter", "")}, + + {"Global Attribute": "DOMS_time_to_complete", "Value": details["timeToComplete"]}, + {"Global Attribute": "DOMS_time_to_complete_units", "Value": "seconds"}, + {"Global Attribute": "DOMS_num_matchup_matched", "Value": details["numInSituMatched"]}, + {"Global Attribute": "DOMS_num_primary_matched", "Value": details["numGriddedMatched"]}, + + {"Global Attribute": "date_modified", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)}, + {"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)}, + + {"Global Attribute": "URI_Matchup", "Value": "http://{webservice}/domsresults?id=" + executionId + "&output=CSV"}, + ] + + writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys())) + + writer.writerows(global_attrs) + + +class DomsNetCDFFormatter: + @staticmethod + def 
create(executionId, results, params, details): + + t = tempfile.mkstemp(prefix="doms_", suffix=".nc") + tempFileName = t[1] + + dataset = Dataset(tempFileName, "w", format="NETCDF4") + dataset.DOMS_matchID = executionId + DomsNetCDFFormatter.__addNetCDFConstants(dataset) + + dataset.date_modified = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601) + dataset.date_created = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601) + dataset.time_coverage_start = params["startTime"].strftime(ISO_8601) + dataset.time_coverage_end = params["endTime"].strftime(ISO_8601) + dataset.time_coverage_resolution = "point" + dataset.DOMS_match_up = params["matchup"] + dataset.DOMS_num_matchup_matched = details["numInSituMatched"] + dataset.DOMS_num_primary_matched = details["numGriddedMatched"] + + bbox = geo.BoundingBox(asString=params["bbox"]) + dataset.geospatial_lat_max = bbox.north + dataset.geospatial_lat_min = bbox.south + dataset.geospatial_lon_max = bbox.east + dataset.geospatial_lon_min = bbox.west + dataset.geospatial_lat_resolution = "point" + dataset.geospatial_lon_resolution = "point" + dataset.geospatial_lat_units = "degrees_north" + dataset.geospatial_lon_units = "degrees_east" + dataset.geospatial_vertical_min = float(params["depthMin"]) + dataset.geospatial_vertical_max = float(params["depthMax"]) + dataset.geospatial_vertical_units = "m" + dataset.geospatial_vertical_resolution = "point" + dataset.geospatial_vertical_positive = "down" + + dataset.DOMS_TimeWindow = params["timeTolerance"] / 60 / 60 + dataset.DOMS_TimeWindow_Units = "hours" + dataset.DOMS_SearchRadius = float(params["radiusTolerance"]) + dataset.DOMS_SearchRadius_Units = "m" + # dataset.URI_Subset = "http://webservice subsetting query request" + dataset.URI_Matchup = "http://{webservice}/domsresults?id=" + executionId + "&output=NETCDF" + dataset.DOMS_ParameterPrimary = params["parameter"] if "parameter" in params else "" + dataset.DOMS_platforms = params["platforms"] + dataset.DOMS_primary = params["primary"] + dataset.DOMS_time_to_complete = details["timeToComplete"] + dataset.DOMS_time_to_complete_units = "seconds" + + insituDatasets = params["matchup"] + insituLinks = set() + for insitu in insituDatasets: + insituLinks.add(config.METADATA_LINKS[insitu]) + dataset.DOMS_DatasetMetadata = ', '.join(insituLinks) + + platforms = set() + for primaryValue in results: + platforms.add(primaryValue['platform']) + for match in primaryValue['matches']: + platforms.add(match['platform']) + dataset.platform = ', '.join(platforms) + + satellite_group_name = "SatelliteData" + insitu_group_name = "InsituData" + + #Create Satellite group, variables, and attributes + satelliteGroup = dataset.createGroup(satellite_group_name) + satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"]) + + # Create InSitu group, variables, and attributes + insituGroup = dataset.createGroup(insitu_group_name) + insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"]) + + # Add data to Insitu and Satellite groups, generate array of match ID pairs + matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter) + dataset.createDimension("MatchedRecords", size=None) + dataset.createDimension("MatchedGroups", size=2) + matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups")) + matchArray[:] = matches + + dataset.close() + f = open(tempFileName, "rb") + data = f.read() + f.close() + os.unlink(tempFileName) + return data + + @staticmethod + def 
__addNetCDFConstants(dataset): + dataset.product_version = "1.0" + dataset.Conventions = "CF-1.6, ACDD-1.3" + dataset.title = "DOMS satellite-insitu machup output file" + dataset.history = "Processing_Version = V1.0, Software_Name = DOMS, Software_Version = 1.03" + dataset.institution = "JPL, FSU, NCAR" + dataset.source = "doms.jpl.nasa.gov" + dataset.standard_name_vocabulary = "CF Standard Name Table v27", "BODC controlled vocabulary" + dataset.cdm_data_type = "Point/Profile, Swath/Grid" + dataset.processing_level = "4" + dataset.project = "Distributed Oceanographic Matchup System (DOMS)" + dataset.keywords_vocabulary = "NASA Global Change Master Directory (GCMD) Science Keywords" + dataset.keywords = "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, NASA/JPL/PODAAC, " \ + "FSU/COAPS, UCAR/NCAR, SALINITY, SEA SURFACE TEMPERATURE, SURFACE WINDS" + dataset.creator_name = "NASA PO.DAAC" + dataset.creator_email = "podaac@podaac.jpl.nasa.gov" + dataset.creator_url = "https://podaac.jpl.nasa.gov/" + dataset.publisher_name = "NASA PO.DAAC" + dataset.publisher_email = "podaac@podaac.jpl.nasa.gov" + dataset.publisher_url = "https://podaac.jpl.nasa.gov" + dataset.acknowledgment = "DOMS is a NASA/AIST-funded project. NRA NNH14ZDA001N." + + @staticmethod + def __writeResults(results, satelliteWriter, insituWriter): + ids = {} + matches = [] + insituIndex = 0 + + # + # Loop through all of the results, add each satellite data point to the array + # + for r in range(0, len(results)): + result = results[r] + satelliteWriter.addData(result) + + # Add each match only if it is not already in the array of in situ points + for match in result["matches"]: + if match["id"] not in ids: + ids[match["id"]] = insituIndex + insituIndex += 1 + insituWriter.addData(match) + + # Append an index pait of (satellite, in situ) to the array of matches + matches.append((r, ids[match["id"]])) + + # Add data/write to the netCDF file + satelliteWriter.writeGroup() + insituWriter.writeGroup() + + return matches + + +class DomsNetCDFValueWriter: + def __init__(self, group, matchup_parameter): + group.createDimension("dim", size=None) + self.group = group + + self.lat = [] + self.lon = [] + self.time = [] + self.sea_water_salinity = [] + self.wind_speed = [] + self.wind_u = [] + self.wind_v = [] + self.wind_direction = [] + self.sea_water_temperature = [] + self.depth = [] + + self.satellite_group_name = "SatelliteData" + self.insitu_group_name = "InsituData" + + # + # Only include the depth variable related to the match-up parameter. If the match-up parameter is + # not sss or sst then do not include any depth data, just fill values. 
+ # + if matchup_parameter == "sss": + self.matchup_depth = "sea_water_salinity_depth" + elif matchup_parameter == "sst": + self.matchup_depth = "sea_water_temperature_depth" + else: + self.matchup_depth = "NO_DEPTH" + + def addData(self, value): + self.lat.append(value.get("y", None)) + self.lon.append(value.get("x", None)) + self.time.append(time.mktime(value.get("time").timetuple())) + self.sea_water_salinity.append(value.get("sea_water_salinity", None)) + self.wind_speed.append(value.get("wind_speed", None)) + self.wind_u.append(value.get("wind_u", None)) + self.wind_v.append(value.get("wind_v", None)) + self.wind_direction.append(value.get("wind_direction", None)) + self.sea_water_temperature.append(value.get("sea_water_temperature", None)) + self.depth.append(value.get(self.matchup_depth, None)) + + def writeGroup(self): + # + # Create variables, enrich with attributes, and add data + # + lonVar = self.group.createVariable("lon", "f4", ("dim",), fill_value=-32767.0) + latVar = self.group.createVariable("lat", "f4", ("dim",), fill_value=-32767.0) + timeVar = self.group.createVariable("time", "f4", ("dim",), fill_value=-32767.0) + + self.__enrichLon(lonVar, min(self.lon), max(self.lon)) + self.__enrichLat(latVar, min(self.lat), max(self.lat)) + self.__enrichTime(timeVar) + + latVar[:] = self.lat + lonVar[:] = self.lon + timeVar[:] = self.time + + if self.sea_water_salinity.count(None) != len(self.sea_water_salinity): + if self.group.name == self.satellite_group_name: + sssVar = self.group.createVariable("SeaSurfaceSalinity", "f4", ("dim",), fill_value=-32767.0) + self.__enrichSSSMeasurements(sssVar, min(self.sea_water_salinity), max(self.sea_water_salinity)) + else: # group.name == self.insitu_group_name + sssVar = self.group.createVariable("SeaWaterSalinity", "f4", ("dim",), fill_value=-32767.0) + self.__enrichSWSMeasurements(sssVar, min(self.sea_water_salinity), max(self.sea_water_salinity)) + sssVar[:] = self.sea_water_salinity + + if self.wind_speed.count(None) != len(self.wind_speed): + windSpeedVar = self.group.createVariable("WindSpeed", "f4", ("dim",), fill_value=-32767.0) + self.__enrichWindSpeed(windSpeedVar, self.__calcMin(self.wind_speed), max(self.wind_speed)) + windSpeedVar[:] = self.wind_speed + + if self.wind_u.count(None) != len(self.wind_u): + windUVar = self.group.createVariable("WindU", "f4", ("dim",), fill_value=-32767.0) + windUVar[:] = self.wind_u + self.__enrichWindU(windUVar, self.__calcMin(self.wind_u), max(self.wind_u)) + + if self.wind_v.count(None) != len(self.wind_v): + windVVar = self.group.createVariable("WindV", "f4", ("dim",), fill_value=-32767.0) + windVVar[:] = self.wind_v + self.__enrichWindV(windVVar, self.__calcMin(self.wind_v), max(self.wind_v)) + + if self.wind_direction.count(None) != len(self.wind_direction): + windDirVar = self.group.createVariable("WindDirection", "f4", ("dim",), fill_value=-32767.0) + windDirVar[:] = self.wind_direction + self.__enrichWindDir(windDirVar) + + if self.sea_water_temperature.count(None) != len(self.sea_water_temperature): + if self.group.name == self.satellite_group_name: + tempVar = self.group.createVariable("SeaSurfaceTemp", "f4", ("dim",), fill_value=-32767.0) + self.__enrichSurfaceTemp(tempVar, self.__calcMin(self.sea_water_temperature), max(self.sea_water_temperature)) + else: + tempVar = self.group.createVariable("SeaWaterTemp", "f4", ("dim",), fill_value=-32767.0) + self.__enrichWaterTemp(tempVar, self.__calcMin(self.sea_water_temperature), max(self.sea_water_temperature)) + tempVar[:] = 
self.sea_water_temperature + + if self.group.name == self.insitu_group_name: + depthVar = self.group.createVariable("Depth", "f4", ("dim",), fill_value=-32767.0) + + if self.depth.count(None) != len(self.depth): + self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(self.depth)) + depthVar[:] = self.depth + else: + # If depth has no data, set all values to 0 + tempDepth = [0 for x in range(len(self.depth))] + depthVar[:] = tempDepth + + # + # Lists may include 'None" values, to calc min these must be filtered out + # + @staticmethod + def __calcMin(var): + return min(x for x in var if x is not None) + + + # + # Add attributes to each variable + # + @staticmethod + def __enrichLon(var, var_min, var_max): + var.long_name = "Longitude" + var.standard_name = "longitude" + var.axis = "X" + var.units = "degrees_east" + var.valid_min = var_min + var.valid_max = var_max + + @staticmethod + def __enrichLat(var, var_min, var_max): + var.long_name = "Latitude" + var.standard_name = "latitude" + var.axis = "Y" + var.units = "degrees_north" + var.valid_min = var_min + var.valid_max = var_max + + @staticmethod + def __enrichTime(var): + var.long_name = "Time" + var.standard_name = "time" + var.axis = "T" + var.units = "seconds since 1970-01-01 00:00:00 0:00" + + @staticmethod + def __enrichSSSMeasurements(var, var_min, var_max): + var.long_name = "Sea surface salinity" + var.standard_name = "sea_surface_salinity" + var.units = "1e-3" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat time" + + @staticmethod + def __enrichSWSMeasurements(var, var_min, var_max): + var.long_name = "Sea water salinity" + var.standard_name = "sea_water_salinity" + var.units = "1e-3" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat depth time" + + @staticmethod + def __enrichDepth(var, var_min, var_max): + var.valid_min = var_min + var.valid_max = var_max + var.units = "m" + var.long_name = "Depth" + var.standard_name = "depth" + var.axis = "Z" + var.positive = "Down" + + @staticmethod + def __enrichWindSpeed(var, var_min, var_max): + var.long_name = "Wind speed" + var.standard_name = "wind_speed" + var.units = "m s-1" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat depth time" + + @staticmethod + def __enrichWindU(var, var_min, var_max): + var.long_name = "Eastward wind" + var.standard_name = "eastward_wind" + var.units = "m s-1" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat depth time" + + @staticmethod + def __enrichWindV(var, var_min, var_max): + var.long_name = "Northward wind" + var.standard_name = "northward_wind" + var.units = "m s-1" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat depth time" + + @staticmethod + def __enrichWaterTemp(var, var_min, var_max): + var.long_name = "Sea water temperature" + var.standard_name = "sea_water_temperature" + var.units = "degree_C" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat depth time" + + @staticmethod + def __enrichSurfaceTemp(var, var_min, var_max): + var.long_name = "Sea surface temperature" + var.standard_name = "sea_surface_temperature" + var.units = "degree_C" + var.valid_min = var_min + var.valid_max = var_max + var.coordinates = "lon lat time" + + @staticmethod + def __enrichWindDir(var): + var.long_name = "Wind from direction" + var.standard_name = "wind_from_direction" + var.units = "degree" + var.coordinates = "lon lat depth time" diff --git 
a/analysis/webservice/algorithms/doms/DatasetListQuery.py b/analysis/webservice/algorithms/doms/DatasetListQuery.py new file mode 100644 index 00000000..ac7f2634 --- /dev/null +++ b/analysis/webservice/algorithms/doms/DatasetListQuery.py @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import traceback + +import requests + +import BaseDomsHandler +import config +import values +from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler +from webservice.NexusHandler import nexus_handler +from webservice.webmodel import cached + + +@nexus_handler +class DomsDatasetListQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS Dataset Listing" + path = "/domslist" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseHandler.__init__(self) + + def getFacetsForInsituSource(self, source): + url = source["url"] + + params = { + "facet": "true", + "stats": "true", + "startIndex": 0, + "itemsPerPage": 0 + } + try: + r = requests.get(url, params=params) + results = json.loads(r.text) + + depths = None + if "stats_fields" in results and "depth" in results["stats_fields"]: + depths = results["stats_fields"]["depth"] + + for facet in results["facets"]: + field = facet["field"] + for value in facet["values"]: + value["value"] = values.getDescByListNameAndId(field, int(value["value"])) + + return depths, results["facets"] + except: # KMG: Don't eat the exception. Add better handling... 
+ traceback.print_exc() + return None, None + + def getMetadataUrlForDataset(self, dataset): + datasetSpec = config.getEndpointByName(dataset) + if datasetSpec is not None: + return datasetSpec["metadataUrl"] + else: + + # KMG: NOT a good hack + if dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1" or dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_CLIM": + dataset = "MUR-JPL-L4-GLOB-v4.1" + elif dataset == "SMAP_L2B_SSS": + dataset = "JPL_SMAP-SSS_L2_EVAL-V2" + elif dataset == "AVHRR_OI_L4_GHRSST_NCEI" or dataset == "AVHRR_OI_L4_GHRSST_NCEI_CLIM": + dataset = "AVHRR_OI-NCEI-L4-GLOB-v2.0" + + return "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=%s&format=umm-json" % dataset + + def getMetadataForSource(self, dataset): + try: + r = requests.get(self.getMetadataUrlForDataset(dataset)) + results = json.loads(r.text) + return results + except: + return None + + @cached(ttl=(60 * 60 * 1000)) # 1 hour cached + def calc(self, computeOptions, **args): + + satellitesList = self._get_tile_service().get_dataseries_list(simple=True) + + insituList = [] + + for satellite in satellitesList: + satellite["metadata"] = self.getMetadataForSource(satellite["shortName"]) + + for insitu in config.ENDPOINTS: + depths, facets = self.getFacetsForInsituSource(insitu) + insituList.append({ + "name": insitu["name"], + "endpoint": insitu["url"], + "metadata": self.getMetadataForSource(insitu["name"]), + "depths": depths, + "facets": facets + }) + + values = { + "satellite": satellitesList, + "insitu": insituList + } + + return BaseDomsHandler.DomsQueryResults(results=values) diff --git a/analysis/webservice/algorithms/doms/MatchupQuery.py b/analysis/webservice/algorithms/doms/MatchupQuery.py new file mode 100644 index 00000000..57a08340 --- /dev/null +++ b/analysis/webservice/algorithms/doms/MatchupQuery.py @@ -0,0 +1,452 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
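The listing handler above queries each in-situ endpoint for facet and depth statistics before merging them with the satellite dataset list. A minimal standalone sketch of that facet request, using a hypothetical endpoint URL in place of the entries in config.ENDPOINTS:

import json
import requests

# Hypothetical in-situ endpoint; the handler reads the real URLs from config.ENDPOINTS.
url = "https://insitu.example.gov/ws/search/samos"

params = {
    "facet": "true",
    "stats": "true",
    "startIndex": 0,
    "itemsPerPage": 0,   # no records, only facet counts and depth stats
}

results = json.loads(requests.get(url, params=params).text)

# Same fields the handler inspects: depth statistics plus per-field facet values.
depths = results.get("stats_fields", {}).get("depth")
facet_counts = dict((f["field"], len(f["values"])) for f in results.get("facets", []))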
+ +import math +import uuid +from datetime import datetime + +import numpy as np +import utm +from nexustiles.model.nexusmodel import get_approximate_value_for_lat_lon +from scipy import spatial + +import BaseDomsHandler +import ResultsStorage +import datafetch +import fetchedgeimpl +import geo +import workerthread +from webservice.NexusHandler import nexus_handler + + +@nexus_handler +class CombinedDomsMatchupQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "Experimental Combined DOMS In-Situ Matchup" + path = "/domsmatchup" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) + + def fetchData(self, endpoints, startTime, endTime, bbox, depth_min, depth_max, platforms): + + boundsConstrainer = geo.BoundsConstrainer(asString=bbox) + threads = [] + for endpoint in endpoints: + thread = workerthread.WorkerThread(datafetch.fetchData, + params=(endpoint, startTime, endTime, bbox, depth_min, depth_max)) + threads.append(thread) + workerthread.wait(threads, startFirst=True, poll=0.01) + + data2 = [] + for thread in threads: + data, bounds = thread.results + data2 += data + boundsConstrainer.testOtherConstrainer(bounds) + + return data2, boundsConstrainer + + def __parseDatetime(self, dtString): + dt = datetime.strptime(dtString, "%Y-%m-%dT%H:%M:%SZ") + epoch = datetime.utcfromtimestamp(0) + time = (dt - epoch).total_seconds() * 1000.0 + return time + + def calc(self, computeOptions, **args): + primary = computeOptions.get_argument("primary", None) + matchup = computeOptions.get_argument("matchup", None) + startTime = computeOptions.get_argument("s", None) + endTime = computeOptions.get_argument("e", None) + bbox = computeOptions.get_argument("b", None) + timeTolerance = computeOptions.get_float_arg("tt") + depth_min = computeOptions.get_float_arg("depthMin", default=None) + depth_max = computeOptions.get_float_arg("depthMax", default=None) + radiusTolerance = computeOptions.get_float_arg("rt") + platforms = computeOptions.get_argument("platforms", None) + + if primary is None or len(primary) == 0: + raise Exception("No primary dataset specified") + + if matchup is None or len(matchup) == 0: + raise Exception("No matchup datasets specified") + + start = self._now() + + primarySpec = self.getDataSourceByName(primary) + if primarySpec is None: + raise Exception("Specified primary dataset not found using identifier '%s'" % primary) + + primaryData, bounds = self.fetchData([primarySpec], startTime, endTime, bbox, depth_min, depth_max, platforms) + + primaryContext = MatchupContext(primaryData) + + matchupIds = matchup.split(",") + + for matchupId in matchupIds: + matchupSpec = self.getDataSourceByName(matchupId) + + if matchupSpec is not None: # Then it's in the in-situ configuration + proc = InsituDatasetProcessor(primaryContext, matchupSpec, startTime, endTime, bbox, depth_min, + depth_max, + platforms, timeTolerance, radiusTolerance) + proc.start() + else: # We assume it to be a Nexus tiled dataset + + ''' + Single Threaded at the moment... 
+ ''' + daysinrange = self._get_tile_service().find_days_in_range_asc(bounds.south, bounds.north, bounds.west, + bounds.east, matchupId, + self.__parseDatetime(startTime) / 1000, + self.__parseDatetime(endTime) / 1000) + + tilesByDay = {} + for dayTimestamp in daysinrange: + ds1_nexus_tiles = self._get_tile_service().get_tiles_bounded_by_box_at_time(bounds.south, bounds.north, + bounds.west, bounds.east, + matchupId, dayTimestamp) + + # print "***", type(ds1_nexus_tiles) + # print ds1_nexus_tiles[0].__dict__ + tilesByDay[dayTimestamp] = ds1_nexus_tiles + + primaryContext.processGridded(tilesByDay, matchupId, radiusTolerance, timeTolerance) + + matches, numMatches = primaryContext.getFinal(len(matchupIds)) + + end = self._now() + + args = { + "primary": primary, + "matchup": matchupIds, + "startTime": startTime, + "endTime": endTime, + "bbox": bbox, + "timeTolerance": timeTolerance, + "depthMin": depth_min, + "depthMax": depth_max, + "radiusTolerance": radiusTolerance, + "platforms": platforms + } + + details = { + "timeToComplete": (end - start), + "numInSituRecords": primaryContext.insituCount, + "numInSituMatched": primaryContext.insituMatches, + "numGriddedChecked": primaryContext.griddedCount, + "numGriddedMatched": primaryContext.griddedMatched + } + + with ResultsStorage.ResultsStorage() as resultsStorage: + execution_id = resultsStorage.insertResults(results=matches, params=args, stats=details, startTime=start, + completeTime=end, userEmail="") + + return BaseDomsHandler.DomsQueryResults(results=matches, args=args, details=details, bounds=None, count=None, + computeOptions=None, executionId=execution_id) + + +class MatchupContextMap: + def __init__(self): + pass + + def add(self, context): + pass + + def delete(self, context): + pass + + +class MatchupContext: + def __init__(self, primaryData): + self.id = str(uuid.uuid4()) + + self.griddedCount = 0 + self.griddedMatched = 0 + + self.insituCount = len(primaryData) + self.insituMatches = 0 + + self.primary = primaryData + for r in self.primary: + r["matches"] = [] + + self.data = [] + for s in primaryData: + u = utm.from_latlon(s["y"], s["x"]) + v = (u[0], u[1], 0.0) + self.data.append(v) + + if len(self.data) > 0: + self.tree = spatial.KDTree(self.data) + else: + self.tree = None + + def getFinal(self, minMatchesToInclude): + + matched = [] + ttlMatches = 0 + for m in self.primary: + if len(m["matches"]) >= minMatchesToInclude: + matched.append(m) + ttlMatches += len(m["matches"]) + + return matched, ttlMatches + + def processGridded(self, tilesByDay, source, xyTolerance, timeTolerance): + for r in self.primary: + foundSatNodes = self.__getSatNodeForLatLonAndTime(tilesByDay, source, r["y"], r["x"], r["time"], + xyTolerance) + self.griddedCount += 1 + self.griddedMatched += len(foundSatNodes) + r["matches"].extend(foundSatNodes) + + def processInSitu(self, records, xyTolerance, timeTolerance): + if self.tree is not None: + for s in records: + self.insituCount += 1 + u = utm.from_latlon(s["y"], s["x"]) + coords = np.array([u[0], u[1], 0]) + ball = self.tree.query_ball_point(coords, xyTolerance) + + self.insituMatches += len(ball) + + for i in ball: + match = self.primary[i] + if abs(match["time"] - s["time"]) <= (timeTolerance * 1000.0): + match["matches"].append(s) + + def __getValueForLatLon(self, chunks, lat, lon, arrayName="data"): + value = get_approximate_value_for_lat_lon(chunks, lat, lon, arrayName) + return value + + def __checkNumber(self, value): + if isinstance(value, float) and (math.isnan(value) or value == np.nan): + 
value = None + elif value is not None: + value = float(value) + return value + + def __buildSwathIndexes(self, chunk): + latlons = [] + utms = [] + indexes = [] + for i in range(0, len(chunk.latitudes)): + _lat = chunk.latitudes[i] + if isinstance(_lat, np.ma.core.MaskedConstant): + continue + for j in range(0, len(chunk.longitudes)): + _lon = chunk.longitudes[j] + if isinstance(_lon, np.ma.core.MaskedConstant): + continue + + value = self.__getChunkValueAtIndex(chunk, (i, j)) + if isinstance(value, float) and (math.isnan(value) or value == np.nan): + continue + + u = utm.from_latlon(_lat, _lon) + v = (u[0], u[1], 0.0) + latlons.append((_lat, _lon)) + utms.append(v) + indexes.append((i, j)) + + tree = None + if len(latlons) > 0: + tree = spatial.KDTree(utms) + + chunk.swathIndexing = { + "tree": tree, + "latlons": latlons, + "indexes": indexes + } + + def __getChunkIndexesForLatLon(self, chunk, lat, lon, xyTolerance): + foundIndexes = [] + foundLatLons = [] + + if "swathIndexing" not in chunk.__dict__: + self.__buildSwathIndexes(chunk) + + tree = chunk.swathIndexing["tree"] + if tree is not None: + indexes = chunk.swathIndexing["indexes"] + latlons = chunk.swathIndexing["latlons"] + u = utm.from_latlon(lat, lon) + coords = np.array([u[0], u[1], 0]) + ball = tree.query_ball_point(coords, xyTolerance) + for i in ball: + foundIndexes.append(indexes[i]) + foundLatLons.append(latlons[i]) + return foundIndexes, foundLatLons + + def __getChunkValueAtIndex(self, chunk, index, arrayName=None): + + if arrayName is None or arrayName == "data": + data_val = chunk.data[0][index[0]][index[1]] + else: + data_val = chunk.meta_data[arrayName][0][index[0]][index[1]] + return data_val.item() if (data_val is not np.ma.masked) and data_val.size == 1 else float('Nan') + + def __getSatNodeForLatLonAndTime(self, chunksByDay, source, lat, lon, searchTime, xyTolerance): + timeDiff = 86400 * 365 * 1000 + foundNodes = [] + + for ts in chunksByDay: + chunks = chunksByDay[ts] + if abs((ts * 1000) - searchTime) < timeDiff: + for chunk in chunks: + indexes, latlons = self.__getChunkIndexesForLatLon(chunk, lat, lon, xyTolerance) + + # for index in indexes: + for i in range(0, len(indexes)): + index = indexes[i] + latlon = latlons[i] + sst = None + sss = None + windSpeed = None + windDirection = None + windU = None + windV = None + + value = self.__getChunkValueAtIndex(chunk, index) + + if isinstance(value, float) and (math.isnan(value) or value == np.nan): + continue + + if "GHRSST" in source: + sst = value + elif "ASCATB" in source: + windU = value + elif "SSS" in source: # SMAP + sss = value + + if len(chunks) > 0 and "wind_dir" in chunks[0].meta_data: + windDirection = self.__checkNumber(self.__getChunkValueAtIndex(chunk, index, "wind_dir")) + if len(chunks) > 0 and "wind_v" in chunks[0].meta_data: + windV = self.__checkNumber(self.__getChunkValueAtIndex(chunk, index, "wind_v")) + if len(chunks) > 0 and "wind_speed" in chunks[0].meta_data: + windSpeed = self.__checkNumber(self.__getChunkValueAtIndex(chunk, index, "wind_speed")) + + foundNode = { + "sea_water_temperature": sst, + "sea_water_salinity": sss, + "wind_speed": windSpeed, + "wind_direction": windDirection, + "wind_u": windU, + "wind_v": windV, + "time": ts, + "x": self.__checkNumber(latlon[1]), + "y": self.__checkNumber(latlon[0]), + "depth": 0, + "sea_water_temperature_depth": 0, + "source": source, + "id": "%s:%s:%s" % (ts, lat, lon) + } + + foundNodes.append(foundNode) + timeDiff = abs(ts - searchTime) + + return foundNodes + + def 
__getSatNodeForLatLonAndTime__(self, chunksByDay, source, lat, lon, searchTime): + + timeDiff = 86400 * 365 * 1000 + foundNodes = [] + + for ts in chunksByDay: + chunks = chunksByDay[ts] + # print chunks + # ts = calendar.timegm(chunks.start.utctimetuple()) * 1000 + if abs((ts * 1000) - searchTime) < timeDiff: + value = self.__getValueForLatLon(chunks, lat, lon, arrayName="data") + value = self.__checkNumber(value) + + # _Really_ don't like doing it this way... + + sst = None + sss = None + windSpeed = None + windDirection = None + windU = None + windV = None + + if "GHRSST" in source: + sst = value + + if "ASCATB" in source: + windU = value + + if len(chunks) > 0 and "wind_dir" in chunks[0].meta_data: + windDirection = self.__checkNumber(self.__getValueForLatLon(chunks, lat, lon, arrayName="wind_dir")) + if len(chunks) > 0 and "wind_v" in chunks[0].meta_data: + windV = self.__checkNumber(self.__getValueForLatLon(chunks, lat, lon, arrayName="wind_v")) + if len(chunks) > 0 and "wind_speed" in chunks[0].meta_data: + windSpeed = self.__checkNumber(self.__getValueForLatLon(chunks, lat, lon, arrayName="wind_speed")) + + foundNode = { + "sea_water_temperature": sst, + "sea_water_salinity": sss, + "wind_speed": windSpeed, + "wind_direction": windDirection, + "wind_uv": { + "u": windU, + "v": windV + }, + "time": ts, + "x": lon, + "y": lat, + "depth": 0, + "sea_water_temperature_depth": 0, + "source": source, + "id": "%s:%s:%s" % (ts, lat, lon) + } + + isValidNode = True + if "ASCATB" in source and windSpeed is None: + isValidNode = None + + if isValidNode: + foundNodes.append(foundNode) + timeDiff = abs(ts - searchTime) + + return foundNodes + + +class InsituDatasetProcessor: + def __init__(self, primary, datasource, startTime, endTime, bbox, depth_min, depth_max, platforms, timeTolerance, + radiusTolerance): + self.primary = primary + self.datasource = datasource + self.startTime = startTime + self.endTime = endTime + self.bbox = bbox + self.depth_min = depth_min + self.depth_max = depth_max + self.platforms = platforms + self.timeTolerance = timeTolerance + self.radiusTolerance = radiusTolerance + + def start(self): + def callback(pageData): + self.primary.processInSitu(pageData, self.radiusTolerance, self.timeTolerance) + + fetchedgeimpl.fetch(self.datasource, self.startTime, self.endTime, self.bbox, self.depth_min, self.depth_max, + self.platforms, pageCallback=callback) + + +class InsituPageProcessor: + def __init__(self): + pass diff --git a/analysis/webservice/algorithms/doms/MetadataQuery.py b/analysis/webservice/algorithms/doms/MetadataQuery.py new file mode 100644 index 00000000..aa24d910 --- /dev/null +++ b/analysis/webservice/algorithms/doms/MetadataQuery.py @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
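The matchup handler above pairs records by projecting every point to UTM, indexing the primary (satellite) points in a scipy KD-tree, and accepting any in-situ record that falls inside the radius tolerance (metres) and time tolerance (seconds). A stripped-down sketch of that pairing step, with made-up sample records rather than the handler's real fetched data:

import numpy as np
import utm
from scipy import spatial

# Hypothetical records; MatchupContext builds these from the fetched datasets.
primary = [{"x": -120.0, "y": 35.0, "time": 1500000000000, "matches": []}]
insitu = [{"x": -120.001, "y": 35.001, "time": 1500000500000}]

radius_tolerance = 1000.0   # metres, like the "rt" request argument
time_tolerance = 3600.0     # seconds, like the "tt" request argument

# Index the primary points by UTM easting/northing, as MatchupContext does.
coords = []
for p in primary:
    easting, northing, _, _ = utm.from_latlon(p["y"], p["x"])
    coords.append((easting, northing, 0.0))
tree = spatial.KDTree(coords)

for s in insitu:
    easting, northing, _, _ = utm.from_latlon(s["y"], s["x"])
    for i in tree.query_ball_point(np.array([easting, northing, 0.0]), radius_tolerance):
        # Times are epoch milliseconds in the handler, hence the * 1000.0 on the tolerance.
        if abs(primary[i]["time"] - s["time"]) <= time_tolerance * 1000.0:
            primary[i]["matches"].append(s)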
+ +import json + +import requests + +import BaseDomsHandler +import config +from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler +from webservice.NexusHandler import nexus_handler +from webservice.webmodel import DatasetNotFoundException + + +@nexus_handler +class DomsMetadataQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS Metadata Listing" + path = "/domsmetadata" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseHandler.__init__(self) + + def calc(self, computeOptions, **args): + + dataset = computeOptions.get_argument("dataset", None) + if dataset is None or len(dataset) == 0: + raise Exception("'dataset' parameter not specified") + + metadataUrl = self.__getUrlForDataset(dataset) + + try: + r = requests.get(metadataUrl) + results = json.loads(r.text) + return BaseDomsHandler.DomsQueryResults(results=results) + except: + raise DatasetNotFoundException("Dataset '%s' not found") + + def __getUrlForDataset(self, dataset): + datasetSpec = config.getEndpointByName(dataset) + if datasetSpec is not None: + return datasetSpec["metadataUrl"] + else: + + # KMG: NOT a good hack + if dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1" or dataset == "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_CLIM": + dataset = "MUR-JPL-L4-GLOB-v4.1" + elif dataset == "SMAP_L2B_SSS": + dataset = "JPL_SMAP-SSS_L2_EVAL-V2" + + return "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=%s&format=umm-json" % dataset diff --git a/analysis/webservice/algorithms/doms/ResultsPlotQuery.py b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py new file mode 100644 index 00000000..1b48d14f --- /dev/null +++ b/analysis/webservice/algorithms/doms/ResultsPlotQuery.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
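DatasetListQuery and MetadataQuery above both fall back to the same hard-coded short-name remapping (flagged in the code as a hack) when a dataset has no entry in the in-situ endpoint config. Collected in one place it is just a lookup plus a URL template; this helper is a sketch, not something the patch itself defines:

# Aliases merged from both handlers above (the AVHRR_OI entries appear only in DatasetListQuery).
SHORT_NAME_ALIASES = {
    "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1": "MUR-JPL-L4-GLOB-v4.1",
    "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_CLIM": "MUR-JPL-L4-GLOB-v4.1",
    "SMAP_L2B_SSS": "JPL_SMAP-SSS_L2_EVAL-V2",
    "AVHRR_OI_L4_GHRSST_NCEI": "AVHRR_OI-NCEI-L4-GLOB-v2.0",
    "AVHRR_OI_L4_GHRSST_NCEI_CLIM": "AVHRR_OI-NCEI-L4-GLOB-v2.0",
}

def metadata_url(dataset):
    short_name = SHORT_NAME_ALIASES.get(dataset, dataset)
    return ("http://doms.jpl.nasa.gov/ws/metadata/dataset"
            "?shortName=%s&format=umm-json" % short_name)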
+ +import BaseDomsHandler +import histogramplot +import mapplot +import scatterplot +from webservice.NexusHandler import nexus_handler + + +class PlotTypes: + SCATTER = "scatter" + MAP = "map" + HISTOGRAM = "histogram" + + +@nexus_handler +class DomsResultsPlotHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS Results Plotting" + path = "/domsplot" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) + + def calc(self, computeOptions, **args): + id = computeOptions.get_argument("id", None) + parameter = computeOptions.get_argument('parameter', 'sst') + + plotType = computeOptions.get_argument("type", PlotTypes.SCATTER) + + normAndCurve = computeOptions.get_boolean_arg("normandcurve", False) + + if plotType == PlotTypes.SCATTER: + return scatterplot.createScatterPlot(id, parameter) + elif plotType == PlotTypes.MAP: + return mapplot.createMapPlot(id, parameter) + elif plotType == PlotTypes.HISTOGRAM: + return histogramplot.createHistogramPlot(id, parameter, normAndCurve) + else: + raise Exception("Unsupported plot type '%s' specified." % plotType) diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py new file mode 100644 index 00000000..93358e91 --- /dev/null +++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
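The plot handler above only dispatches on the type argument, so a client call is a plain GET against /domsplot. A sketch with a hypothetical webservice host and execution id:

import requests

base = "http://nexus-webservice.example.gov"   # hypothetical host
params = {
    "id": "00000000-0000-0000-0000-000000000000",   # execution id from a prior matchup run
    "parameter": "sst",        # defaults to "sst" in the handler
    "type": "scatter",         # "scatter", "map", or "histogram"
    "normandcurve": "false",   # only consulted for histogram plots
}
response = requests.get(base + "/domsplot", params=params)
# response.content carries the rendered plot; the exact format comes from the plot module.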
+ +import uuid + +import BaseDomsHandler +import ResultsStorage +from webservice.NexusHandler import nexus_handler +from webservice.webmodel import NexusProcessingException + + +@nexus_handler +class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS Resultset Retrieval" + path = "/domsresults" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) + + def calc(self, computeOptions, **args): + execution_id = computeOptions.get_argument("id", None) + + try: + execution_id = uuid.UUID(execution_id) + except: + raise NexusProcessingException(reason="'id' argument must be a valid uuid", code=400) + + simple_results = computeOptions.get_boolean_arg("simpleResults", default=False) + + with ResultsStorage.ResultsRetrieval() as storage: + params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results) + + return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=None, + computeOptions=None, executionId=execution_id) diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py new file mode 100644 index 00000000..03bbd099 --- /dev/null +++ b/analysis/webservice/algorithms/doms/ResultsStorage.py @@ -0,0 +1,286 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
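+
+# ResultsStorage persists DOMS match-up executions in Cassandra. AbstractResultsContainer
+# reads its connection settings from domsconfig.ini and opens/closes the cluster through
+# the context-manager protocol, so writers are expected to use it roughly like this
+# sketch (the argument values are whatever the match-up calculation produced):
+#
+#     with ResultsStorage() as storage:
+#         execution_id = storage.insertResults(results, params, stats,
+#                                              started, completed, user_email)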
+ + + +import ConfigParser +import logging +import uuid +from datetime import datetime + +import pkg_resources +from cassandra.cluster import Cluster +from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy +from cassandra.query import BatchStatement +from pytz import UTC + + +class AbstractResultsContainer: + def __init__(self): + self._log = logging.getLogger(__name__) + self._log.info("Creating DOMS Results Storage Instance") + + self._session = None + + def __enter__(self): + domsconfig = ConfigParser.RawConfigParser() + domsconfig.readfp(pkg_resources.resource_stream(__name__, "domsconfig.ini"), filename='domsconfig.ini') + + cassHost = domsconfig.get("cassandra", "host") + cassKeyspace = domsconfig.get("cassandra", "keyspace") + cassDatacenter = domsconfig.get("cassandra", "local_datacenter") + cassVersion = int(domsconfig.get("cassandra", "protocol_version")) + + dc_policy = DCAwareRoundRobinPolicy(cassDatacenter) + token_policy = TokenAwarePolicy(dc_policy) + + self._cluster = Cluster([host for host in cassHost.split(',')], load_balancing_policy=token_policy, + protocol_version=cassVersion) + + self._session = self._cluster.connect(cassKeyspace) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._cluster.shutdown() + + def _parseDatetime(self, dtString): + dt = datetime.strptime(dtString, "%Y-%m-%dT%H:%M:%SZ") + epoch = datetime.utcfromtimestamp(0) + time = (dt - epoch).total_seconds() * 1000.0 + return int(time) + + +class ResultsStorage(AbstractResultsContainer): + def __init__(self): + AbstractResultsContainer.__init__(self) + + def insertResults(self, results, params, stats, startTime, completeTime, userEmail, execution_id=None): + if isinstance(execution_id, basestring): + execution_id = uuid.UUID(execution_id) + + execution_id = self.insertExecution(execution_id, startTime, completeTime, userEmail) + self.__insertParams(execution_id, params) + self.__insertStats(execution_id, stats) + self.__insertResults(execution_id, results) + return execution_id + + def insertExecution(self, execution_id, startTime, completeTime, userEmail): + if execution_id is None: + execution_id = uuid.uuid4() + + cql = "INSERT INTO doms_executions (id, time_started, time_completed, user_email) VALUES (%s, %s, %s, %s)" + self._session.execute(cql, (execution_id, startTime, completeTime, userEmail)) + return execution_id + + def __insertParams(self, execution_id, params): + cql = """INSERT INTO doms_params + (execution_id, primary_dataset, matchup_datasets, depth_min, depth_max, time_tolerance, radius_tolerance, start_time, end_time, platforms, bounding_box, parameter) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + """ + self._session.execute(cql, (execution_id, + params["primary"], + ",".join(params["matchup"]) if type(params["matchup"]) == list else params[ + "matchup"], + params["depthMin"] if "depthMin" in params.keys() else None, + params["depthMax"] if "depthMax" in params.keys() else None, + int(params["timeTolerance"]), + params["radiusTolerance"], + params["startTime"], + params["endTime"], + params["platforms"], + params["bbox"], + params["parameter"] + )) + + def __insertStats(self, execution_id, stats): + cql = """ + INSERT INTO doms_execution_stats + (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete) + VALUES + (%s, %s, %s, %s, %s, %s) + """ + self._session.execute(cql, ( + execution_id, + stats["numGriddedMatched"], + stats["numGriddedChecked"], + stats["numInSituMatched"], 
+ stats["numInSituRecords"], + stats["timeToComplete"] + )) + + def __insertResults(self, execution_id, results): + + cql = """ + INSERT INTO doms_data + (id, execution_id, value_id, primary_value_id, x, y, source_dataset, measurement_time, platform, device, measurement_values, is_primary) + VALUES + (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """ + insertStatement = self._session.prepare(cql) + batch = BatchStatement() + + for result in results: + self.__insertResult(execution_id, None, result, batch, insertStatement) + + self._session.execute(batch) + + def __insertResult(self, execution_id, primaryId, result, batch, insertStatement): + + dataMap = self.__buildDataMap(result) + result_id = uuid.uuid4() + batch.add(insertStatement, ( + result_id, + execution_id, + result["id"], + primaryId, + result["x"], + result["y"], + result["source"], + result["time"], + result["platform"] if "platform" in result else None, + result["device"] if "device" in result else None, + dataMap, + 1 if primaryId is None else 0 + ) + ) + + n = 0 + if "matches" in result: + for match in result["matches"]: + self.__insertResult(execution_id, result["id"], match, batch, insertStatement) + n += 1 + if n >= 20: + if primaryId is None: + self.__commitBatch(batch) + n = 0 + + if primaryId is None: + self.__commitBatch(batch) + + def __commitBatch(self, batch): + self._session.execute(batch) + batch.clear() + + def __buildDataMap(self, result): + dataMap = {} + for name in result: + value = result[name] + if name not in ["id", "x", "y", "source", "time", "platform", "device", "point", "matches"] and type( + value) in [float, int]: + dataMap[name] = value + return dataMap + + +class ResultsRetrieval(AbstractResultsContainer): + def __init__(self): + AbstractResultsContainer.__init__(self) + + def retrieveResults(self, execution_id, trim_data=False): + if isinstance(execution_id, basestring): + execution_id = uuid.UUID(execution_id) + + params = self.__retrieveParams(execution_id) + stats = self.__retrieveStats(execution_id) + data = self.__retrieveData(execution_id, trim_data=trim_data) + return params, stats, data + + def __retrieveData(self, id, trim_data=False): + dataMap = self.__retrievePrimaryData(id, trim_data=trim_data) + self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data) + data = [dataMap[name] for name in dataMap] + return data + + def __enrichPrimaryDataWithMatches(self, id, dataMap, trim_data=False): + cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = false" + rows = self._session.execute(cql, (id,)) + + for row in rows: + entry = self.__rowToDataEntry(row, trim_data=trim_data) + if row.primary_value_id in dataMap: + if not "matches" in dataMap[row.primary_value_id]: + dataMap[row.primary_value_id]["matches"] = [] + dataMap[row.primary_value_id]["matches"].append(entry) + else: + print row + + def __retrievePrimaryData(self, id, trim_data=False): + cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true" + rows = self._session.execute(cql, (id,)) + + dataMap = {} + for row in rows: + entry = self.__rowToDataEntry(row, trim_data=trim_data) + dataMap[row.value_id] = entry + return dataMap + + def __rowToDataEntry(self, row, trim_data=False): + if trim_data: + entry = { + "x": float(row.x), + "y": float(row.y), + "source": row.source_dataset, + "time": row.measurement_time.replace(tzinfo=UTC) + } + else: + entry = { + "id": row.value_id, + "x": float(row.x), + "y": float(row.y), + "source": row.source_dataset, + "device": row.device, + "platform": 
row.platform, + "time": row.measurement_time.replace(tzinfo=UTC) + } + for key in row.measurement_values: + value = float(row.measurement_values[key]) + entry[key] = value + return entry + + def __retrieveStats(self, id): + cql = "SELECT * FROM doms_execution_stats where execution_id = %s limit 1" + rows = self._session.execute(cql, (id,)) + for row in rows: + stats = { + "numGriddedMatched": row.num_gridded_matched, + "numGriddedChecked": row.num_gridded_checked, + "numInSituMatched": row.num_insitu_matched, + "numInSituChecked": row.num_insitu_checked, + "timeToComplete": row.time_to_complete + } + return stats + + raise Exception("Execution not found with id '%s'" % id) + + def __retrieveParams(self, id): + cql = "SELECT * FROM doms_params where execution_id = %s limit 1" + rows = self._session.execute(cql, (id,)) + for row in rows: + params = { + "primary": row.primary_dataset, + "matchup": row.matchup_datasets.split(","), + "depthMin": row.depth_min, + "depthMax": row.depth_max, + "timeTolerance": row.time_tolerance, + "radiusTolerance": row.radius_tolerance, + "startTime": row.start_time.replace(tzinfo=UTC), + "endTime": row.end_time.replace(tzinfo=UTC), + "platforms": row.platforms, + "bbox": row.bounding_box, + "parameter": row.parameter + } + return params + + raise Exception("Execution not found with id '%s'" % id) diff --git a/analysis/webservice/algorithms/doms/StatsQuery.py b/analysis/webservice/algorithms/doms/StatsQuery.py new file mode 100644 index 00000000..f5ac7651 --- /dev/null +++ b/analysis/webservice/algorithms/doms/StatsQuery.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
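+
+# StatsQuery exposes the /domsstats endpoint. It resolves the named in-situ source,
+# asks the edge service (via datafetch.getCount) how many records fall inside the given
+# time range, bounding box, depth range and platform list, and returns just the count
+# and bounds rather than the data itself. A request might look roughly like this
+# (parameter values are illustrative):
+#
+#     /domsstats?source=samos&s=2013-10-21T00:00:00Z&e=2013-10-31T23:59:59Z&b=-30,15,-45,30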
+ +import BaseDomsHandler +import datafetch +from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler +from webservice.NexusHandler import nexus_handler + + +@nexus_handler +class DomsStatsQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS In-Situ Stats Lookup" + path = "/domsstats" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseHandler.__init__(self) + + def calc(self, computeOptions, **args): + source = computeOptions.get_argument("source", None) + startTime = computeOptions.get_argument("s", None) + endTime = computeOptions.get_argument("e", None) + bbox = computeOptions.get_argument("b", None) + timeTolerance = computeOptions.get_float_arg("tt") + depth_min = computeOptions.get_float_arg("depthMin", default=None) + depth_max = computeOptions.get_float_arg("depthMax", default=None) + radiusTolerance = computeOptions.get_float_arg("rt") + platforms = computeOptions.get_argument("platforms", None) + + source1 = self.getDataSourceByName(source) + if source1 is None: + raise Exception("Source '%s' not found" % source) + + count, bounds = datafetch.getCount(source1, startTime, endTime, bbox, depth_min, depth_max, platforms) + + args = { + "source": source, + "startTime": startTime, + "endTime": endTime, + "bbox": bbox, + "timeTolerance": timeTolerance, + "depthMin": depth_min, + "depthMax": depth_max, + "radiusTolerance": radiusTolerance, + "platforms": platforms + } + + return BaseDomsHandler.DomsQueryResults(results={}, args=args, details={}, bounds=bounds, count=count, + computeOptions=None) diff --git a/analysis/webservice/algorithms/doms/ValuesQuery.py b/analysis/webservice/algorithms/doms/ValuesQuery.py new file mode 100644 index 00000000..d766c7bb --- /dev/null +++ b/analysis/webservice/algorithms/doms/ValuesQuery.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
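+
+# ValuesQuery exposes the /domsvalues endpoint. It behaves like the stats lookup but
+# returns the matching in-situ records themselves: datafetch.getValues is called with
+# placeholders=True, so each record is trimmed to position, time, source, platform,
+# device and depth. The start and end times come from the standard request parsers and
+# are re-formatted as YYYY-MM-DDTHH:MM:SSZ before being passed to the edge service.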
+ +from datetime import datetime + +from pytz import timezone + +import BaseDomsHandler +import datafetch +from webservice.algorithms.NexusCalcHandler import NexusCalcHandler as BaseHandler +from webservice.NexusHandler import nexus_handler + +EPOCH = timezone('UTC').localize(datetime(1970, 1, 1)) + + +@nexus_handler +class DomsValuesQueryHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS In-Situ Value Lookup" + path = "/domsvalues" + description = "" + params = {} + singleton = True + + def __init__(self): + BaseHandler.__init__(self) + + def calc(self, computeOptions, **args): + source = computeOptions.get_argument("source", None) + startTime = computeOptions.get_start_datetime() + endTime = computeOptions.get_end_datetime() + bbox = computeOptions.get_argument("b", None) + timeTolerance = computeOptions.get_float_arg("tt") + depth_min = computeOptions.get_float_arg("depthMin", default=None) + depth_max = computeOptions.get_float_arg("depthMax", default=None) + radiusTolerance = computeOptions.get_float_arg("rt") + platforms = computeOptions.get_argument("platforms", "") + + source1 = self.getDataSourceByName(source) + if source1 is None: + raise Exception("Source '%s' not found" % source) + + values, bounds = datafetch.getValues(source1, startTime.strftime('%Y-%m-%dT%H:%M:%SZ'), + endTime.strftime('%Y-%m-%dT%H:%M:%SZ'), bbox, depth_min, depth_max, + platforms, placeholders=True) + count = len(values) + + args = { + "source": source, + "startTime": startTime, + "endTime": endTime, + "bbox": bbox, + "timeTolerance": timeTolerance, + "depthMin": depth_min, + "depthMax": depth_max, + "radiusTolerance": radiusTolerance, + "platforms": platforms + } + + return BaseDomsHandler.DomsQueryResults(results=values, args=args, bounds=bounds, details={}, count=count, + computeOptions=None) diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py new file mode 100644 index 00000000..d5a8e247 --- /dev/null +++ b/analysis/webservice/algorithms/doms/__init__.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
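+
+# Importing the submodules here is what wires the package up: each handler class in this
+# package is decorated with @nexus_handler, which is intended to register it with the
+# webservice at import time, so "import webservice.algorithms.doms" should be enough to
+# make endpoints such as /domsresults, /domsplot, /domsstats and /domsvalues available.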
+ + +import BaseDomsHandler +import DatasetListQuery +import DomsInitialization +import MatchupQuery +import MetadataQuery +import ResultsPlotQuery +import ResultsRetrieval +import ResultsStorage +import StatsQuery +import ValuesQuery +import config +import datafetch +import fetchedgeimpl +import geo +import insitusubset +import subsetter +import values +import workerthread diff --git a/analysis/webservice/algorithms/doms/config.py b/analysis/webservice/algorithms/doms/config.py new file mode 100644 index 00000000..ff492e86 --- /dev/null +++ b/analysis/webservice/algorithms/doms/config.py @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ENDPOINTS = [ + { + "name": "samos", + "url": "http://doms.coaps.fsu.edu:8890/ws/search/samos", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 1000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SAMOS&format=umm-json" + }, + { + "name": "spurs", + "url": "https://doms.jpl.nasa.gov/ws/search/spurs", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 25000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-1&format=umm-json" + }, + { + "name": "icoads", + "url": "http://rda-data.ucar.edu:8890/ws/search/icoads", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 1000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=ICOADS&format=umm-json" + }, + { + "name": "spurs2", + "url": "https://doms.jpl.nasa.gov/ws/search/spurs2", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 25000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-2&format=umm-json" + } +] + +METADATA_LINKS = { + "samos": "http://samos.coaps.fsu.edu/html/nav.php?s=2", + "icoads": "https://rda.ucar.edu/datasets/ds548.1/", + "spurs": "https://podaac.jpl.nasa.gov/spurs" +} + +import os + +try: + env = os.environ['ENV'] + if env == 'dev': + ENDPOINTS = [ + { + "name": "samos", + "url": "http://doms.coaps.fsu.edu:8890/ws/search/samos", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 1000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SAMOS&format=umm-json" + }, + { + "name": "spurs", + "url": "http://127.0.0.1:8890/ws/search/spurs", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 25000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-1&format=umm-json" + }, + { + "name": "icoads", + "url": "http://rda-data.ucar.edu:8890/ws/search/icoads", + "fetchParallel": True, + "fetchThreads": 8, + "itemsPerPage": 1000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=ICOADS&format=umm-json" + }, + { + "name": "spurs2", + "url": "https://doms.jpl.nasa.gov/ws/search/spurs2", + "fetchParallel": 
True, + "fetchThreads": 8, + "itemsPerPage": 25000, + "metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-2&format=umm-json" + } + ] + METADATA_LINKS = { + "samos": "http://samos.coaps.fsu.edu/html/nav.php?s=2", + "icoads": "https://rda.ucar.edu/datasets/ds548.1/", + "spurs": "https://podaac.jpl.nasa.gov/spurs" + } +except KeyError: + pass + + +def getEndpointByName(name): + for endpoint in ENDPOINTS: + if endpoint["name"].upper() == name.upper(): + return endpoint + return None diff --git a/analysis/webservice/algorithms/doms/datafetch.py b/analysis/webservice/algorithms/doms/datafetch.py new file mode 100644 index 00000000..3fc3917e --- /dev/null +++ b/analysis/webservice/algorithms/doms/datafetch.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fetchedgeimpl + + +def getCount(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): + return fetchedgeimpl.getCount(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) + + +def __fetchSingleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): + return fetchedgeimpl.fetch(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) + + +def __fetchMultipleDataSource(endpoints, startTime, endTime, bbox, depth_min, depth_max, platforms=None): + data = [] + for endpoint in endpoints: + dataSingleSource = __fetchSingleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) + data = data + dataSingleSource + return data + + +def fetchData(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): + if type(endpoint) == list: + return __fetchMultipleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) + else: + return __fetchSingleDataSource(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) + + +def getValues(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None, placeholders=False): + return fetchedgeimpl.getValues(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms, placeholders) + + +if __name__ == "__main__": + pass diff --git a/analysis/webservice/algorithms/doms/fetchedgeimpl.py b/analysis/webservice/algorithms/doms/fetchedgeimpl.py new file mode 100644 index 00000000..70cf14e3 --- /dev/null +++ b/analysis/webservice/algorithms/doms/fetchedgeimpl.py @@ -0,0 +1,217 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import traceback +from datetime import datetime +from multiprocessing.pool import ThreadPool + +import requests + +import geo +import values +from webservice.webmodel import NexusProcessingException + + +def __parseDatetime(dtString): + dt = datetime.strptime(dtString, "%Y-%m-%dT%H:%M:%SZ") + epoch = datetime.utcfromtimestamp(0) + time = (dt - epoch).total_seconds() * 1000.0 + return time + + +def __parseLocation(locString): + if "Point" in locString: + locString = locString[6:-1] + + if "," in locString: + latitude = float(locString.split(",")[0]) + longitude = float(locString.split(",")[1]) + else: + latitude = float(locString.split(" ")[1]) + longitude = float(locString.split(" ")[0]) + + return (latitude, longitude) + + +def __resultRawToUsable(resultdict): + resultdict["time"] = __parseDatetime(resultdict["time"]) + latitude, longitude = __parseLocation(resultdict["point"]) + + resultdict["x"] = longitude + resultdict["y"] = latitude + + if "id" not in resultdict and "metadata" in resultdict: + resultdict["id"] = resultdict["metadata"] + + resultdict["id"] = "id-%s" % resultdict["id"] + + if "device" in resultdict: + resultdict["device"] = values.getDeviceById(resultdict["device"]) + + if "platform" in resultdict: + resultdict["platform"] = values.getPlatformById(resultdict["platform"]) + + if "mission" in resultdict: + resultdict["mission"] = values.getMissionById(resultdict["mission"]) + + if "sea_surface_temperature" in resultdict: + resultdict["sea_water_temperature"] = resultdict["sea_surface_temperature"] + del resultdict["sea_surface_temperature"] + + return resultdict + + +def __fetchJson(url, params, trycount=1, maxtries=5): + if trycount > maxtries: + raise Exception("Maximum retries attempted.") + if trycount > 1: + print "Retry #", trycount + r = requests.get(url, params=params, timeout=500.000) + + print r.url + + if r.status_code != 200: + return __fetchJson(url, params, trycount + 1, maxtries) + try: + results = json.loads(r.text) + return results + except: + return __fetchJson(url, params, trycount + 1, maxtries) + + +def __doQuery(endpoint, startTime, endTime, bbox, depth_min=None, depth_max=None, itemsPerPage=10, startIndex=0, + platforms=None, + pageCallback=None): + params = {"startTime": startTime, "endTime": endTime, "bbox": bbox, "itemsPerPage": itemsPerPage, + "startIndex": startIndex, "stats": "true"} + + if depth_min is not None: + params['minDepth'] = depth_min + if depth_max is not None: + params['maxDepth'] = depth_max + + if platforms is not None: + params["platform"] = platforms.split(",") + + resultsRaw = __fetchJson(endpoint["url"], params) + boundsConstrainer = geo.BoundsConstrainer(north=-90, south=90, west=180, east=-180) + + if resultsRaw["totalResults"] == 0 or len(resultsRaw["results"]) == 0: # Double-sanity check + return [], resultsRaw["totalResults"], startIndex, itemsPerPage, boundsConstrainer + + try: + results = [] + for resultdict in resultsRaw["results"]: + result = __resultRawToUsable(resultdict) + result["source"] = endpoint["name"] + boundsConstrainer.testCoords(north=result["y"], south=result["y"], 
west=result["x"], east=result["x"]) + results.append(result) + + if "stats_fields" in resultsRaw and len(resultsRaw["results"]) == 0: + stats = resultsRaw["stats_fields"] + if "lat" in stats and "lon" in stats: + boundsConstrainer.testCoords(north=stats['lat']['max'], south=stats['lat']['min'], + west=stats['lon']['min'], east=stats['lon']['max']) + + if pageCallback is not None: + pageCallback(results) + + ''' + If pageCallback was supplied, we assume this call to be asynchronous. Otherwise combine all the results data and return it. + ''' + if pageCallback is None: + return results, int(resultsRaw["totalResults"]), int(resultsRaw["startIndex"]), int( + resultsRaw["itemsPerPage"]), boundsConstrainer + else: + return [], int(resultsRaw["totalResults"]), int(resultsRaw["startIndex"]), int( + resultsRaw["itemsPerPage"]), boundsConstrainer + except: + print "Invalid or missing JSON in response." + traceback.print_exc() + raise NexusProcessingException(reason="Invalid or missing JSON in response.") + # return [], 0, startIndex, itemsPerPage, boundsConstrainer + + +def getCount(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None): + startIndex = 0 + pageResults, totalResults, pageStartIndex, itemsPerPageR, boundsConstrainer = __doQuery(endpoint, startTime, + endTime, bbox, + depth_min, depth_max, 0, + startIndex, platforms) + return totalResults, boundsConstrainer + + +def fetch(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None, pageCallback=None): + results = [] + startIndex = 0 + + mainBoundsConstrainer = geo.BoundsConstrainer(north=-90, south=90, west=180, east=-180) + + # First isn't parellel so we can get the ttl results, forced items per page, etc... + pageResults, totalResults, pageStartIndex, itemsPerPageR, boundsConstrainer = __doQuery(endpoint, startTime, + endTime, bbox, + depth_min, depth_max, + endpoint["itemsPerPage"], + startIndex, platforms, + pageCallback) + results = results + pageResults + mainBoundsConstrainer.testOtherConstrainer(boundsConstrainer) + + pool = ThreadPool(processes=endpoint["fetchThreads"]) + mpResults = [pool.apply_async(__doQuery, args=( + endpoint, startTime, endTime, bbox, depth_min, depth_max, itemsPerPageR, x, platforms, pageCallback)) for x in + range(len(pageResults), totalResults, itemsPerPageR)] + pool.close() + pool.join() + + ''' + If pageCallback was supplied, we assume this call to be asynchronous. Otherwise combine all the results data and return it. 
+ ''' + if pageCallback is None: + mpResults = [p.get() for p in mpResults] + for mpResult in mpResults: + results = results + mpResult[0] + mainBoundsConstrainer.testOtherConstrainer(mpResult[4]) + + return results, mainBoundsConstrainer + + +def getValues(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms=None, placeholders=False): + results, boundsConstrainer = fetch(endpoint, startTime, endTime, bbox, depth_min, depth_max, platforms) + + if placeholders: + trimmedResults = [] + for item in results: + depth = None + if "depth" in item: + depth = item["depth"] + if "sea_water_temperature_depth" in item: + depth = item["sea_water_temperature_depth"] + + trimmedItem = { + "x": item["x"], + "y": item["y"], + "source": item["source"], + "time": item["time"], + "device": item["device"] if "device" in item else None, + "platform": item["platform"], + "depth": depth + } + trimmedResults.append(trimmedItem) + + results = trimmedResults + + return results, boundsConstrainer diff --git a/analysis/webservice/algorithms/doms/geo.py b/analysis/webservice/algorithms/doms/geo.py new file mode 100644 index 00000000..3323f571 --- /dev/null +++ b/analysis/webservice/algorithms/doms/geo.py @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +MEAN_RADIUS_EARTH_METERS = 6371010.0 +EQUATORIAL_RADIUS_EARTH_METERS = 6378140.0 +POLAR_RADIUS_EARTH_METERS = 6356752.0 +FLATTENING_EARTH = 298.257223563 +MEAN_RADIUS_EARTH_MILES = 3958.8 + + +class DistanceUnit(object): + METERS = 0 + MILES = 1 + + +# Haversine implementation for great-circle distances between two points +def haversine(x0, y0, x1, y1, units=DistanceUnit.METERS): + if units == DistanceUnit.METERS: + R = MEAN_RADIUS_EARTH_METERS + elif units == DistanceUnit.MILES: + R = MEAN_RADIUS_EARTH_MILES + else: + raise Exception("Invalid units specified") + x0r = x0 * (math.pi / 180.0) # To radians + x1r = x1 * (math.pi / 180.0) # To radians + xd = (x1 - x0) * (math.pi / 180.0) + yd = (y1 - y0) * (math.pi / 180.0) + + a = math.sin(xd / 2.0) * math.sin(xd / 2.0) + \ + math.cos(x0r) * math.cos(x1r) * \ + math.sin(yd / 2.0) * math.sin(yd / 2.0) + c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1.0 - a)) + d = R * c + return d + + +# Equirectangular approximation for when performance is key. 
Better at smaller distances +def equirectangularApprox(x0, y0, x1, y1): + R = 6371000.0 # Meters + x0r = x0 * (math.pi / 180.0) # To radians + x1r = x1 * (math.pi / 180.0) + y0r = y0 * (math.pi / 180.0) + y1r = y1 * (math.pi / 180.0) + + x = (y1r - y0r) * math.cos((x0r + x1r) / 2.0) + y = x1r - x0r + d = math.sqrt(x * x + y * y) * R + return d + + +class BoundingBox(object): + + def __init__(self, north=None, south=None, west=None, east=None, asString=None): + if asString is not None: + bboxParts = asString.split(",") + self.west = float(bboxParts[0]) + self.south = float(bboxParts[1]) + self.east = float(bboxParts[2]) + self.north = float(bboxParts[3]) + else: + self.north = north + self.south = south + self.west = west + self.east = east + + def toString(self): + return "%s,%s,%s,%s" % (self.west, self.south, self.east, self.north) + + def toMap(self): + return { + "xmin": self.west, + "xmax": self.east, + "ymin": self.south, + "ymax": self.north + } + + +''' + Constrains, does not expand. +''' + + +class BoundsConstrainer(BoundingBox): + + def __init__(self, north=None, south=None, west=None, east=None, asString=None): + BoundingBox.__init__(self, north, south, west, east, asString) + + def testNorth(self, v): + if v is None: + return + self.north = max([self.north, v]) + + def testSouth(self, v): + if v is None: + return + self.south = min([self.south, v]) + + def testEast(self, v): + if v is None: + return + self.east = max([self.east, v]) + + def testWest(self, v): + if v is None: + return + self.west = min([self.west, v]) + + def testCoords(self, north=None, south=None, west=None, east=None): + self.testNorth(north) + self.testSouth(south) + self.testWest(west) + self.testEast(east) + + def testOtherConstrainer(self, other): + self.testCoords(north=other.north, south=other.south, west=other.west, east=other.east) diff --git a/analysis/webservice/algorithms/doms/histogramplot.py b/analysis/webservice/algorithms/doms/histogramplot.py new file mode 100644 index 00000000..1e06b66b --- /dev/null +++ b/analysis/webservice/algorithms/doms/histogramplot.py @@ -0,0 +1,127 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
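+
+# histogramplot renders the difference histogram for a stored match-up execution.
+# PARAMETER_TO_FIELD maps the request parameter ("sst", "sss") onto the in-situ field
+# name used when building the list of primary-minus-matchup differences, and the
+# matplotlib rendering runs in a separate process (renderAsync), presumably so global
+# pyplot state cannot leak into the serving process; the PNG bytes come back through a
+# multiprocessing.Manager dict.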
+ +import string +from cStringIO import StringIO +from multiprocessing import Process, Manager + +import matplotlib +import matplotlib.mlab as mlab +import matplotlib.pyplot as plt +import numpy as np + +import BaseDomsHandler +import ResultsStorage + +if not matplotlib.get_backend(): + matplotlib.use('Agg') + +PARAMETER_TO_FIELD = { + "sst": "sea_water_temperature", + "sss": "sea_water_salinity" +} + +PARAMETER_TO_UNITS = { + "sst": "($^\circ$C)", + "sss": "(g/L)" +} + + +class DomsHistogramPlotQueryResults(BaseDomsHandler.DomsQueryResults): + + def __init__(self, x, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, + computeOptions=None, executionId=None, plot=None): + BaseDomsHandler.DomsQueryResults.__init__(self, results=x, args=args, details=details, bounds=bounds, + count=count, computeOptions=computeOptions, executionId=executionId) + self.__primary = primary + self.__secondary = secondary + self.__x = x + self.__parameter = parameter + self.__plot = plot + + def toImage(self): + return self.__plot + + +def render(d, x, primary, secondary, parameter, norm_and_curve=False): + fig, ax = plt.subplots() + fig.suptitle(string.upper("%s vs. %s" % (primary, secondary)), fontsize=14, fontweight='bold') + + n, bins, patches = plt.hist(x, 50, normed=norm_and_curve, facecolor='green', alpha=0.75) + + if norm_and_curve: + mean = np.mean(x) + variance = np.var(x) + sigma = np.sqrt(variance) + y = mlab.normpdf(bins, mean, sigma) + l = plt.plot(bins, y, 'r--', linewidth=1) + + ax.set_title('n = %d' % len(x)) + + units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS["sst"] + ax.set_xlabel("%s - %s %s" % (primary, secondary, units)) + + if norm_and_curve: + ax.set_ylabel("Probability per unit difference") + else: + ax.set_ylabel("Frequency") + + plt.grid(True) + + sio = StringIO() + plt.savefig(sio, format='png') + d['plot'] = sio.getvalue() + + +def renderAsync(x, primary, secondary, parameter, norm_and_curve): + manager = Manager() + d = manager.dict() + p = Process(target=render, args=(d, x, primary, secondary, parameter, norm_and_curve)) + p.start() + p.join() + return d['plot'] + + +def createHistogramPlot(id, parameter, norm_and_curve=False): + with ResultsStorage.ResultsRetrieval() as storage: + params, stats, data = storage.retrieveResults(id) + + primary = params["primary"] + secondary = params["matchup"][0] + + x = createHistTable(data, secondary, parameter) + + plot = renderAsync(x, primary, secondary, parameter, norm_and_curve) + + r = DomsHistogramPlotQueryResults(x=x, parameter=parameter, primary=primary, secondary=secondary, + args=params, details=stats, + bounds=None, count=None, computeOptions=None, executionId=id, plot=plot) + return r + + +def createHistTable(results, secondary, parameter): + x = [] + + field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else PARAMETER_TO_FIELD["sst"] + + for entry in results: + for match in entry["matches"]: + if match["source"] == secondary: + if field in entry and field in match: + a = entry[field] + b = match[field] + x.append((a - b)) + + return x diff --git a/analysis/webservice/algorithms/doms/insitusubset.py b/analysis/webservice/algorithms/doms/insitusubset.py new file mode 100644 index 00000000..7f60e997 --- /dev/null +++ b/analysis/webservice/algorithms/doms/insitusubset.py @@ -0,0 +1,263 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import StringIO +import csv +import json +import logging +from datetime import datetime + +import requests + +import BaseDomsHandler +from webservice.NexusHandler import nexus_handler +from webservice.algorithms.doms import config as edge_endpoints +from webservice.webmodel import NexusProcessingException, NoDataException + +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + + +@nexus_handler +class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS In Situ Subsetter" + path = "/domsinsitusubset" + description = "Subset a DOMS in situ source given the search domain." + + params = [ + { + "name": "source", + "type": "comma-delimited string", + "description": "The in situ Dataset to be sub-setted", + "required": "true", + "sample": "spurs" + }, + { + "name": "parameter", + "type": "string", + "description": "The parameter of interest. One of 'sst', 'sss', 'wind'", + "required": "false", + "default": "All", + "sample": "sss" + }, + { + "name": "startTime", + "type": "string", + "description": "Starting time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH", + "required": "true", + "sample": "2013-10-21T00:00:00Z" + }, + { + "name": "endTime", + "type": "string", + "description": "Ending time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH", + "required": "true", + "sample": "2013-10-31T23:59:59Z" + }, + { + "name": "b", + "type": "comma-delimited float", + "description": "Minimum (Western) Longitude, Minimum (Southern) Latitude, " + "Maximum (Eastern) Longitude, Maximum (Northern) Latitude", + "required": "true", + "sample": "-30,15,-45,30" + }, + { + "name": "depthMin", + "type": "float", + "description": "Minimum depth of measurements. Must be less than depthMax", + "required": "false", + "default": "No limit", + "sample": "0" + }, + { + "name": "depthMax", + "type": "float", + "description": "Maximum depth of measurements. Must be greater than depthMin", + "required": "false", + "default": "No limit", + "sample": "5" + }, + { + "name": "platforms", + "type": "comma-delimited integer", + "description": "Platforms to include for subset consideration", + "required": "false", + "default": "All", + "sample": "1,2,3,4,5,6,7,8,9" + }, + { + "name": "output", + "type": "string", + "description": "Output type. 
Only 'CSV' or 'JSON' is currently supported", + "required": "false", + "default": "JSON", + "sample": "CSV" + } + ] + singleton = True + + def __init__(self): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) + self.log = logging.getLogger(__name__) + + def parse_arguments(self, request): + # Parse input arguments + self.log.debug("Parsing arguments") + + source_name = request.get_argument('source', None) + if source_name is None or source_name.strip() == '': + raise NexusProcessingException(reason="'source' argument is required", code=400) + + parameter_s = request.get_argument('parameter', None) + if parameter_s not in ['sst', 'sss', 'wind', None]: + raise NexusProcessingException( + reason="Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'." % parameter_s, code=400) + + try: + start_time = request.get_start_datetime() + start_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") + except: + raise NexusProcessingException( + reason="'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", + code=400) + try: + end_time = request.get_end_datetime() + end_time = end_time.strftime("%Y-%m-%dT%H:%M:%SZ") + except: + raise NexusProcessingException( + reason="'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", + code=400) + + if start_time > end_time: + raise NexusProcessingException( + reason="The starting time must be before the ending time. Received startTime: %s, endTime: %s" % ( + request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)), + code=400) + + try: + bounding_polygon = request.get_bounding_polygon() + except: + raise NexusProcessingException( + reason="'b' argument is required. 
Must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude", + code=400) + + depth_min = request.get_decimal_arg('depthMin', default=None) + depth_max = request.get_decimal_arg('depthMax', default=None) + + if depth_min is not None and depth_max is not None and depth_min >= depth_max: + raise NexusProcessingException( + reason="Depth Min should be less than Depth Max", code=400) + + platforms = request.get_argument('platforms', None) + if platforms is not None: + try: + p_validation = platforms.split(',') + p_validation = [int(p) for p in p_validation] + del p_validation + except: + raise NexusProcessingException(reason="platforms must be a comma-delimited list of integers", code=400) + + return source_name, parameter_s, start_time, end_time, bounding_polygon, depth_min, depth_max, platforms + + def calc(self, request, **args): + + source_name, parameter_s, start_time, end_time, bounding_polygon, \ + depth_min, depth_max, platforms = self.parse_arguments(request) + + with requests.session() as edge_session: + edge_results = query_edge(source_name, parameter_s, start_time, end_time, + ','.join([str(bound) for bound in bounding_polygon.bounds]), + platforms, depth_min, depth_max, edge_session)['results'] + + if len(edge_results) == 0: + raise NoDataException + return InSituSubsetResult(results=edge_results) + + +class InSituSubsetResult(object): + def __init__(self, results): + self.results = results + + def toJson(self): + return json.dumps(self.results, indent=4) + + def toCSV(self): + fieldnames = sorted(next(iter(self.results)).keys()) + + csv_mem_file = StringIO.StringIO() + try: + writer = csv.DictWriter(csv_mem_file, fieldnames=fieldnames) + + writer.writeheader() + writer.writerows(self.results) + csv_out = csv_mem_file.getvalue() + finally: + csv_mem_file.close() + + return csv_out + + +def query_edge(dataset, variable, startTime, endTime, bbox, platform, depth_min, depth_max, session, itemsPerPage=1000, + startIndex=0, stats=True): + log = logging.getLogger('webservice.algorithms.doms.insitusubset.query_edge') + try: + startTime = datetime.utcfromtimestamp(startTime).strftime('%Y-%m-%dT%H:%M:%SZ') + except TypeError: + # Assume we were passed a properly formatted string + pass + + try: + endTime = datetime.utcfromtimestamp(endTime).strftime('%Y-%m-%dT%H:%M:%SZ') + except TypeError: + # Assume we were passed a properly formatted string + pass + + try: + platform = platform.split(',') + except AttributeError: + # Assume we were passed a list + pass + + params = {"startTime": startTime, + "endTime": endTime, + "bbox": bbox, + "minDepth": depth_min, + "maxDepth": depth_max, + "itemsPerPage": itemsPerPage, "startIndex": startIndex, "stats": str(stats).lower()} + + if variable: + params['variable'] = variable + if platform: + params['platform'] = platform + + edge_request = session.get(edge_endpoints.getEndpointByName(dataset)['url'], params=params) + + edge_request.raise_for_status() + edge_response = json.loads(edge_request.text) + + # Get all edge results + next_page_url = edge_response.get('next', None) + while next_page_url is not None: + log.debug("requesting %s" % next_page_url) + edge_page_request = session.get(next_page_url) + + edge_page_request.raise_for_status() + edge_page_response = json.loads(edge_page_request.text) + + edge_response['results'].extend(edge_page_response['results']) + + next_page_url = edge_page_response.get('next', None) + + return edge_response diff --git 
a/analysis/webservice/algorithms/doms/mapplot.py b/analysis/webservice/algorithms/doms/mapplot.py new file mode 100644 index 00000000..3af85d30 --- /dev/null +++ b/analysis/webservice/algorithms/doms/mapplot.py @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import string +from cStringIO import StringIO +from multiprocessing import Process, Manager + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +from mpl_toolkits.basemap import Basemap + +import BaseDomsHandler +import ResultsStorage + +if not matplotlib.get_backend(): + matplotlib.use('Agg') + +PARAMETER_TO_FIELD = { + "sst": "sea_water_temperature", + "sss": "sea_water_salinity" +} + +PARAMETER_TO_UNITS = { + "sst": "($^\circ$ C)", + "sss": "(g/L)" +} + + +def __square(minLon, maxLon, minLat, maxLat): + if maxLat - minLat > maxLon - minLon: + a = ((maxLat - minLat) - (maxLon - minLon)) / 2.0 + minLon -= a + maxLon += a + elif maxLon - minLon > maxLat - minLat: + a = ((maxLon - minLon) - (maxLat - minLat)) / 2.0 + minLat -= a + maxLat += a + + return minLon, maxLon, minLat, maxLat + + +def render(d, lats, lons, z, primary, secondary, parameter): + fig = plt.figure() + ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) + + ax.set_title(string.upper("%s vs. 
%s" % (primary, secondary))) + # ax.set_ylabel('Latitude') + # ax.set_xlabel('Longitude') + + minLatA = np.min(lats) + maxLatA = np.max(lats) + minLonA = np.min(lons) + maxLonA = np.max(lons) + + minLat = minLatA - (abs(maxLatA - minLatA) * 0.1) + maxLat = maxLatA + (abs(maxLatA - minLatA) * 0.1) + + minLon = minLonA - (abs(maxLonA - minLonA) * 0.1) + maxLon = maxLonA + (abs(maxLonA - minLonA) * 0.1) + + minLon, maxLon, minLat, maxLat = __square(minLon, maxLon, minLat, maxLat) + + # m = Basemap(projection='mill', llcrnrlon=-180,llcrnrlat=-80,urcrnrlon=180,urcrnrlat=80,resolution='l') + m = Basemap(projection='mill', llcrnrlon=minLon, llcrnrlat=minLat, urcrnrlon=maxLon, urcrnrlat=maxLat, + resolution='l') + + m.drawparallels(np.arange(minLat, maxLat, (maxLat - minLat) / 5.0), labels=[1, 0, 0, 0], fontsize=10) + m.drawmeridians(np.arange(minLon, maxLon, (maxLon - minLon) / 5.0), labels=[0, 0, 0, 1], fontsize=10) + + m.drawcoastlines() + m.drawmapboundary(fill_color='#99ffff') + m.fillcontinents(color='#cc9966', lake_color='#99ffff') + + # lats, lons = np.meshgrid(lats, lons) + + masked_array = np.ma.array(z, mask=np.isnan(z)) + z = masked_array + + values = np.zeros(len(z)) + for i in range(0, len(z)): + values[i] = ((z[i] - np.min(z)) / (np.max(z) - np.min(z)) * 20.0) + 10 + + x, y = m(lons, lats) + + im1 = m.scatter(x, y, values) + + im1.set_array(z) + cb = m.colorbar(im1) + + units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS["sst"] + cb.set_label("Difference %s" % units) + + sio = StringIO() + plt.savefig(sio, format='png') + plot = sio.getvalue() + if d is not None: + d['plot'] = plot + return plot + + +class DomsMapPlotQueryResults(BaseDomsHandler.DomsQueryResults): + def __init__(self, lats, lons, z, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, + computeOptions=None, executionId=None, plot=None): + BaseDomsHandler.DomsQueryResults.__init__(self, results={"lats": lats, "lons": lons, "values": z}, args=args, + details=details, bounds=bounds, count=count, + computeOptions=computeOptions, executionId=executionId) + self.__lats = lats + self.__lons = lons + self.__z = np.array(z) + self.__parameter = parameter + self.__primary = primary + self.__secondary = secondary + self.__plot = plot + + def toImage(self): + return self.__plot + + +def renderAsync(x, y, z, primary, secondary, parameter): + manager = Manager() + d = manager.dict() + p = Process(target=render, args=(d, x, y, z, primary, secondary, parameter)) + p.start() + p.join() + return d['plot'] + + +def createMapPlot(id, parameter): + with ResultsStorage.ResultsRetrieval() as storage: + params, stats, data = storage.retrieveResults(id) + + primary = params["primary"] + secondary = params["matchup"][0] + + lats = [] + lons = [] + z = [] + + field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else PARAMETER_TO_FIELD["sst"] + + for entry in data: + for match in entry["matches"]: + if match["source"] == secondary: + + if field in entry and field in match: + a = entry[field] + b = match[field] + z.append((a - b)) + z.append((a - b)) + else: + z.append(1.0) + z.append(1.0) + lats.append(entry["y"]) + lons.append(entry["x"]) + lats.append(match["y"]) + lons.append(match["x"]) + + plot = renderAsync(lats, lons, z, primary, secondary, parameter) + r = DomsMapPlotQueryResults(lats=lats, lons=lons, z=z, parameter=parameter, primary=primary, secondary=secondary, + args=params, + details=stats, bounds=None, count=None, computeOptions=None, 
executionId=id, plot=plot) + return r diff --git a/analysis/webservice/algorithms/doms/scatterplot.py b/analysis/webservice/algorithms/doms/scatterplot.py new file mode 100644 index 00000000..2ff57ee9 --- /dev/null +++ b/analysis/webservice/algorithms/doms/scatterplot.py @@ -0,0 +1,118 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import string +from cStringIO import StringIO +from multiprocessing import Process, Manager + +import matplotlib +import matplotlib.pyplot as plt + +import BaseDomsHandler +import ResultsStorage + +if not matplotlib.get_backend(): + matplotlib.use('Agg') + +PARAMETER_TO_FIELD = { + "sst": "sea_water_temperature", + "sss": "sea_water_salinity" +} + +PARAMETER_TO_UNITS = { + "sst": "($^\circ$ C)", + "sss": "(g/L)" +} + + +def render(d, x, y, z, primary, secondary, parameter): + fig, ax = plt.subplots() + + ax.set_title(string.upper("%s vs. %s" % (primary, secondary))) + + units = PARAMETER_TO_UNITS[parameter] if parameter in PARAMETER_TO_UNITS else PARAMETER_TO_UNITS[ + "sst"] + ax.set_ylabel("%s %s" % (secondary, units)) + ax.set_xlabel("%s %s" % (primary, units)) + + ax.scatter(x, y) + + sio = StringIO() + plt.savefig(sio, format='png') + d['plot'] = sio.getvalue() + + +class DomsScatterPlotQueryResults(BaseDomsHandler.DomsQueryResults): + + def __init__(self, x, y, z, parameter, primary, secondary, args=None, bounds=None, count=None, details=None, + computeOptions=None, executionId=None, plot=None): + BaseDomsHandler.DomsQueryResults.__init__(self, results=[x, y], args=args, details=details, bounds=bounds, + count=count, computeOptions=computeOptions, executionId=executionId) + self.__primary = primary + self.__secondary = secondary + self.__x = x + self.__y = y + self.__z = z + self.__parameter = parameter + self.__plot = plot + + def toImage(self): + return self.__plot + + +def renderAsync(x, y, z, primary, secondary, parameter): + manager = Manager() + d = manager.dict() + p = Process(target=render, args=(d, x, y, z, primary, secondary, parameter)) + p.start() + p.join() + return d['plot'] + + +def createScatterPlot(id, parameter): + with ResultsStorage.ResultsRetrieval() as storage: + params, stats, data = storage.retrieveResults(id) + + primary = params["primary"] + secondary = params["matchup"][0] + + x, y, z = createScatterTable(data, secondary, parameter) + + plot = renderAsync(x, y, z, primary, secondary, parameter) + + r = DomsScatterPlotQueryResults(x=x, y=y, z=z, parameter=parameter, primary=primary, secondary=secondary, + args=params, details=stats, + bounds=None, count=None, computeOptions=None, executionId=id, plot=plot) + return r + + +def createScatterTable(results, secondary, parameter): + x = [] + y = [] + z = [] + + field = PARAMETER_TO_FIELD[parameter] if parameter in PARAMETER_TO_FIELD else 
PARAMETER_TO_FIELD["sst"] + + for entry in results: + for match in entry["matches"]: + if match["source"] == secondary: + if field in entry and field in match: + a = entry[field] + b = match[field] + x.append(a) + y.append(b) + z.append(a - b) + + return x, y, z diff --git a/analysis/webservice/algorithms/doms/subsetter.py b/analysis/webservice/algorithms/doms/subsetter.py new file mode 100644 index 00000000..67a2276f --- /dev/null +++ b/analysis/webservice/algorithms/doms/subsetter.py @@ -0,0 +1,260 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import tempfile +import zipfile +from datetime import datetime + +import requests + +import BaseDomsHandler +from webservice.NexusHandler import nexus_handler +from webservice.webmodel import NexusProcessingException + +ISO_8601 = '%Y-%m-%dT%H:%M:%S%z' + + +def is_blank(my_string): + return not (my_string and my_string.strip() != '') + + +@nexus_handler +class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler): + name = "DOMS Subsetter" + path = "/domssubset" + description = "Subset DOMS sources given the search domain" + + params = { + "dataset": { + "name": "NEXUS Dataset", + "type": "string", + "description": "The NEXUS dataset. Optional but at least one of 'dataset' or 'insitu' are required" + }, + "insitu": { + "name": "In Situ sources", + "type": "comma-delimited string", + "description": "The in situ source(s). Optional but at least one of 'dataset' or 'insitu' are required" + }, + "parameter": { + "name": "Data Parameter", + "type": "string", + "description": "The parameter of interest. One of 'sst', 'sss', 'wind'. Required" + }, + "startTime": { + "name": "Start Time", + "type": "string", + "description": "Starting time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH. Required" + }, + "endTime": { + "name": "End Time", + "type": "string", + "description": "Ending time in format YYYY-MM-DDTHH:mm:ssZ or seconds since EPOCH. Required" + }, + "b": { + "name": "Bounding box", + "type": "comma-delimited float", + "description": "Minimum (Western) Longitude, Minimum (Southern) Latitude, " + "Maximum (Eastern) Longitude, Maximum (Northern) Latitude. Required" + }, + "depthMin": { + "name": "Minimum Depth", + "type": "float", + "description": "Minimum depth of measurements. Must be less than depthMax. Optional" + }, + "depthMax": { + "name": "Maximum Depth", + "type": "float", + "description": "Maximum depth of measurements. Must be greater than depthMin. Optional" + }, + "platforms": { + "name": "Platforms", + "type": "comma-delimited integer", + "description": "Platforms to include for subset consideration. Optional" + }, + "output": { + "name": "Output", + "type": "string", + "description": "Output type. Only 'ZIP' is currently supported. 
Required" + } + } + singleton = True + + def __init__(self): + BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self) + self.log = logging.getLogger(__name__) + + def parse_arguments(self, request): + # Parse input arguments + self.log.debug("Parsing arguments") + + primary_ds_name = request.get_argument('dataset', None) + matchup_ds_names = request.get_argument('insitu', None) + + if is_blank(primary_ds_name) and is_blank(matchup_ds_names): + raise NexusProcessingException(reason="Either 'dataset', 'insitu', or both arguments are required", + code=400) + + if matchup_ds_names is not None: + try: + matchup_ds_names = matchup_ds_names.split(',') + except: + raise NexusProcessingException(reason="'insitu' argument should be a comma-seperated list", code=400) + + parameter_s = request.get_argument('parameter', None) + if parameter_s not in ['sst', 'sss', 'wind']: + raise NexusProcessingException( + reason="Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'." % parameter_s, code=400) + + try: + start_time = request.get_start_datetime() + start_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") + except: + raise NexusProcessingException( + reason="'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", + code=400) + try: + end_time = request.get_end_datetime() + end_time = end_time.strftime("%Y-%m-%dT%H:%M:%SZ") + except: + raise NexusProcessingException( + reason="'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ", + code=400) + + if start_time > end_time: + raise NexusProcessingException( + reason="The starting time must be before the ending time. Received startTime: %s, endTime: %s" % ( + request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)), + code=400) + + try: + bounding_polygon = request.get_bounding_polygon() + except: + raise NexusProcessingException( + reason="'b' argument is required. 
Must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude", + code=400) + + depth_min = request.get_decimal_arg('depthMin', default=None) + depth_max = request.get_decimal_arg('depthMax', default=None) + + if depth_min is not None and depth_max is not None and depth_min >= depth_max: + raise NexusProcessingException( + reason="Depth Min should be less than Depth Max", code=400) + + platforms = request.get_argument('platforms', None) + if platforms is not None: + try: + p_validation = platforms.split(',') + p_validation = [int(p) for p in p_validation] + del p_validation + except: + raise NexusProcessingException(reason="platforms must be a comma-delimited list of integers", code=400) + + return primary_ds_name, matchup_ds_names, parameter_s, start_time, end_time, \ + bounding_polygon, depth_min, depth_max, platforms + + def calc(self, request, **args): + + primary_ds_name, matchup_ds_names, parameter_s, start_time, end_time, \ + bounding_polygon, depth_min, depth_max, platforms = self.parse_arguments(request) + + primary_url = "https://doms.jpl.nasa.gov/datainbounds" + primary_params = { + 'ds': primary_ds_name, + 'parameter': parameter_s, + 'b': ','.join([str(bound) for bound in bounding_polygon.bounds]), + 'startTime': start_time, + 'endTime': end_time, + 'output': "CSV" + } + + matchup_url = "https://doms.jpl.nasa.gov/domsinsitusubset" + matchup_params = { + 'source': None, + 'parameter': parameter_s, + 'startTime': start_time, + 'endTime': end_time, + 'b': ','.join([str(bound) for bound in bounding_polygon.bounds]), + 'depthMin': depth_min, + 'depthMax': depth_max, + 'platforms': platforms, + 'output': 'CSV' + } + + primary_temp_file_path = None + matchup_downloads = None + + with requests.session() as session: + + if not is_blank(primary_ds_name): + # Download primary + primary_temp_file, primary_temp_file_path = tempfile.mkstemp(suffix='.csv') + download_file(primary_url, primary_temp_file_path, session, params=primary_params) + + if len(matchup_ds_names) > 0: + # Download matchup + matchup_downloads = {} + for matchup_ds in matchup_ds_names: + matchup_downloads[matchup_ds] = tempfile.mkstemp(suffix='.csv') + matchup_params['source'] = matchup_ds + download_file(matchup_url, matchup_downloads[matchup_ds][1], session, params=matchup_params) + + # Zip downloads + date_range = "%s-%s" % (datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y%m%d"), + datetime.strptime(end_time, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y%m%d")) + bounds = '%.4fW_%.4fS_%.4fE_%.4fN' % bounding_polygon.bounds + zip_dir = tempfile.mkdtemp() + zip_path = '%s/subset.%s.%s.zip' % (zip_dir, date_range, bounds) + with zipfile.ZipFile(zip_path, 'w') as my_zip: + if primary_temp_file_path: + my_zip.write(primary_temp_file_path, arcname='%s.%s.%s.csv' % (primary_ds_name, date_range, bounds)) + if matchup_downloads: + for matchup_ds, download in matchup_downloads.iteritems(): + my_zip.write(download[1], arcname='%s.%s.%s.csv' % (matchup_ds, date_range, bounds)) + + # Clean up + if primary_temp_file_path: + os.remove(primary_temp_file_path) + if matchup_downloads: + for matchup_ds, download in matchup_downloads.iteritems(): + os.remove(download[1]) + + return SubsetResult(zip_path) + + +class SubsetResult(object): + def __init__(self, zip_path): + self.zip_path = zip_path + + def toJson(self): + raise NotImplementedError + + def toZip(self): + with open(self.zip_path, 'rb') as zip_file: + zip_contents = 
zip_file.read() + + return zip_contents + + def cleanup(self): + os.remove(self.zip_path) + + +def download_file(url, filepath, session, params=None): + r = session.get(url, params=params, stream=True) + with open(filepath, 'wb') as f: + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) diff --git a/analysis/webservice/algorithms/doms/values.py b/analysis/webservice/algorithms/doms/values.py new file mode 100644 index 00000000..c47d4503 --- /dev/null +++ b/analysis/webservice/algorithms/doms/values.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PLATFORMS = [ + {"id": 1, "desc": "ship"}, + {"id": 2, "desc": "moored surface buoy"}, + {"id": 3, "desc": "drifting surface float"}, + {"id": 4, "desc": "drifting subsurface profiling float"}, + {"id": 5, "desc": "autonomous underwater vehicle"}, + {"id": 6, "desc": "offshore structure"}, + {"id": 7, "desc": "coastal structure"}, + {"id": 8, "desc": "towed unmanned submersible"}, + {"id": 9, "desc": "orbiting satellite"} +] + +DEVICES = [ + {"id": 1, "desc": "bathythermographs"}, + {"id": 2, "desc": "discrete water samplers"}, + {"id": 3, "desc": "CTD"}, + {"id": 4, "desc": "Current profilers / acousticDopplerCurrentProfiler"}, + {"id": 5, "desc": "radiometers"}, + {"id": 6, "desc": "scatterometers"} +] + +MISSIONS = [ + {"id": 1, "desc": "SAMOS"}, + {"id": 2, "desc": "ICOADS"}, + {"id": 3, "desc": "Aquarius"}, + {"id": 4, "desc": "SPURS1"} +] + + +def getDescById(list, id): + for item in list: + if item["id"] == id: + return item["desc"] + return id + + +def getPlatformById(id): + return getDescById(PLATFORMS, id) + + +def getDeviceById(id): + return getDescById(DEVICES, id) + + +def getMissionById(id): + return getDescById(MISSIONS, id) + + +def getDescByListNameAndId(listName, id): + if listName.upper() == "PLATFORM": + return getPlatformById(id) + elif listName.upper() == "DEVICE": + return getDeviceById(id) + elif listName.upper() == "MISSION": + return getMissionById(id) + else: + raise Exception("Invalid list name specified ('%s')" % listName) diff --git a/analysis/webservice/algorithms/doms/workerthread.py b/analysis/webservice/algorithms/doms/workerthread.py new file mode 100644 index 00000000..7639c006 --- /dev/null +++ b/analysis/webservice/algorithms/doms/workerthread.py @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import threading + + +class WorkerThread(threading.Thread): + + def __init__(self, method, params): + threading.Thread.__init__(self) + self.method = method + self.params = params + self.completed = False + self.results = None + + def run(self): + self.results = self.method(*self.params) + self.completed = True + + +def __areAllComplete(threads): + for thread in threads: + if not thread.completed: + return False + + return True + + +def wait(threads, startFirst=False, poll=0.5): + if startFirst: + for thread in threads: + thread.start() + + while not __areAllComplete(threads): + threading._sleep(poll) + + +def foo(param1, param2): + print param1, param2 + return "c" + + +if __name__ == "__main__": + + thread = WorkerThread(foo, params=("a", "b")) + thread.start() + while not thread.completed: + threading._sleep(0.5) + print thread.results From c4c4cfb6ce1640817fa494d341328be6f8081abe Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Wed, 5 Aug 2020 15:27:27 -0700 Subject: [PATCH 23/26] revert --- .../tests/algorithms_spark/Matchup_test.py | 321 ++++++++++++++++++ analysis/tests/algorithms_spark/__init__.py | 16 + 2 files changed, 337 insertions(+) create mode 100644 analysis/tests/algorithms_spark/Matchup_test.py create mode 100644 analysis/tests/algorithms_spark/__init__.py diff --git a/analysis/tests/algorithms_spark/Matchup_test.py b/analysis/tests/algorithms_spark/Matchup_test.py new file mode 100644 index 00000000..5dee17ca --- /dev/null +++ b/analysis/tests/algorithms_spark/Matchup_test.py @@ -0,0 +1,321 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pickle +import random +import timeit +import unittest + +from webservice.algorithms_spark.Matchup import * + + +class TestMatch_Points(unittest.TestCase): + def test_one_point_match_exact(self): + primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) + matchup = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=2) + + primary_points = [primary] + matchup_points = [matchup] + + matches = list(match_points_generator(primary_points, matchup_points, 0)) + + self.assertEquals(1, len(matches)) + + p_match_point, match = matches[0] + + self.assertEqual(primary, p_match_point) + self.assertEqual(matchup, match) + + def test_one_point_match_within_tolerance_150km(self): + primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) + matchup = DomsPoint(longitude=1.0, latitude=3.0, time=1000, depth=5.0, data_id=2) + + primary_points = [primary] + matchup_points = [matchup] + + matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + + self.assertEquals(1, len(matches)) + + p_match_point, match = matches[0] + + self.assertEqual(primary, p_match_point) + self.assertEqual(matchup, match) + + def test_one_point_match_within_tolerance_200m(self): + primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) + matchup = DomsPoint(longitude=1.001, latitude=2.0, time=1000, depth=5.0, data_id=2) + + primary_points = [primary] + matchup_points = [matchup] + + matches = list(match_points_generator(primary_points, matchup_points, 200)) # tolerance 200 m + + self.assertEquals(1, len(matches)) + + p_match_point, match = matches[0] + + self.assertEqual(primary, p_match_point) + self.assertEqual(matchup, match) + + def test_one_point_not_match_tolerance_150km(self): + primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) + matchup = DomsPoint(longitude=1.0, latitude=4.0, time=1000, depth=5.0, data_id=2) + + primary_points = [primary] + matchup_points = [matchup] + + matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + + self.assertEquals(0, len(matches)) + + def test_one_point_not_match_tolerance_100m(self): + primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) + matchup = DomsPoint(longitude=1.001, latitude=2.0, time=1000, depth=5.0, data_id=2) + + primary_points = [primary] + matchup_points = [matchup] + + matches = list(match_points_generator(primary_points, matchup_points, 100)) # tolerance 100 m + + self.assertEquals(0, len(matches)) + + def test_multiple_point_match(self): + primary = DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1) + primary_points = [primary] + + matchup_points = [ + DomsPoint(longitude=1.0, latitude=3.0, time=1000, depth=10.0, data_id=2), + DomsPoint(longitude=2.0, latitude=2.0, time=1000, depth=0.0, data_id=3), + DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0, data_id=4) + ] + + matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + + self.assertEquals(3, len(matches)) + + self.assertSetEqual({primary}, {x[0] for x in matches}) + + list_of_matches = [x[1] for x in matches] + + self.assertEquals(3, len(list_of_matches)) + self.assertItemsEqual(matchup_points, list_of_matches) + + def test_multiple_point_match_multiple_times(self): + primary_points = [ + DomsPoint(longitude=1.0, latitude=2.0, time=1000, depth=5.0, data_id=1), + DomsPoint(longitude=1.5, 
latitude=1.5, time=1000, depth=5.0, data_id=2) + ] + + matchup_points = [ + DomsPoint(longitude=1.0, latitude=3.0, time=1000, depth=10.0, data_id=3), + DomsPoint(longitude=2.0, latitude=2.0, time=1000, depth=0.0, data_id=4), + DomsPoint(longitude=0.5, latitude=1.5, time=1000, depth=3.0, data_id=5) + ] + + matches = list(match_points_generator(primary_points, matchup_points, 150000)) # tolerance 150 km + + self.assertEquals(5, len(matches)) + + self.assertSetEqual({p for p in primary_points}, {x[0] for x in matches}) + + # First primary point matches all 3 secondary + self.assertEquals(3, [x[0] for x in matches].count(primary_points[0])) + self.assertItemsEqual(matchup_points, [x[1] for x in matches if x[0] == primary_points[0]]) + + # Second primary point matches only last 2 secondary + self.assertEquals(2, [x[0] for x in matches].count(primary_points[1])) + self.assertItemsEqual(matchup_points[1:], [x[1] for x in matches if x[0] == primary_points[1]]) + + def test_one_of_many_primary_matches_one_of_many_matchup(self): + primary_points = [ + DomsPoint(longitude=-33.76764, latitude=30.42946, time=1351553994, data_id=1), + DomsPoint(longitude=-33.75731, latitude=29.86216, time=1351554004, data_id=2) + ] + + matchup_points = [ + DomsPoint(longitude=-33.762, latitude=28.877, time=1351521432, depth=3.973, data_id=3), + DomsPoint(longitude=-34.916, latitude=28.879, time=1351521770, depth=2.9798, data_id=4), + DomsPoint(longitude=-31.121, latitude=31.256, time=1351519892, depth=4.07, data_id=5) + ] + + matches = list(match_points_generator(primary_points, matchup_points, 110000)) # tolerance 110 km + + self.assertEquals(1, len(matches)) + + self.assertSetEqual({p for p in primary_points if p.data_id == 2}, {x[0] for x in matches}) + + # First primary point matches none + self.assertEquals(0, [x[0] for x in matches].count(primary_points[0])) + + # Second primary point matches only first secondary + self.assertEquals(1, [x[0] for x in matches].count(primary_points[1])) + self.assertItemsEqual(matchup_points[0:1], [x[1] for x in matches if x[0] == primary_points[1]]) + + @unittest.skip("This test is just for timing, doesn't actually assert anything.") + def test_time_many_primary_many_matchup(self): + import logging + import sys + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) + log = logging.getLogger(__name__) + # Generate 160000 DomsPoints distributed equally in a box from -2.0 lat/lon to 2.0 lat/lon + log.info("Generating primary points") + x = np.arange(-2.0, 2.0, 0.01) + y = np.arange(-2.0, 2.0, 0.01) + primary_points = [DomsPoint(longitude=xy[0], latitude=xy[1], time=1000, depth=5.0, data_id=i) for i, xy in + enumerate(np.array(np.meshgrid(x, y)).T.reshape(-1, 2))] + + # Generate 2000 DomsPoints distributed randomly in a box from -2.0 lat/lon to 2.0 lat/lon + log.info("Generating matchup points") + matchup_points = [ + DomsPoint(longitude=random.uniform(-2.0, 2.0), latitude=random.uniform(-2.0, 2.0), time=1000, depth=5.0, + data_id=i) for i in xrange(0, 2000)] + + log.info("Starting matchup") + log.info("Best of repeat(3, 2) matchups: %s seconds" % min( + timeit.repeat(lambda: list(match_points_generator(primary_points, matchup_points, 1500)), repeat=3, + number=2))) + + +class TestDOMSPoint(unittest.TestCase): + def test_is_pickleable(self): + edge_point = json.loads("""{ +"id": "argo-profiles-5903995(46, 0)", +"time": "2012-10-15T14:24:04Z", +"point": "-33.467 29.728", 
+"sea_water_temperature": 24.5629997253, +"sea_water_temperature_depth": 2.9796258642, +"wind_speed": null, +"sea_water_salinity": null, +"sea_water_salinity_depth": null, +"platform": 4, +"device": 3, +"fileurl": "ftp://podaac-ftp.jpl.nasa.gov/allData/insitu/L2/spurs1/argo/argo-profiles-5903995.nc" +}""") + point = DomsPoint.from_edge_point(edge_point) + self.assertIsNotNone(pickle.dumps(point)) + + +def check_all(): + return check_solr() and check_cass() and check_edge() + + +def check_solr(): + # TODO eventually this might do something. + return False + + +def check_cass(): + # TODO eventually this might do something. + return False + + +def check_edge(): + # TODO eventually this might do something. + return False + + +@unittest.skipUnless(check_all(), + "These tests require local instances of Solr, Cassandra, and Edge to be running.") +class TestMatchup(unittest.TestCase): + def setUp(self): + from os import environ + environ['PYSPARK_DRIVER_PYTHON'] = '/Users/greguska/anaconda/envs/nexus-analysis/bin/python2.7' + environ['PYSPARK_PYTHON'] = '/Users/greguska/anaconda/envs/nexus-analysis/bin/python2.7' + environ['SPARK_HOME'] = '/Users/greguska/sandbox/spark-2.0.0-bin-hadoop2.7' + + def test_mur_match(self): + from shapely.wkt import loads + from nexustiles.nexustiles import NexusTileService + + polygon = loads("POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))") + primary_ds = "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1" + matchup_ds = "spurs" + parameter = "sst" + start_time = 1350259200 # 2012-10-15T00:00:00Z + end_time = 1350345600 # 2012-10-16T00:00:00Z + time_tolerance = 86400 + depth_tolerance = 5.0 + radius_tolerance = 1500.0 + platforms = "1,2,3,4,5,6,7,8,9" + + tile_service = NexusTileService() + tile_ids = [tile.tile_id for tile in + tile_service.find_tiles_in_polygon(polygon, primary_ds, start_time, end_time, fetch_data=False, + fl='id')] + result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds, matchup_ds, parameter, time_tolerance, + depth_tolerance, radius_tolerance, platforms) + for k, v in result.iteritems(): + print "primary: %s\n\tmatches:\n\t\t%s" % ( + "lon: %s, lat: %s, time: %s, sst: %s" % (k.longitude, k.latitude, k.time, k.sst), + '\n\t\t'.join( + ["lon: %s, lat: %s, time: %s, sst: %s" % (i.longitude, i.latitude, i.time, i.sst) for i in v])) + + def test_smap_match(self): + from shapely.wkt import loads + from nexustiles.nexustiles import NexusTileService + + polygon = loads("POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))") + primary_ds = "SMAP_L2B_SSS" + matchup_ds = "spurs" + parameter = "sss" + start_time = 1350259200 # 2012-10-15T00:00:00Z + end_time = 1350345600 # 2012-10-16T00:00:00Z + time_tolerance = 86400 + depth_tolerance = 5.0 + radius_tolerance = 1500.0 + platforms = "1,2,3,4,5,6,7,8,9" + + tile_service = NexusTileService() + tile_ids = [tile.tile_id for tile in + tile_service.find_tiles_in_polygon(polygon, primary_ds, start_time, end_time, fetch_data=False, + fl='id')] + result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds, matchup_ds, parameter, time_tolerance, + depth_tolerance, radius_tolerance, platforms) + for k, v in result.iteritems(): + print "primary: %s\n\tmatches:\n\t\t%s" % ( + "lon: %s, lat: %s, time: %s, sst: %s" % (k.longitude, k.latitude, k.time, k.sst), + '\n\t\t'.join( + ["lon: %s, lat: %s, time: %s, sst: %s" % (i.longitude, i.latitude, i.time, i.sst) for i in v])) + + def test_ascatb_match(self): + from shapely.wkt import loads + from 
nexustiles.nexustiles import NexusTileService + + polygon = loads("POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))") + primary_ds = "ASCATB-L2-Coastal" + matchup_ds = "spurs" + parameter = "wind" + start_time = 1351468800 # 2012-10-29T00:00:00Z + end_time = 1351555200 # 2012-10-30T00:00:00Z + time_tolerance = 86400 + depth_tolerance = 5.0 + radius_tolerance = 110000.0 # 110 km + platforms = "1,2,3,4,5,6,7,8,9" + + tile_service = NexusTileService() + tile_ids = [tile.tile_id for tile in + tile_service.find_tiles_in_polygon(polygon, primary_ds, start_time, end_time, fetch_data=False, + fl='id')] + result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds, matchup_ds, parameter, time_tolerance, + depth_tolerance, radius_tolerance, platforms) + for k, v in result.iteritems(): + print "primary: %s\n\tmatches:\n\t\t%s" % ( + "lon: %s, lat: %s, time: %s, wind u,v: %s,%s" % (k.longitude, k.latitude, k.time, k.wind_u, k.wind_v), + '\n\t\t'.join( + ["lon: %s, lat: %s, time: %s, wind u,v: %s,%s" % ( + i.longitude, i.latitude, i.time, i.wind_u, i.wind_v) for i in v])) diff --git a/analysis/tests/algorithms_spark/__init__.py b/analysis/tests/algorithms_spark/__init__.py new file mode 100644 index 00000000..07073680 --- /dev/null +++ b/analysis/tests/algorithms_spark/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + From 4e7833b9c10afba1e5b1f604bd099967dba6873b Mon Sep 17 00:00:00 2001 From: Eamon Ford Date: Thu, 6 Aug 2020 10:15:00 -0700 Subject: [PATCH 24/26] update images --- helm/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/values.yaml b/helm/values.yaml index 41b20bda..657dfe9b 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -31,7 +31,7 @@ ingestion: granuleIngester: replicas: 2 - image: nexusjpl/granule-ingester:0.0.1 + image: nexusjpl/granule-ingester:0.0.3 ## cpu refers to both request and limit cpu: 1 @@ -40,7 +40,7 @@ ingestion: memory: 1Gi collectionManager: - image: nexusjpl/collection-manager:0.0.2 + image: nexusjpl/collection-manager:0.0.3 ## cpu refers to both request and limit cpu: 0.5 From e01d0a0dc06f378107f6d47faa29594e9bc3bd33 Mon Sep 17 00:00:00 2001 From: thomas loubrieu Date: Mon, 10 Aug 2020 18:58:57 -0700 Subject: [PATCH 25/26] spark fair scheduling, asynchronous job in apii (jobs/), multiple api end-points (...Spark for legacy, or algorithm/...) 
create handler manager to have multiple endpoint for the same algorithm implement a demo asynchronous mode in the restapi remove pydataclasses dependency --- analysis/README.md | 3 +- analysis/conda-requirements.txt | 1 + analysis/webservice/NexusHandler.py | 14 ++- .../webservice/algorithms/Capabilities.py | 4 +- .../algorithms_spark/TimeAvgMapSpark.py | 37 +++--- .../algorithms_spark/TimeSeriesSpark.py | 3 +- .../webservice/algorithms_spark/__init__.py | 3 +- analysis/webservice/config/spark_pools.xml | 8 ++ analysis/webservice/jobs/__init__.py | 1 + analysis/webservice/jobs/job.py | 12 ++ .../request/handlers/NexusAsyncJobHandler.py | 81 +++++++++++++ .../request/handlers/NexusHandlerManager.py | 108 ++++++++++++++++++ .../request/handlers/NexusRequestHandler.py | 64 ++++++++++- .../request/handlers/__init__.py | 4 +- .../request/renderers/NexusRendererFactory.py | 2 + analysis/webservice/webapp.py | 50 ++------ analysis/webservice/webmodel/Exceptions.py | 2 +- 17 files changed, 320 insertions(+), 77 deletions(-) create mode 100644 analysis/webservice/config/spark_pools.xml create mode 100644 analysis/webservice/jobs/__init__.py create mode 100644 analysis/webservice/jobs/job.py create mode 100644 analysis/webservice/nexus_tornado/request/handlers/NexusAsyncJobHandler.py create mode 100644 analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py diff --git a/analysis/README.md b/analysis/README.md index a55841b2..eaea5309 100644 --- a/analysis/README.md +++ b/analysis/README.md @@ -14,10 +14,11 @@ Python module that exposes NEXUS analytical capabilities via a HTTP webservice. conda activate nexus-analysis ```` -2. Install conda dependencies +2. Install conda dependencies and other dependencies ```` cd analysis + pip install asyncio # for asynchronous job management conda install pyspark conda install -c conda-forge --file conda-requirements.txt #conda install numpy matplotlib mpld3 scipy netCDF4 basemap gdal pyproj=1.9.5.1 libnetcdf=4.3.3.1 diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt index 6d9a35ed..83d6d2d0 100644 --- a/analysis/conda-requirements.txt +++ b/analysis/conda-requirements.txt @@ -15,3 +15,4 @@ gdal==3.0.2 mock==2.0.0 singledispatch==3.4.0.3 + diff --git a/analysis/webservice/NexusHandler.py b/analysis/webservice/NexusHandler.py index 42972ec9..e4d35d77 100644 --- a/analysis/webservice/NexusHandler.py +++ b/analysis/webservice/NexusHandler.py @@ -16,8 +16,11 @@ import logging import types +from functools import partial -AVAILABLE_HANDLERS = [] +AVAILABLE_LEGACY_HANDLERS = [] +AVAILABLE_RESTAPI_HANDLERS = [] +AVAILABLE_WPS_HANDLERS = [] AVAILABLE_INITIALIZERS = [] @@ -32,17 +35,22 @@ def nexus_initializer(clazz): return clazz -def nexus_handler(clazz): +def nexus_handler(clazz, handler_list=AVAILABLE_LEGACY_HANDLERS): log = logging.getLogger(__name__) try: clazz.validate() log.info("Adding algorithm module '%s' with path '%s' (%s)" % (clazz.name, clazz.path, clazz)) - AVAILABLE_HANDLERS.append(clazz) + handler_list.append(clazz) except Exception as ex: log.warn("Handler '%s' is invalid and will be skipped (reason: %s)" % (clazz, ex.message), exc_info=True) return clazz +nexus_restapi_handler = partial(nexus_handler, handler_list=AVAILABLE_RESTAPI_HANDLERS) +nexus_wps_handler = partial(nexus_handler, handler_list=AVAILABLE_WPS_HANDLERS) + + + DEFAULT_PARAMETERS_SPEC = { "ds": { "name": "Dataset", diff --git a/analysis/webservice/algorithms/Capabilities.py b/analysis/webservice/algorithms/Capabilities.py index 
fa85a7c0..57d05009 100644 --- a/analysis/webservice/algorithms/Capabilities.py +++ b/analysis/webservice/algorithms/Capabilities.py @@ -16,7 +16,7 @@ import json -from webservice.NexusHandler import nexus_handler, AVAILABLE_HANDLERS +from webservice.NexusHandler import nexus_handler, AVAILABLE_LEGACY_HANDLERS from webservice.algorithms.NexusCalcHandler import NexusCalcHandler from webservice.webmodel import NexusResults @@ -32,7 +32,7 @@ class CapabilitiesListCalcHandlerImpl(NexusCalcHandler): def calc(self, computeOptions, **args): capabilities = [] - for capability in AVAILABLE_HANDLERS: + for capability in AVAILABLE_LEGACY_HANDLERS: capabilityDef = { "name": capability.name, "path": capability.path, diff --git a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py index 6231873b..f7077820 100644 --- a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py +++ b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py @@ -16,11 +16,12 @@ from datetime import datetime from functools import partial +import uuid import numpy as np import shapely.geometry from pytz import timezone -from webservice.NexusHandler import nexus_handler +from webservice.NexusHandler import nexus_handler, nexus_restapi_handler from webservice.algorithms_spark.NexusCalcSparkHandler import NexusCalcSparkHandler from webservice.webmodel import NexusResults, NexusProcessingException, NoDataException @@ -29,6 +30,7 @@ @nexus_handler +@nexus_restapi_handler class TimeAvgMapNexusSparkHandlerImpl(NexusCalcSparkHandler): # __singleton_lock = threading.Lock() # __singleton_instance = None @@ -67,19 +69,6 @@ class TimeAvgMapNexusSparkHandlerImpl(NexusCalcSparkHandler): } singleton = True - # @classmethod - # def instance(cls, algorithm_config=None, sc=None): - # with cls.__singleton_lock: - # if not cls.__singleton_instance: - # try: - # singleton_instance = cls() - # singleton_instance.set_config(algorithm_config) - # singleton_instance.set_spark_context(sc) - # cls.__singleton_instance = singleton_instance - # except AttributeError: - # pass - # return cls.__singleton_instance - def parse_arguments(self, request): # Parse input arguments self.log.debug("Parsing arguments") @@ -118,7 +107,8 @@ def parse_arguments(self, request): return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, nparts_requested - def calc(self, compute_options, **args): + def calc(self, compute_options, + **args): """ :param compute_options: StatsComputeOptions @@ -130,6 +120,7 @@ def calc(self, compute_options, **args): metrics_record = self._create_metrics_record() ds, bbox, start_time, end_time, nparts_requested = self.parse_arguments(compute_options) + self._setQueryParams(ds, (float(bbox.bounds[1]), float(bbox.bounds[3]), @@ -147,13 +138,13 @@ def calc(self, compute_options, **args): print('Found {} tiles'.format(len(nexus_tiles))) daysinrange = self._get_tile_service().find_days_in_range_asc(bbox.bounds[1], - bbox.bounds[3], - bbox.bounds[0], - bbox.bounds[2], - ds, - start_time, - end_time, - metrics_callback=metrics_record.record_metrics) + bbox.bounds[3], + bbox.bounds[0], + bbox.bounds[2], + ds, + start_time, + end_time, + metrics_callback=metrics_record.record_metrics) ndays = len(daysinrange) if ndays == 0: raise NoDataException(reason="No data found for selected timeframe") @@ -262,6 +253,8 @@ def calc(self, compute_options, **args): maxLon=bbox.bounds[2], ds=ds, startTime=start_time, endTime=end_time) + + @staticmethod def _map(tile_service_factory, 
metrics_callback, tile_in_spark): tile_bounds = tile_in_spark[0] diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py index 43f7f6da..83d79ff8 100644 --- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py +++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py @@ -32,7 +32,7 @@ from pytz import timezone from scipy import stats from webservice import Filtering as filtering -from webservice.NexusHandler import nexus_handler +from webservice.NexusHandler import nexus_handler, nexus_restapi_handler, nexus_wps_handler from webservice.algorithms_spark.NexusCalcSparkHandler import NexusCalcSparkHandler from webservice.webmodel import NexusResults, NoDataException, NexusProcessingException @@ -43,6 +43,7 @@ @nexus_handler +@nexus_restapi_handler class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler): name = "Time Series Spark" path = "/timeSeriesSpark" diff --git a/analysis/webservice/algorithms_spark/__init__.py b/analysis/webservice/algorithms_spark/__init__.py index a25c8d5c..c8e3fd15 100644 --- a/analysis/webservice/algorithms_spark/__init__.py +++ b/analysis/webservice/algorithms_spark/__init__.py @@ -21,11 +21,10 @@ import DailyDifferenceAverageSpark import HofMoellerSpark import MaximaMinimaSpark -import NexusCalcSparkHandler import TimeAvgMapSpark import TimeSeriesSpark import VarianceSpark - +import NexusCalcSparkHandler log = logging.getLogger(__name__) diff --git a/analysis/webservice/config/spark_pools.xml b/analysis/webservice/config/spark_pools.xml new file mode 100644 index 00000000..50906ad8 --- /dev/null +++ b/analysis/webservice/config/spark_pools.xml @@ -0,0 +1,8 @@ + + + + FAIR + 1 + 2 + + \ No newline at end of file diff --git a/analysis/webservice/jobs/__init__.py b/analysis/webservice/jobs/__init__.py new file mode 100644 index 00000000..a7a02cef --- /dev/null +++ b/analysis/webservice/jobs/__init__.py @@ -0,0 +1 @@ +from .job import Job \ No newline at end of file diff --git a/analysis/webservice/jobs/job.py b/analysis/webservice/jobs/job.py new file mode 100644 index 00000000..c97121cb --- /dev/null +++ b/analysis/webservice/jobs/job.py @@ -0,0 +1,12 @@ +from datetime import datetime + +class Job(): + def __init__(self): + self.request = None # NexusRequestObject + self.result_future = None # tornado.gen.Future + self.time_created = datetime.now() + self.time_done = None + + + + diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusAsyncJobHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusAsyncJobHandler.py new file mode 100644 index 00000000..8a2b33d9 --- /dev/null +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusAsyncJobHandler.py @@ -0,0 +1,81 @@ +import logging +import json +import uuid +from datetime import datetime, timedelta +import tornado.web +import tornado.ioloop +from webservice.nexus_tornado.request.renderers import NexusRendererFactory + + +class NexusAsyncJobHandler(tornado.web.RequestHandler): + + _job_pool = {} + __logger = logging.getLogger('nexus') + + obsolete_after = timedelta(hours=12) + clean_obsolete_every = timedelta(minutes=15) + + @classmethod + def get_job_pool(cls): + return cls._job_pool + + @classmethod + def start_jobs_cleaner(cls): + + def clean(): + for key, job in cls._job_pool.iteritems(): + if datetime.now() - job.time_done > cls.obsolete_after: + cls.__logger.info("clean job {}".format(key)) + del cls._job_pool[key] + + tornado.ioloop.IOLoop.current().call_later(cls.clean_obsolete_every.seconds, clean) 
+ + def get(self, job_id): + self.__logger.info("get job among {}".format(self._job_pool)) + if job_id in self._job_pool: + job = self._job_pool[job_id] + if job.result_future.done(): + renderer = NexusRendererFactory.get_renderer(job.request) + renderer.render(self, job.result_future.result()) + else: + self._non_completed_job_callback(job_id) + + else: + self._non_existing_job_callback(job_id) + + def _non_existing_job_callback(self, job_id, code=404): + message = "Job {} does not exist".format(job_id) + self._error_callback(message, code) + + def _non_completed_job_callback(self, job_id, code=202): + message = "Job {} is being processed".format(job_id) + self._error_callback(message, code) + + def _error_callback(self, message, code): + self.__logger.info(message, exc_info=True) + + self.set_header("Content-Type", "application/json") + self.set_header("Cache-Control", "no-cache, no-store, must-revalidate") + self.set_header("Pragma", "no-cache") + self.set_header("Expires", 0) + self.set_status(code) + + response = { + "error": message, + "code": code + } + + self.write(json.dumps(response, indent=5)) + self.finish() + + def data_received(self, chunk): + pass + + @classmethod + def get_short_job_id(cls): + while True: + job_id = str(uuid.uuid4())[:6] + if job_id not in cls._job_pool: + return job_id + + diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py b/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py new file mode 100644 index 00000000..d6af1c42 --- /dev/null +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py @@ -0,0 +1,108 @@ +import os +import logging +import sys +import importlib +import pkg_resources +import tornado.web +from webservice import NexusHandler +from webservice.nexus_tornado.request.handlers import NexusRequestHandler +from webservice.nexus_tornado.request.handlers import NexusAsyncJobHandler +import webservice.algorithms_spark.NexusCalcSparkHandler + +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout) +logger = logging.getLogger(__name__) + + +class VersionHandler(tornado.web.RequestHandler): + def get(self): + self.write(pkg_resources.get_distribution("nexusanalysis").version) + + +class NexusHandlerManager(object): + _spark_context = None + + def __init__(self, module_dirs, + algorithm_config, tile_service_factory, + max_request_threads=1, + static_dir=None): + + for moduleDir in module_dirs: + logger.info("Loading modules from %s" % moduleDir) + importlib.import_module(moduleDir) + + logger.info("Running Nexus Initializers") + NexusHandler.executeInitializers(algorithm_config) + + self._tile_service_factory = tile_service_factory + + logger.info("Initializing request ThreadPool to %s" % max_request_threads) + self._request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads) + + self._static_dir = static_dir + + def get_handlers(self): + handlers = self._get_legacy_handlers() + handlers.extend(self._get_restapi_algorithm_handlers()) + + handlers.append((r"/version", VersionHandler)) + + NexusAsyncJobHandler.start_jobs_cleaner() + handlers.append((r"/jobs/(.*)", NexusAsyncJobHandler)) + + if self._static_dir: + handlers.append( + (r'/(.*)', tornado.web.StaticFileHandler, {'path': self._static_dir, "default_filename": "index.html"})) + + return handlers + + def _get_legacy_handlers(self): + return 
self.__get_tornado_handlers(NexusHandler.AVAILABLE_LEGACY_HANDLERS, lambda x: x) + + def _get_restapi_algorithm_handlers(self): + + def path_spark_to_restapi(s): + i_spark = s.find('Spark') + return '/algorithms' + s[:i_spark] + + return self.__get_tornado_handlers(NexusHandler.AVAILABLE_RESTAPI_HANDLERS, path_spark_to_restapi) + + def _get_restapi_job_handler(self): + pass + + def __get_tornado_handlers(self, wrappers, path_func): + handlers = [] + + for clazzWrapper in wrappers: + path = path_func(clazzWrapper.path) + logger.info("adding request handler for class {} on path {}".format(clazzWrapper, path)) + if issubclass(clazzWrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler): + spark_context = self._get_spark_context() + handlers.append((path, + NexusRequestHandler, + dict(clazz=clazzWrapper, + tile_service_factory=self._tile_service_factory, + sc=spark_context, + thread_pool=self._request_thread_pool))) + else: + handlers.append((path, + NexusRequestHandler, + dict(clazz=clazzWrapper, + tile_service_factory=self._tile_service_factory, + thread_pool=self._request_thread_pool))) + + return handlers + + def _get_spark_context(self): + if self._spark_context is None: + from pyspark.sql import SparkSession + + spark = SparkSession.builder.appName("nexus-analysis") \ + .config("spark.scheduler.mode", "FAIR") \ + .config("spark.scheduler.allocation.file", os.path.abspath("webservice/config/spark_pools.xml")) \ + .getOrCreate() + self._spark_context = spark.sparkContext + + return self._spark_context diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index 210c1f31..a5a66e4b 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -1,18 +1,29 @@ import json +import datetime import logging +import functools import tornado.gen import tornado.ioloop +import tornado.util +import uuid +from datetime import datetime, timedelta +from webservice.jobs import Job from webservice.nexus_tornado.request.renderers import NexusRendererFactory +from webservice.nexus_tornado.request.handlers.NexusAsyncJobHandler import NexusAsyncJobHandler from webservice.webmodel import NexusRequestObjectTornadoFree, NexusRequestObject, NexusProcessingException class NexusRequestHandler(tornado.web.RequestHandler): + def initialize(self, thread_pool, clazz=None, **kargs): self.logger = logging.getLogger('nexus') self.executor = thread_pool self.__clazz = clazz - self._clazz_init_args = kargs # 'algorithm_config', 'sc' for spark handler + self.__synchronous_time_out_seconds = timedelta(seconds=30) + self._clazz_init_args = kargs # 'algorithm_config', 'sc' for spark handler + + @tornado.gen.coroutine def get(self): @@ -32,7 +43,14 @@ def get(self): try: # process the request asynchronously on a different thread, # the current tornado handler is still available to get other user requests - results = yield tornado.ioloop.IOLoop.current().run_in_executor(self.executor, instance.calc, request) + result_future = tornado.ioloop.IOLoop.current().run_in_executor(self.executor, + instance.calc, + request) + if self.__synchronous_time_out_seconds: + results = yield tornado.gen.with_timeout(self.__synchronous_time_out_seconds, + result_future) + else: + results = yield result_future try: self.set_status(results.status_code) @@ -42,12 +60,19 @@ def get(self): renderer = 
NexusRendererFactory.get_renderer(request) renderer.render(self, results) + except tornado.gen.TimeoutError as e: + self.logger.info("synchronous time out reached, switch to async mode") + + self._switch_to_async(request, result_future) + except NexusProcessingException as e: self.async_onerror_callback(e.reason, e.code) except Exception as e: self.async_onerror_callback(str(e), 500) + + def async_onerror_callback(self, reason, code=500): self.logger.error("Error processing request", exc_info=True) @@ -60,4 +85,37 @@ def async_onerror_callback(self, reason, code=500): } self.write(json.dumps(response, indent=5)) - self.finish() \ No newline at end of file + self.finish() + + def _switch_to_async(self, request, result_future): + job = Job() + job.request = request + + def set_job_done_datetime(job, future): + job.time_done = datetime.now() + + result_future.add_done_callback(functools.partial(set_job_done_datetime, job)) + job.result_future = result_future + job_id = NexusAsyncJobHandler.get_short_job_id() + NexusAsyncJobHandler.get_job_pool()[job_id] = job + self.async_onsynctimeout_callback(job_id) + + + def async_onsynctimeout_callback(self, job_id, code=202): + message = "Processing request is taking more than {} s, switch to async mode, check status at /jobs/{}".format( + self.__synchronous_time_out_seconds, job_id) + self.logger.info(message, + exc_info=True) + + self.set_header("Content-Type", "application/json") + self.set_status(code) + + response = { + "error": message, + "code": code, + "job_id": job_id + } + + self.write(json.dumps(response, indent=5)) + self.finish() + diff --git a/analysis/webservice/nexus_tornado/request/handlers/__init__.py b/analysis/webservice/nexus_tornado/request/handlers/__init__.py index 7c6b1f4e..0b6ad53c 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/__init__.py +++ b/analysis/webservice/nexus_tornado/request/handlers/__init__.py @@ -1 +1,3 @@ -from .NexusRequestHandler import NexusRequestHandler \ No newline at end of file +from .NexusRequestHandler import NexusRequestHandler +from .NexusHandlerManager import NexusHandlerManager +from .NexusAsyncJobHandler import NexusAsyncJobHandler diff --git a/analysis/webservice/nexus_tornado/request/renderers/NexusRendererFactory.py b/analysis/webservice/nexus_tornado/request/renderers/NexusRendererFactory.py index 9fc06e31..54dd273e 100644 --- a/analysis/webservice/nexus_tornado/request/renderers/NexusRendererFactory.py +++ b/analysis/webservice/nexus_tornado/request/renderers/NexusRendererFactory.py @@ -14,3 +14,5 @@ def get_renderer(cls, request): + + diff --git a/analysis/webservice/webapp.py b/analysis/webservice/webapp.py index d1ada7f5..bf7fd488 100644 --- a/analysis/webservice/webapp.py +++ b/analysis/webservice/webapp.py @@ -17,6 +17,7 @@ import importlib import logging import sys +import os from functools import partial import pkg_resources @@ -27,6 +28,7 @@ from nexustiles.nexustiles import NexusTileService from webservice import NexusHandler from webservice.nexus_tornado.request.handlers import NexusRequestHandler +from nexus_tornado.request.handlers import NexusHandlerManager def inject_args_in_config(args, config): @@ -78,10 +80,7 @@ def inject_args_in_config(args, config): parse_command_line() algorithm_config = inject_args_in_config(options, algorithm_config) - moduleDirs = webconfig.get("modules", "module_dirs").split(",") - for moduleDir in moduleDirs: - log.info("Loading modules from %s" % moduleDir) - importlib.import_module(moduleDir) + module_dirs = 
webconfig.get("modules", "module_dirs").split(",") staticDir = webconfig.get("static", "static_dir") staticEnabled = webconfig.get("static", "static_enabled") == "true" @@ -94,47 +93,16 @@ def inject_args_in_config(args, config): else: log.info("Static resources disabled") - handlers = [] - - log.info("Running Nexus Initializers") - NexusHandler.executeInitializers(algorithm_config) - max_request_threads = webconfig.getint("global", "server.max_simultaneous_requests") - log.info("Initializing request ThreadPool to %s" % max_request_threads) - request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads) tile_service_factory = partial(NexusTileService, False, False, algorithm_config) - spark_context = None - for clazzWrapper in NexusHandler.AVAILABLE_HANDLERS: - if issubclass(clazzWrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler): - if spark_context is None: - from pyspark.sql import SparkSession - - spark = SparkSession.builder.appName("nexus-analysis").getOrCreate() - spark_context = spark.sparkContext - - handlers.append((clazzWrapper.path, - NexusRequestHandler, - dict(clazz=clazzWrapper, - tile_service_factory=tile_service_factory, - sc=spark_context, - thread_pool=request_thread_pool))) - else: - handlers.append((clazzWrapper.path, - NexusRequestHandler, - dict(clazz=clazzWrapper, - tile_service_factory=tile_service_factory, - thread_pool=request_thread_pool))) - - class VersionHandler(tornado.web.RequestHandler): - def get(self): - self.write(pkg_resources.get_distribution("nexusanalysis").version) - - handlers.append((r"/version", VersionHandler)) + job_pool = {} - if staticEnabled: - handlers.append( - (r'/(.*)', tornado.web.StaticFileHandler, {'path': staticDir, "default_filename": "index.html"})) + nexus_handler_manager = NexusHandlerManager(module_dirs, + algorithm_config, tile_service_factory, + max_request_threads=max_request_threads, + static_dir=staticDir) + handlers = nexus_handler_manager.get_handlers() app = tornado.web.Application( handlers, diff --git a/analysis/webservice/webmodel/Exceptions.py b/analysis/webservice/webmodel/Exceptions.py index c07174e6..d8594513 100644 --- a/analysis/webservice/webmodel/Exceptions.py +++ b/analysis/webservice/webmodel/Exceptions.py @@ -16,4 +16,4 @@ def __init__(self, reason="No data found for the selected timeframe"): class DatasetNotFoundException(NexusProcessingException): def __init__(self, reason="Dataset not found"): - NexusProcessingException.__init__(self, StandardNexusErrors.DATASET_MISSING, reason, code=404) \ No newline at end of file + NexusProcessingException.__init__(self, StandardNexusErrors.DATASET_MISSING, reason, code=404) From 142ce38a99bf7084936f19f7e64ce11118af8987 Mon Sep 17 00:00:00 2001 From: thomas loubrieu Date: Fri, 2 Oct 2020 14:42:58 -0700 Subject: [PATCH 26/26] finalize master rebase --- analysis/webservice/jobs/job.py | 1 + .../nexus_tornado/request/handlers/NexusHandlerManager.py | 3 --- .../nexus_tornado/request/handlers/NexusRequestHandler.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/analysis/webservice/jobs/job.py b/analysis/webservice/jobs/job.py index c97121cb..fad0a68d 100644 --- a/analysis/webservice/jobs/job.py +++ b/analysis/webservice/jobs/job.py @@ -1,5 +1,6 @@ from datetime import datetime + class Job(): def __init__(self): self.request = None # NexusRequestObject diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py 
b/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py index d6af1c42..67121221 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusHandlerManager.py @@ -69,9 +69,6 @@ def path_spark_to_restapi(s): return self.__get_tornado_handlers(NexusHandler.AVAILABLE_RESTAPI_HANDLERS, path_spark_to_restapi) - def _get_restapi_job_handler(self): - pass - def __get_tornado_handlers(self, wrappers, path_func): handlers = [] diff --git a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py index a5a66e4b..66643537 100644 --- a/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py +++ b/analysis/webservice/nexus_tornado/request/handlers/NexusRequestHandler.py @@ -5,7 +5,6 @@ import tornado.gen import tornado.ioloop import tornado.util -import uuid from datetime import datetime, timedelta from webservice.jobs import Job
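
For context on the asynchronous mode added in the last two patches: when an algorithm request exceeds the handler's synchronous timeout, the server responds with HTTP 202 and a "job_id", and the result is later fetched from /jobs/<job_id> (202 while still processing, 404 if unknown, otherwise the rendered result). The sketch below is an illustrative client for that flow only; the host, port, endpoint path, and query parameter values are assumptions for the example and are not taken from these patches.

# Hypothetical client sketch of the async job flow (assumed host/port and
# placeholder query parameters; only the 202 + "job_id" contract and the
# /jobs/<id> route come from the patches above).
import time

import requests

BASE = "http://localhost:8083"  # assumed address of the analysis webapp

# Kick off a potentially long-running algorithm request (placeholder params).
resp = requests.get(BASE + "/algorithms/timeAvgMap", params={
    "ds": "dataset-shortname",
    "b": "-34.98,29.54,-30.1,31.0",
    "startTime": "2012-10-15T00:00:00Z",
    "endTime": "2012-10-16T00:00:00Z",
})

if resp.status_code == 202:
    # Synchronous timeout was hit; the server switched to async mode and
    # returned a job id to poll.
    job_id = resp.json()["job_id"]
    while True:
        poll = requests.get("{}/jobs/{}".format(BASE, job_id))
        if poll.status_code == 202:  # job still being processed
            time.sleep(5)
            continue
        resp = poll  # done (result) or 404 (job expired/unknown)
        break

print(resp.status_code)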