diff --git a/.github/workflows/ci-full.yaml b/.github/workflows/ci-full.yaml
index b1b5526..90042f6 100644
--- a/.github/workflows/ci-full.yaml
+++ b/.github/workflows/ci-full.yaml
@@ -99,7 +99,7 @@ jobs:
- name: Run Perf Analyzer Job
run: |
kubectl apply -f ci/perf-analyzer-job.yaml
- kubectl wait --for=condition=complete job/perf-analyzer-job -n cms --timeout=240s || \
+ kubectl wait --for=condition=complete job/perf-analyzer-job -n cms --timeout=300s || \
(echo "Perf-analyzer job did not complete in time or failed." && exit 1)
POD_NAME=$(kubectl get pods -n cms -l job-name=perf-analyzer-job -o jsonpath="{.items[0].metadata.name}")
diff --git a/docs/advanced-monitoring.rst b/docs/advanced-monitoring.rst
index 5be0df5..4de9f91 100644
--- a/docs/advanced-monitoring.rst
+++ b/docs/advanced-monitoring.rst
@@ -63,7 +63,6 @@ shows how tracing is configured for CMS SuperSONIC instance:
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoBTag/Combined/data/models/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
- --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
--trace-config mode=opentelemetry \
--trace-config=opentelemetry,resource=pod_name=$(hostname) \
--trace-config opentelemetry,url=supersonic-opentelemetry-collector:4318/v1/traces \
diff --git a/docs/configuration-guide.rst b/docs/configuration-guide.rst
index d8ac473..4c4788d 100644
--- a/docs/configuration-guide.rst
+++ b/docs/configuration-guide.rst
@@ -33,7 +33,6 @@ Triton version must be specified in the ``triton.image`` parameter in the values
- |
/opt/tritonserver/bin/tritonserver \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoBTag/Combined/data/models/ \
- --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
--allow-gpu-metrics=true \
@@ -90,7 +89,7 @@ Triton version must be specified in the ``triton.image`` parameter in the values
-3. Select Resources for Triton Pods
+3. Select Resources for Triton Pods
=============================================
- You can configure CPU, memory, and GPU resources for Triton pods via the ``triton.resources`` parameter in the values file:
diff --git a/helm/supersonic/Chart.yaml b/helm/supersonic/Chart.yaml
index 00440c2..f47c428 100644
--- a/helm/supersonic/Chart.yaml
+++ b/helm/supersonic/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
name: supersonic
description: Server infrastructure for inference-as-a-service in large scientific experiments.
icon: https://github.com/fastmachinelearning/SuperSONIC/blob/main/docs/img/SuperSONIC_small_512.png?raw=true
-version: 0.2.1
+version: 0.3.0
type: application
home: https://fastmachinelearning.org/SuperSONIC/
annotations:
diff --git a/values/values-anvil-cms.yaml b/values/values-anvil-cms.yaml
index 6b1572a..276bf8f 100644
--- a/values/values-anvil-cms.yaml
+++ b/values/values-anvil-cms.yaml
@@ -1,12 +1,10 @@
triton:
- # image: fastml/triton-torchgeo:21.02-py3-geometric # run2
- image: fastml/triton-torchgeo:22.07-py3-geometric # run3
+ image: nvcr.io/nvidia/tritonserver:24.11-py3
command: ["/bin/sh", "-c"]
args:
- |
/opt/tritonserver/bin/tritonserver \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoBTag/Combined/data/models/ \
- --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
--allow-gpu-metrics=true \
diff --git a/values/values-geddes-cms.yaml b/values/values-geddes-cms.yaml
index 4378370..a81fda3 100644
--- a/values/values-geddes-cms.yaml
+++ b/values/values-geddes-cms.yaml
@@ -1,31 +1,30 @@
serverLoadThreshold: 20
serverLoadMetric: 'sum by (release) (rate(nv_inference_queue_duration_us{release=~"sonic-server"}[30s]) / (rate(nv_inference_exec_count{release=~"sonic-server"}[30s]) * 1000 + 0.001))'
-triton:
- # image: fastml/triton-torchgeo:21.02-py3-geometric # run2
- image: fastml/triton-torchgeo:22.07-py3-geometric # run3
+triton:
+ image: nvcr.io/nvidia/tritonserver:24.11-py3
command: ["/bin/sh", "-c"]
- args:
+ args:
- |
/opt/tritonserver/bin/tritonserver \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoBTag/Combined/data/models/ \
- --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
+ --trace-config mode=opentelemetry \
+ --trace-config=opentelemetry,resource=pod_name=$(hostname) \
+ --trace-config opentelemetry,url=sonic-server-opentelemetry-collector:4318/v1/traces \
+ --trace-config rate=100 \
+ --trace-config level=TIMESTAMPS \
+ --trace-config count=-1 \
--allow-gpu-metrics=true \
--log-verbose=0 \
--strict-model-config=false \
--exit-timeout-secs=60
- # --trace-config mode=opentelemetry
- # --trace-config=opentelemetry,resource=pod_name=$(hostname)
- # --trace-config opentelemetry,url=sonic-server-opentelemetry-collector:4318/v1/traces
- # --trace-config rate=100 # 1 in 100 requests
- # --trace-config level=TIMESTAMPS
- # --trace-config count=-1
+
resources:
- limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
- requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
- nodeSelector: {'cms-af-prod': 'true'}
+ limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G }
+ requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G }
+ nodeSelector: { "cms-af-prod": "true" }
tolerations:
- key: hub.jupyter.org/dedicated
operator: Equal
@@ -43,7 +42,7 @@ triton:
envoy:
enabled: true
- nodeSelector: {'cms-af-prod': 'true'}
+ nodeSelector: { "cms-af-prod": "true" }
tolerations:
- key: hub.jupyter.org/dedicated
operator: Equal
@@ -56,30 +55,33 @@ envoy:
enabled: true
hostName: sonic-cms.geddes.rcac.purdue.edu
ingressClassName: public
+ rate_limiter:
+ prometheus_based:
+ enabled: true
+ tracing_sampling_rate: 0.01
keda:
enabled: true
minReplicaCount: 1
- maxReplicaCount: 7
+ maxReplicaCount: 11
+ scaleUp:
+ stabilizationWindowSeconds: 30
+ periodSeconds: 15
+ stepsize: 1
+ scaleDown:
+ stabilizationWindowSeconds: 45
+ periodSeconds: 45
+ stepsize: 1
ingress:
enabled: false
prometheus:
- enabled: true
- server:
- useExistingClusterRoleName: sonic-server-prometheus-role
- ingress:
- enabled: true
- hosts:
- - prometheus-cms.geddes.rcac.purdue.edu
- tls:
- - hosts:
- - prometheus-cms.geddes.rcac.purdue.edu
- ingressClassName: public
- serviceAccounts:
- server:
- name: sonic-server-prometheus-sa
+ external:
+ enabled: true
+ url: prometheus-af.geddes.rcac.purdue.edu
+ port: 443
+ scheme: https
grafana:
enabled: true
@@ -92,7 +94,7 @@ grafana:
type: prometheus
access: proxy
isDefault: true
- url: http://sonic-server-prometheus-server:9090
+ url: https://prometheus-af.geddes.rcac.purdue.edu
jsonData:
timeInterval: "5s"
tlsSkipVerify: true
@@ -106,7 +108,7 @@ grafana:
timeInterval: "5s"
tlsSkipVerify: true
serviceMap:
- datasourceUid: 'prometheus'
+ datasourceUid: "prometheus"
nodeGraph:
enabled: true
ingress:
@@ -127,13 +129,14 @@ opentelemetry-collector:
exporters:
otlp:
endpoint: http://sonic-server-tempo:4317
- otlphttp:
+ otlphttp:
endpoint: http://sonic-server-tempo:4318
prometheusremotewrite:
- endpoint: http://sonic-server-prometheus-server:9090/api/v1/write
+ endpoint: http://prometheus-server:9090/api/v1/write
+
tempo:
enabled: true
tempo:
metricsGenerator:
enabled: true
- remoteWriteUrl: http://sonic-server-prometheus-server:9090/api/v1/write
\ No newline at end of file
+ remoteWriteUrl: http://prometheus-server:9090/api/v1/write
diff --git a/values/values-nautilus-cms.yaml b/values/values-nautilus-cms.yaml
index 82850c4..b63169e 100644
--- a/values/values-nautilus-cms.yaml
+++ b/values/values-nautilus-cms.yaml
@@ -18,7 +18,6 @@ triton:
--strict-model-config=false \
--exit-timeout-secs=60 \
--backend-config=onnxruntime,enable-global-threadpool=1
-# --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
resources:
limits: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}