Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
7148c09
remove plugin from documentation and create a CI job to test installa…
kondratyevd Jul 29, 2025
b449a77
Update helm docs
actions-user Jul 29, 2025
1311da5
fix repo URL
kondratyevd Jul 29, 2025
4f6a0b5
Update helm docs
actions-user Jul 29, 2025
75128be
expand values.yaml for CMS Geddes config
kondratyevd Jul 29, 2025
aae90a7
don't validate prometheus
kondratyevd Jul 29, 2025
83cd87d
instructions to install from GitHub
kondratyevd Jul 29, 2025
0bbc22e
Update helm docs
actions-user Jul 29, 2025
1b20f4e
update cms values
kondratyevd Jul 29, 2025
8c2a376
configure tracing
kondratyevd Jul 29, 2025
da1f007
decouple parameters that control tracing rate in OTel and Triton
kondratyevd Jul 29, 2025
d4dd100
Update JSON schema
actions-user Jul 29, 2025
4e3a661
Update helm docs
actions-user Jul 29, 2025
c0f247b
test both local and remote installations
kondratyevd Jul 29, 2025
e79aeeb
add dependencies to installation test
kondratyevd Jul 29, 2025
e83caba
remove installer plugin files
kondratyevd Jul 29, 2025
87377ca
don't validate grafana existence
kondratyevd Jul 29, 2025
e01cbfa
fix condition
kondratyevd Jul 29, 2025
ec296f5
update values files
kondratyevd Jul 29, 2025
491fbb0
fix condition
kondratyevd Jul 29, 2025
7cc7f10
delete obsolete installation scripts
kondratyevd Jul 29, 2025
69f18a7
update ci workflows and add minimal values file
kondratyevd Jul 29, 2025
a68a0ed
increase timeout for keda
kondratyevd Jul 29, 2025
b28fee4
fix CI
kondratyevd Jul 29, 2025
108a573
fix CI - we are already in the repo
kondratyevd Jul 29, 2025
f3b5d40
update README
kondratyevd Jul 29, 2025
ff109d7
Update helm docs
actions-user Jul 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: ci [CMS]
name: ci [full]

on:
push:
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
helm repo add opentelemetry https://open-telemetry.github.io/opentelemetry-helm-charts
helm dependency build ./helm/supersonic
helm upgrade --install supersonic ./helm/supersonic \
--values values/values-cms-ci.yaml -n cms
--values values/values-minimal-full.yaml -n cms

- name: CVMFS Mount ready
run: |
Expand Down Expand Up @@ -84,8 +84,8 @@ jobs:

- name: Autoscaler ready
run: |
kubectl wait --for condition=AbleToScale hpa -l app.kubernetes.io/component=keda --timeout 120s -n cms
kubectl wait --for condition=Ready so -l app.kubernetes.io/component=keda --timeout 120s -n cms
kubectl wait --for condition=AbleToScale hpa -l app.kubernetes.io/component=keda --timeout 180s -n cms
kubectl wait --for condition=Ready so -l app.kubernetes.io/component=keda --timeout 180s -n cms

- name: Triton server ready
run: |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: ci [installer plugin]
name: ci [installation]

on:
push:
Expand All @@ -9,7 +9,7 @@ on:
- "main"

jobs:
test-installer-plugin:
test-installation:
runs-on: ubuntu-latest
steps:
- name: Checkout code
Expand All @@ -25,9 +25,9 @@ jobs:
with:
version: v3.12.0

- name: Create CMS namespace
- name: Create test namespace
run: |
kubectl create namespace cms
kubectl create namespace test-ns

- name: Install Prometheus Operator CRDs
run: |
Expand All @@ -36,17 +36,32 @@ jobs:
kubectl create namespace monitoring
helm install prometheus-operator prometheus-community/kube-prometheus-stack --namespace monitoring --set prometheusOperator.createCustomResource=false --set defaultRules.create=false --set alertmanager.enabled=false --set prometheus.enabled=false --set grafana.enabled=false

- name: Install KEDA Autoscaler
- name: Install KEDA Autoscaler CRDs
run: |
helm repo add kedacore https://kedacore.github.io/charts
helm repo update
kubectl create namespace keda
helm install keda kedacore/keda --namespace keda

- name: Install SuperSONIC from remote repo via plugin
- name: Test installation of SuperSONIC from remote Helm repo
run: |
helm plugin install .
helm install-supersonic supersonic --local --values values/values-cms-ci.yaml -n cms
helm repo add fastml https://fastmachinelearning.org/SuperSONIC/
helm repo update
helm install supersonic fastml/supersonic -n test-ns -f values/values-minimal.yaml
helm uninstall supersonic -n test-ns

- name: Test installation of SuperSONIC from GitHub
run: |
# Add dependencies
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo add grafana https://grafana.github.io/helm-charts
helm repo add opentelemetry https://open-telemetry.github.io/opentelemetry-helm-charts
helm repo update

# Install SuperSONIC
helm dependency build helm/supersonic
helm install supersonic helm/supersonic -n test-ns -f values/values-minimal.yaml
helm uninstall supersonic -n test-ns

- name: Cleanup
run: kind delete cluster --name gh-k8s-cluster
35 changes: 13 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,36 +31,27 @@ The main components of SuperSONIC are:

## Installation

The installation is done via a custom Helm plugin which takes care of
internal connectivity of the chart components. Standard Helm installation
is also supported, but requires a lot more manual configuration.
### Install from Helm repository

```
helm plugin install https://github.com/fastmachinelearning/SuperSONIC/
helm install-supersonic <release-name> -n <namespace> -f <your-values.yaml>
```

Installer plugin usage:
```
Usage:
helm install-supersonic [RELEASE_NAME] [flags]

Flags:
-h, --help Show this help message
-f, --values Specify values file for custom configuration
-n, --namespace Specify Kubernetes namespace for deployment
--version Specify chart version (default: latest version)
Note: Ignored if --local flag is set
--local Install from local chart path instead of remote repository
--path Local chart path (default: ./helm/supersonic)
Only used when --local flag is set
Additional flags will be passed directly to the 'helm install' command
helm repo add fastml https://fastmachinelearning.org/SuperSONIC
helm repo update
helm install <release-name> fastml/supersonic -n <namespace> -f <your-values.yaml>
```

To construct the `values.yaml` file for your application, follow the [Configuration guide](http://fastmachinelearning.org/SuperSONIC/configuration-guide.html "Configuration guide").

The full list of configuration parameters is available in the [Configuration reference](http://fastmachinelearning.org/SuperSONIC/configuration-reference.html "Configuration reference").

### Install from GitHub

```
git clone https://github.com/fastmachinelearning/SuperSONIC.git
cd SuperSONIC
git checkout <branch-or-commit>
helm dependency build helm/supersonic
helm install <release-name> helm/supersonic -n <namespace> -f <your-values.yaml>
```

## Server diagram

Expand Down
2 changes: 0 additions & 2 deletions deploy-geddes-cms.sh

This file was deleted.

2 changes: 0 additions & 2 deletions deploy-nautilus-atlas.sh

This file was deleted.

2 changes: 0 additions & 2 deletions deploy-nautilus-cms.sh

This file was deleted.

2 changes: 1 addition & 1 deletion docs/.values-table.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
| envoy.auth.audiences | list | `[]` | |
| envoy.auth.url | string | `""` | |
| envoy.auth.port | int | `443` | |
| envoy.tracing_sampling_rate | float | `0.01` | |
| autoscaler.enabled | bool | `false` | Enable autoscaling (requires Prometheus to also be enabled). Autoscaling is based on the metric specified by the ``prometheus.serverLoadMetric`` parameter; new Triton servers will spawn if the metric exceeds the threshold set by ``prometheus.serverLoadThreshold``. |
| autoscaler.minReplicaCount | int | `1` | Minimum and maximum number of Triton servers. Warning: if min=0 and the desired Prometheus metric is empty, the first server will never start. |
| autoscaler.maxReplicaCount | int | `2` | |
Expand Down Expand Up @@ -125,7 +126,6 @@
| tempo.tempo.receivers.otlp.protocols.http.endpoint | string | `"0.0.0.0:4318"` | |
| tempo.tempo.livenessProbe.initialDelaySeconds | int | `0` | |
| tempo.tempo.readinessProbe.initialDelaySeconds | int | `0` | |
| tracing_sampling_rate | float | `0.01` | |
| opentelemetry-collector.enabled | bool | `false` | |
| opentelemetry-collector.image.repository | string | `"otel/opentelemetry-collector-contrib"` | |
| opentelemetry-collector.image.tag | string | `"0.120.0"` | |
Expand Down
3 changes: 1 addition & 2 deletions docs/advanced-monitoring.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ Displaying Tracing Data in Grafana

If Grafana is enabled in your ``values.yaml``, you can display the tracing data
in the Grafana dashboard. In order to achieve this, Grafana needs to have a
Tempo datasource configured. This is done automatically when you install
SuperSONIC via the ``install-supersonic`` plugin.
Tempo datasource configured.

If OpenTelemetry Collector and Tempo are enabled, the default Grafana dashboard
will include an interactive server map, where you can study tracing data in detail
Expand Down
31 changes: 4 additions & 27 deletions docs/getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,41 +19,18 @@ Installation
- `Configuration reference <configuration-reference>`_
- `Example values.yaml files <https://github.com/fastmachinelearning/SuperSONIC/tree/main/values>`_

2. Install Helm plugin to handle SuperSONIC installation
2. Add the SuperSONIC Helm repository

.. code:: shell

helm plugin install https://github.com/fastmachinelearning/SuperSONIC/


The Helm plugin is needed to ensure internal connectivity of the SuperSONIC
components. Standard Helm installation without a plugin is also supported,
but requires a lot more manual configuration.
helm repo add fastml https://fastmachinelearning.org/SuperSONIC/
helm repo update

3. Modify the following command to install the chart on your cluster:

.. code:: shell

helm install-supersonic <release-name> -n <namespace> -f <your-values.yaml>

Installer plugin usage:

.. code:: shell

Usage:
helm install-supersonic [RELEASE_NAME] [flags]

Flags:
-h, --help Show this help message
-f, --values Specify values file for custom configuration
-n, --namespace Specify Kubernetes namespace for deployment
--version Specify chart version (default: latest version)
Note: Ignored if --local flag is set
--local Install from local chart path instead of remote repository
--path Local chart path (default: ./helm/supersonic)
Only used when --local flag is set
Additional flags will be passed directly to the 'helm install' command

helm install <release-name> fastml/supersonic -n <namespace> -f <your-values.yaml>

Use a unique meaningful lowercase value as <release-name>, for example
``supersonic-cms-run3``.
Expand Down
35 changes: 13 additions & 22 deletions helm/supersonic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,36 +31,27 @@ The main components of SuperSONIC are:

## Installation

The installation is done via a custom Helm plugin which takes care of
internal connectivity of the chart components. Standard Helm installation
is also supported, but requires a lot more manual configuration.
### Install from Helm repository

```
helm plugin install https://github.com/fastmachinelearning/SuperSONIC/
helm install-supersonic <release-name> -n <namespace> -f <your-values.yaml>
```

Installer plugin usage:
```
Usage:
helm install-supersonic [RELEASE_NAME] [flags]

Flags:
-h, --help Show this help message
-f, --values Specify values file for custom configuration
-n, --namespace Specify Kubernetes namespace for deployment
--version Specify chart version (default: latest version)
Note: Ignored if --local flag is set
--local Install from local chart path instead of remote repository
--path Local chart path (default: ./helm/supersonic)
Only used when --local flag is set
Additional flags will be passed directly to the 'helm install' command
helm repo add fastml https://fastmachinelearning.org/SuperSONIC
helm repo update
helm install <release-name> fastml/supersonic -n <namespace> -f <your-values.yaml>
```

To construct the `values.yaml` file for your application, follow the [Configuration guide](http://fastmachinelearning.org/SuperSONIC/configuration-guide.html "Configuration guide").

The full list of configuration parameters is available in the [Configuration reference](http://fastmachinelearning.org/SuperSONIC/configuration-reference.html "Configuration reference").

### Install from GitHub

```
git clone https://github.com/fastmachinelearning/SuperSONIC.git
cd SuperSONIC
git checkout <branch-or-commit>
helm dependency build helm/supersonic
helm install <release-name> helm/supersonic -n <namespace> -f <your-values.yaml>
```

## Server diagram

Expand Down
3 changes: 1 addition & 2 deletions helm/supersonic/templates/NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{{- /* Run validation checks */ -}}
{{- include "supersonic.validateGrafanaAddressConsistency" . -}}
{{- include "supersonic.validateGrafanaValues" . -}}
{{- include "supersonic.validatePrometheus" . -}}
{{- include "supersonic.validatePrometheusAddressConsistency" . -}}
{{- include "supersonic.validatePrometheusValues" . -}}

Expand Down Expand Up @@ -38,7 +37,7 @@ Scaling threshold: {{ include "supersonic.defaultThreshold" . }}
|
| Prometheus UI: {{ include "supersonic.prometheusDisplayUrl" . }}
{{- end }}
{{- if or .Values.grafana.enabled (include "supersonic.grafanaExists" .) }}
{{- if .Values.grafana.enabled }}
|
| Grafana dashboard: {{ include "supersonic.grafanaDisplayUrl" . }}
{{- end }}
Expand Down
20 changes: 0 additions & 20 deletions helm/supersonic/templates/_helpers/_grafana.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,6 @@ Get full Grafana URL
{{- include "supersonic.common.getServiceUrl" (dict "scheme" (include "supersonic.grafanaScheme" .) "host" (include "supersonic.grafanaHost" .) "port" (include "supersonic.grafanaPort" .)) -}}
{{- end -}}

{{/*
Check if Grafana exists in the namespace
*/}}
{{- define "supersonic.grafanaExists" -}}
{{- include "supersonic.common.serviceExists" (dict "serviceName" "grafana" "root" .) -}}
{{- end -}}

{{/*
Validate that there is no existing Grafana instance when enabling a new one
*/}}
{{- define "supersonic.validateGrafana" -}}
{{- if .Values.grafana.enabled -}}
{{- if include "supersonic.grafanaExists" . -}}
{{- $details := fromJson (include "supersonic.common.getExistingServiceDetails" (dict "serviceType" "grafana" "root" .)) -}}
{{- $url := include "supersonic.common.getServiceDisplayUrl" (dict "scheme" $details.scheme "host" $details.host) -}}
{{- fail (printf "Error: Found existing Grafana instance in the namespace:\n- Namespace: %s\n- URL: %s\n\nTo proceed, either:\n1. Set grafana.enabled=false in values.yaml to use the existing Grafana instance, OR\n2. Uninstall the existing Grafana instance" .Release.Namespace $url) -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
Validate Grafana address consistency
*/}}
Expand Down
7 changes: 0 additions & 7 deletions helm/supersonic/templates/_helpers/_prometheus.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,6 @@ Check if Prometheus exists in the namespace
{{- include "supersonic.common.serviceExists" (dict "serviceName" "prometheus" "root" .) -}}
{{- end -}}

{{/*
Validate that there is no existing Prometheus instance when enabling a new one
*/}}
{{- define "supersonic.validatePrometheus" -}}
{{- include "supersonic.common.validateNoExistingService" (dict "serviceType" "prometheus" "values" .Values "root" .) -}}
{{- end -}}

{{/*
Validate RBAC permissions for Prometheus
*/}}
Expand Down
2 changes: 1 addition & 1 deletion helm/supersonic/templates/envoy/configmaps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static_resources:
{{- if (index .root.Values "opentelemetry-collector" "enabled") }}
tracing:
random_sampling:
value: {{ mulf .root.Values.tracing_sampling_rate 100 }}
value: {{ mulf .envoy.tracing_sampling_rate 100 }}
provider:
name: envoy.tracers.opentelemetry
typed_config:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if and .Values.grafana.enabled (ne (include "supersonic.grafanaExists" .) "true") }}
{{- if .Values.grafana.enabled -}}
apiVersion: v1
kind: ConfigMap
metadata:
Expand Down
10 changes: 5 additions & 5 deletions helm/supersonic/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,9 @@
"port",
"url"
]
},
"tracing_sampling_rate": {
"type": "number"
}
},
"required": [
Expand All @@ -432,7 +435,8 @@
"rate_limiter",
"replicas",
"resources",
"service"
"service",
"tracing_sampling_rate"
]
},
"autoscaler": {
Expand Down Expand Up @@ -1360,9 +1364,6 @@
"tempo"
]
},
"tracing_sampling_rate": {
"type": "number"
},
"opentelemetry-collector": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -1948,7 +1949,6 @@
"serverLoadThreshold",
"tempo",
"tolerations",
"tracing_sampling_rate",
"triton"
]
}
Loading