From 88819fdc979e3361b1df4678de45c953865732f5 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sat, 14 Feb 2026 18:25:45 -0600 Subject: [PATCH 01/16] feat: add observability stack and kubelogstream to coder app Adds comprehensive monitoring and Kubernetes event streaming for better visibility into Coder deployments and workspace troubleshooting. Components: - Kubelogstream: streams pod/event logs to workspace startup logs - Observability: full stack with Prometheus, Grafana, Loki, Alertmanager Changes: - Add component 5 (kubelogstream) with Helm values - Add component 6 (observability) with full monitoring stack - Configure Coder to expose Prometheus metrics and agent stats - Add coder-observability namespace to sandbox - Enhance RDS secrets action to create secrets in both namespaces - Use ebs-auto storage class for all persistent volumes Co-Authored-By: Claude Sonnet 4.5 --- .gitignore | 3 +- coder/components/5-kubelogstream.toml | 13 +++ coder/components/6-observability.toml | 13 +++ coder/components/values/coder.yaml | 8 ++ coder/components/values/kubelogstream.yaml | 20 +++++ coder/components/values/observability.yaml | 95 ++++++++++++++++++++++ coder/sandbox.tfvars | 2 +- coder/src/actions/rds_secrets/import.sh | 16 ++++ 8 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 coder/components/5-kubelogstream.toml create mode 100644 coder/components/6-observability.toml create mode 100644 coder/components/values/kubelogstream.yaml create mode 100644 coder/components/values/observability.yaml diff --git a/.gitignore b/.gitignore index 6e9488d..e95861c 100644 --- a/.gitignore +++ b/.gitignore @@ -56,4 +56,5 @@ tags # Persistent undo [._]*.un~ -# End of https://www.toptal.com/developers/gitignore/api/osx,vim +# AI +.clauderc diff --git a/coder/components/5-kubelogstream.toml b/coder/components/5-kubelogstream.toml new file mode 100644 index 0000000..8df6d7d --- /dev/null +++ b/coder/components/5-kubelogstream.toml @@ -0,0 +1,13 @@ +name = 
"kubelogstream" +type = "helm_chart" +chart_name = "coder-logstream-kube" +namespace = "coder" +storage_driver = "configmap" +dependencies = ["coder", "application_load_balancer"] + +[helm_repo] +repo_url = "https://helm.coder.com/logstream-kube" +chart = "coder-logstream-kube" + +[[values_file]] +contents = "./values/kubelogstream.yaml" diff --git a/coder/components/6-observability.toml b/coder/components/6-observability.toml new file mode 100644 index 0000000..beffe49 --- /dev/null +++ b/coder/components/6-observability.toml @@ -0,0 +1,13 @@ +name = "observability" +type = "helm_chart" +chart_name = "coder-observability" +namespace = "coder-observability" +storage_driver = "configmap" +dependencies = ["coder"] + +[helm_repo] +repo_url = "https://helm.coder.com/observability" +chart = "coder-observability" + +[[values_file]] +contents = "./values/observability.yaml" diff --git a/coder/components/values/coder.yaml b/coder/components/values/coder.yaml index 948bc26..95c9fe5 100644 --- a/coder/components/values/coder.yaml +++ b/coder/components/values/coder.yaml @@ -13,6 +13,10 @@ coder: replicaCount: "{{.nuon.inputs.inputs.replicas}}" + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "2112" + # since tls is terminated at alb tls: secretNames: [] @@ -84,6 +88,10 @@ coder: value: "{{.nuon.inputs.inputs.telemetry}}" - name: CODER_PROMETHEUS_ENABLE value: "true" + - name: CODER_PROMETHEUS_COLLECT_AGENT_STATS + value: "true" + - name: CODER_LOGGING_HUMAN + value: "/dev/stderr" - name: CODER_UPDATE_CHECK value: "true" diff --git a/coder/components/values/kubelogstream.yaml b/coder/components/values/kubelogstream.yaml new file mode 100644 index 0000000..75ea015 --- /dev/null +++ b/coder/components/values/kubelogstream.yaml @@ -0,0 +1,20 @@ +url: "https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}" + +namespaces: ["coder"] + +image: + repo: "ghcr.io/coder/coder-logstream-kube" + pullPolicy: IfNotPresent + +resources: + limits: + cpu: 
200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + +serviceAccount: + name: coder-logstream-kube + annotations: {} + labels: {} diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml new file mode 100644 index 0000000..dcf3c1b --- /dev/null +++ b/coder/components/values/observability.yaml @@ -0,0 +1,95 @@ +global: + coder: + controlPlaneNamespace: coder + externalProvisionersNamespace: coder + coderdSelector: 'pod=~`coder.*`, pod!~`.*provisioner.*`' + provisionerdSelector: 'pod=~`coder-provisioner.*`' + workspacesSelector: 'namespace=`coder`' + logFormat: human + + postgres: + hostname: "{{.nuon.components.rds_cluster_coder.outputs.endpoint}}" + port: "{{.nuon.components.rds_cluster_coder.outputs.port}}" + database: coder + username: coder + mountSecret: coder-db-password + exporter: + enabled: true + image: "quay.io/prometheuscommunity/postgres-exporter" + + telemetry: + metrics: + scrape_interval: 15s + scrape_timeout: 12s + native_histograms: false + + alerts: + enabled: true + kind: "configmap" + + dashboards: + enabled: true + timerange: 12h + refresh: 30s + queryTimeout: 900 + +grafana-agent: + enabled: true + controller: + type: daemonset + +grafana: + enabled: true + replicas: 1 + persistence: + enabled: true + size: 10Gi + storageClassName: ebs-auto + "grafana.ini": + "auth.anonymous": + enabled: true + org_name: "Main Org." 
+ org_role: Admin + analytics: + reporting_enabled: false + users: + allow_sign_up: false + +prometheus: + enabled: true + server: + replicaCount: 1 + persistentVolume: + enabled: true + size: 20Gi + storageClassName: ebs-auto + retentionSize: 18GB + retention: 15d + resources: + limits: + cpu: 1000m + memory: 2Gi + requests: + cpu: 500m + memory: 1Gi + alertmanager: + enabled: true + persistentVolume: + enabled: true + size: 2Gi + storageClassName: ebs-auto + kube-state-metrics: + enabled: true + prometheus-node-exporter: + enabled: true + +loki: + enabled: true + backend: + replicas: 1 + read: + replicas: 1 + write: + replicas: 1 + minio: + enabled: true diff --git a/coder/sandbox.tfvars b/coder/sandbox.tfvars index 5c3e888..1581e69 100644 --- a/coder/sandbox.tfvars +++ b/coder/sandbox.tfvars @@ -13,7 +13,7 @@ ebs_storage_class = { } -additional_namespaces = ["coder"] +additional_namespaces = ["coder", "coder-observability"] enable_irsa = true diff --git a/coder/src/actions/rds_secrets/import.sh b/coder/src/actions/rds_secrets/import.sh index 8711c2b..5ff6397 100755 --- a/coder/src/actions/rds_secrets/import.sh +++ b/coder/src/actions/rds_secrets/import.sh @@ -41,3 +41,19 @@ kubectl create -n "$namespace" secret generic "$name" \ -o yaml | kubectl apply -f - echo "[rds-secrets import] secret created successfully" + +# Also create secret for observability namespace +observability_namespace="coder-observability" +observability_secret_name="coder-db-password" + +echo "[rds-secrets import] creating observability namespace if not exists" +kubectl create namespace "$observability_namespace" --dry-run=client -o yaml | kubectl apply -f - + +echo "[rds-secrets import] creating postgres password secret for observability" +kubectl create -n "$observability_namespace" secret generic "$observability_secret_name" \ + --save-config \ + --dry-run=client \ + --from-literal=PGPASSWORD="$password" \ + -o yaml | kubectl apply -f - + +echo "[rds-secrets import] observability secret 
created successfully" From 6ebc27b70cafd656bdd7371a0ef6787d00cb7211 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sat, 14 Feb 2026 19:20:05 -0600 Subject: [PATCH 02/16] feat: add Grafana subdomain access with secure authentication Enables public access to Grafana dashboards via subdomain with proper authentication instead of port-forwarding. Changes: - Generate random Grafana admin password in RDS secrets action - Store credentials in AWS Secrets Manager (grafana-admin-{install-id}) - Add grafana_password action to retrieve credentials for admins - Configure Grafana ingress for subdomain: grafana.{install-id}.nuon.run - Disable anonymous authentication (require login) - Update README with step-by-step access instructions Admin retrieves credentials by running grafana_password action in Nuon UI, then logs in at the subdomain with username 'admin' and the generated password. Co-Authored-By: Claude Sonnet 4.5 --- coder/README.md | 37 +++++++++++++++++++ coder/actions/coder_rds_creds.toml | 1 + coder/actions/grafana-password.toml | 18 +++++++++ coder/components/values/observability.yaml | 23 ++++++++++-- .../actions/grafana-password/get-password.sh | 24 ++++++++++++ coder/src/actions/rds_secrets/import.sh | 31 ++++++++++++++++ 6 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 coder/actions/grafana-password.toml create mode 100755 coder/src/actions/grafana-password/get-password.sh diff --git a/coder/README.md b/coder/README.md index 2c6dc3d..f4e10cc 100644 --- a/coder/README.md +++ b/coder/README.md @@ -57,10 +57,47 @@ This follows Nuon best practices for deploying public Helm charts. > A CNAME record must be manually created in Route 53 for wildcard subdomains to work. This enables features like web apps e.g., Jupyter and web port fowarding. 
For example, if your domain is `{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}`, create a CNAME record for `*.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}` that points to the DNS name of the load balancer created by this Nuon app config. The load balancer DNS name can be found in AWS Console. +### Observability & Monitoring + +This app includes comprehensive monitoring and Kubernetes event streaming: + +- **Observability Stack**: Prometheus, Grafana, Loki, and Alertmanager deployed in the `coder-observability` namespace for metrics collection, log aggregation, and alerting +- **Kubelogstream**: Streams Kubernetes pod events directly to Coder workspace startup logs for easier troubleshooting + +**Accessing Grafana Dashboards**: + +1. In the Nuon dashboard, navigate to your Coder installation +2. Go to the **Actions** tab +3. Run the `grafana_password` action (manual trigger) +4. The action output will display: + - Grafana URL: `https://grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}` + - Username: `admin` + - Password: (randomly generated, stored in AWS Secrets Manager) +5. Open the Grafana URL in your browser and log in with the credentials + +**Available Dashboards**: +- Coder Status - Overview of Coder health +- Coder Coderd - Control plane metrics +- Workspaces - Workspace utilization and performance +- Workspace Detail - Individual workspace deep-dive +- Provisioner - Terraform provisioner metrics +- Postgres Database - RDS performance +- Infrastructure - Node metrics + +The admin password is generated once during initial deployment and persisted in AWS Secrets Manager for the lifetime of the installation. 
+ ## Coder Resources [Coder Environment Variable docs](https://coder.com/docs/reference/cli/server) [Coder Releases](https://github.com/coder/coder/releases/) +[Coder Monitoring](https://coder.com/docs/admin/monitoring) + +[Coder Kubernetes Logs Integration](https://coder.com/docs/admin/integrations/kubernetes-logs) + +[Coder Logstream Kube GitHub](https://github.com/coder/coder-logstream-kube) + +[Coder Observability GitHub](https://github.com/coder/observability) + [AWS Instance Types](https://aws.amazon.com/ec2/instance-types/) diff --git a/coder/actions/coder_rds_creds.toml b/coder/actions/coder_rds_creds.toml index 165bb3a..80ae196 100644 --- a/coder/actions/coder_rds_creds.toml +++ b/coder/actions/coder_rds_creds.toml @@ -26,3 +26,4 @@ TARGET_NAMESPACE = "coder" DB_ADDRESS = "{{ .nuon.components.rds_cluster_coder.outputs.address }}" DB_PORT = "{{ .nuon.components.rds_cluster_coder.outputs.db_instance_port }}" DB_NAME = "{{ .nuon.components.rds_cluster_coder.outputs.db_instance_name }}" +INSTALL_ID = "{{ .nuon.install.id }}" diff --git a/coder/actions/grafana-password.toml b/coder/actions/grafana-password.toml new file mode 100644 index 0000000..634291d --- /dev/null +++ b/coder/actions/grafana-password.toml @@ -0,0 +1,18 @@ +name = "grafana_password" +timeout = "1m" + +[[triggers]] +type = "manual" + +[[steps]] +name = "Retrieve Grafana Admin Password" +command = "./get-password.sh" + +[steps.public_repo] +repo = "nuonco/example-app-configs" +directory = "coder/src/actions/grafana-password" +branch = "mm/coder-observe-kubelogstream" + +[steps.env_vars] +REGION = "{{ .nuon.install_stack.outputs.region }}" +INSTALL_ID = "{{ .nuon.install.id }}" diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index dcf3c1b..89e64bc 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -41,15 +41,32 @@ grafana-agent: grafana: enabled: true replicas: 1 + admin: + 
existingSecret: grafana-admin + userKey: username + passwordKey: password persistence: enabled: true size: 10Gi storageClassName: ebs-auto + ingress: + enabled: true + ingressClassName: alb + annotations: + alb.ingress.kubernetes.io/scheme: internet-facing + alb.ingress.kubernetes.io/target-type: ip + alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' + alb.ingress.kubernetes.io/healthcheck-path: /api/health + hosts: + - grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} + path: / + pathType: Prefix "grafana.ini": + server: + domain: grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} + root_url: "https://grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}" "auth.anonymous": - enabled: true - org_name: "Main Org." - org_role: Admin + enabled: false analytics: reporting_enabled: false users: diff --git a/coder/src/actions/grafana-password/get-password.sh b/coder/src/actions/grafana-password/get-password.sh new file mode 100755 index 0000000..fc4a602 --- /dev/null +++ b/coder/src/actions/grafana-password/get-password.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail +set -u + +region="$REGION" +install_id="$INSTALL_ID" +secret_name="grafana-admin-${install_id}" + +echo "==========================================" +echo "Grafana Admin Credentials" +echo "==========================================" +echo "" + +secret=$(aws --region "$region" secretsmanager get-secret-value --secret-id="$secret_name") +username=$(echo "$secret" | jq -r '.SecretString' | jq -r '.username') +password=$(echo "$secret" | jq -r '.SecretString' | jq -r '.password') + +echo "URL: https://grafana.{{ .nuon.install.sandbox.outputs.nuon_dns.public_domain.name }}" +echo "Username: $username" +echo "Password: $password" +echo "" +echo "==========================================" diff --git a/coder/src/actions/rds_secrets/import.sh b/coder/src/actions/rds_secrets/import.sh index 5ff6397..879bcc6 100755 --- 
a/coder/src/actions/rds_secrets/import.sh +++ b/coder/src/actions/rds_secrets/import.sh @@ -57,3 +57,34 @@ kubectl create -n "$observability_namespace" secret generic "$observability_secr -o yaml | kubectl apply -f - echo "[rds-secrets import] observability secret created successfully" + +# Generate and store Grafana admin password +grafana_secret_name="grafana-admin-${INSTALL_ID}" +grafana_username="admin" + +echo "[rds-secrets import] checking if Grafana admin secret exists in Secrets Manager" +if aws --region "$region" secretsmanager describe-secret --secret-id="$grafana_secret_name" 2>/dev/null; then + echo "[rds-secrets import] Grafana admin secret already exists, retrieving" + grafana_secret=$(aws --region "$region" secretsmanager get-secret-value --secret-id="$grafana_secret_name") + grafana_password=$(echo "$grafana_secret" | jq -r '.SecretString' | jq -r '.password') +else + echo "[rds-secrets import] generating new Grafana admin password" + grafana_password=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32) + + echo "[rds-secrets import] storing Grafana admin password in Secrets Manager" + aws --region "$region" secretsmanager create-secret \ + --name "$grafana_secret_name" \ + --description "Grafana admin credentials for Nuon install ${INSTALL_ID}" \ + --secret-string "{\"username\":\"${grafana_username}\",\"password\":\"${grafana_password}\"}" \ + --tags Key=nuon-install-id,Value="${INSTALL_ID}" Key=component,Value=observability +fi + +echo "[rds-secrets import] creating Grafana admin secret in Kubernetes" +kubectl create -n "$observability_namespace" secret generic grafana-admin \ + --save-config \ + --dry-run=client \ + --from-literal=username="$grafana_username" \ + --from-literal=password="$grafana_password" \ + -o yaml | kubectl apply -f - + +echo "[rds-secrets import] Grafana admin secret created successfully" From 2eeac26d051ebf7829366ac029bd1c45c75a96d1 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sat, 14 Feb 2026 19:24:17 -0600 
Subject: [PATCH 03/16] feat: automate wildcard DNS creation via external-dns Removes manual CNAME creation step by adding wildcard domain to external-dns annotation in ALB ingress. Changes: - Add *.{domain} to external-dns hostname annotation in ALB template - Update README to note wildcard DNS is now automatic - Enables workspace web apps and port forwarding without manual DNS config external-dns now creates both the main domain and wildcard CNAME records automatically, pointing to the ALB DNS name. Co-Authored-By: Claude Sonnet 4.5 --- coder/README.md | 2 +- coder/src/components/alb/templates/alb.tpl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/coder/README.md b/coder/README.md index f4e10cc..1f713e1 100644 --- a/coder/README.md +++ b/coder/README.md @@ -55,7 +55,7 @@ This follows Nuon best practices for deploying public Helm charts. > This is a development/demo installation of Coder. Do not use in production. -> A CNAME record must be manually created in Route 53 for wildcard subdomains to work. This enables features like web apps e.g., Jupyter and web port fowarding. For example, if your domain is `{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}`, create a CNAME record for `*.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}` that points to the DNS name of the load balancer created by this Nuon app config. The load balancer DNS name can be found in AWS Console. +> Wildcard DNS for workspace subdomains is automatically configured via external-dns. This enables features like web apps (e.g., Jupyter) and web port forwarding without manual DNS configuration. 
### Observability & Monitoring diff --git a/coder/src/components/alb/templates/alb.tpl b/coder/src/components/alb/templates/alb.tpl index a287447..3dd1219 100644 --- a/coder/src/components/alb/templates/alb.tpl +++ b/coder/src/components/alb/templates/alb.tpl @@ -18,7 +18,7 @@ metadata: alb.ingress.kubernetes.io/healthy-threshold-count: '2' alb.ingress.kubernetes.io/certificate-arn: {{ .Values.domain_certificate }} - external-dns.alpha.kubernetes.io/hostname: {{ .Values.domain }} + external-dns.alpha.kubernetes.io/hostname: {{ .Values.domain }},*.{{ .Values.domain }} spec: ingressClassName: alb rules: From 441b875ed04f255e4f3a8fa38a35fda570e8e9da Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sat, 14 Feb 2026 19:34:27 -0600 Subject: [PATCH 04/16] refactor: use shared ALB for Grafana instead of separate ALB Changes Grafana from subdomain (new ALB) to path-based routing on existing Coder ALB, reducing cost and complexity. Changes: - Add group.name annotation to Coder ALB for sharing - Configure Grafana ingress to join same ALB group - Serve Grafana from /grafana path with serve_from_sub_path - Update URLs in README and password action - Set group.order=200 to route after Coder paths Result: One ALB instead of two, saves ~$20/month. Grafana accessible at https://{domain}/grafana. Co-Authored-By: Claude Sonnet 4.5 --- coder/README.md | 4 +++- coder/components/values/observability.yaml | 15 +++++++-------- .../src/actions/grafana-password/get-password.sh | 2 +- coder/src/components/alb/templates/alb.tpl | 1 + 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/coder/README.md b/coder/README.md index 1f713e1..bd86fdf 100644 --- a/coder/README.md +++ b/coder/README.md @@ -70,11 +70,13 @@ This app includes comprehensive monitoring and Kubernetes event streaming: 2. Go to the **Actions** tab 3. Run the `grafana_password` action (manual trigger) 4. 
The action output will display: - - Grafana URL: `https://grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}` + - Grafana URL: `https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}/grafana` - Username: `admin` - Password: (randomly generated, stored in AWS Secrets Manager) 5. Open the Grafana URL in your browser and log in with the credentials +Grafana is served from `/grafana` path on the same ALB as Coder, reducing infrastructure cost and complexity. + **Available Dashboards**: - Coder Status - Overview of Coder health - Coder Coderd - Control plane metrics diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index 89e64bc..f1199b2 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -53,18 +53,17 @@ grafana: enabled: true ingressClassName: alb annotations: - alb.ingress.kubernetes.io/scheme: internet-facing - alb.ingress.kubernetes.io/target-type: ip - alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' - alb.ingress.kubernetes.io/healthcheck-path: /api/health + alb.ingress.kubernetes.io/group.name: {{.nuon.install.id}} + alb.ingress.kubernetes.io/group.order: "200" hosts: - - grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} - path: / + - {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} + path: /grafana pathType: Prefix "grafana.ini": server: - domain: grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} - root_url: "https://grafana.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}" + domain: {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} + root_url: "https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}/grafana" + serve_from_sub_path: true "auth.anonymous": enabled: false analytics: diff --git a/coder/src/actions/grafana-password/get-password.sh b/coder/src/actions/grafana-password/get-password.sh index fc4a602..a49aae0 100755 
--- a/coder/src/actions/grafana-password/get-password.sh +++ b/coder/src/actions/grafana-password/get-password.sh @@ -17,7 +17,7 @@ secret=$(aws --region "$region" secretsmanager get-secret-value --secret-id="$se username=$(echo "$secret" | jq -r '.SecretString' | jq -r '.username') password=$(echo "$secret" | jq -r '.SecretString' | jq -r '.password') -echo "URL: https://grafana.{{ .nuon.install.sandbox.outputs.nuon_dns.public_domain.name }}" +echo "URL: https://{{ .nuon.install.sandbox.outputs.nuon_dns.public_domain.name }}/grafana" echo "Username: $username" echo "Password: $password" echo "" diff --git a/coder/src/components/alb/templates/alb.tpl b/coder/src/components/alb/templates/alb.tpl index 3dd1219..a165c7a 100644 --- a/coder/src/components/alb/templates/alb.tpl +++ b/coder/src/components/alb/templates/alb.tpl @@ -17,6 +17,7 @@ metadata: alb.ingress.kubernetes.io/unhealthy-threshold-count: '2' alb.ingress.kubernetes.io/healthy-threshold-count: '2' alb.ingress.kubernetes.io/certificate-arn: {{ .Values.domain_certificate }} + alb.ingress.kubernetes.io/group.name: {{ .Values.install_name }} external-dns.alpha.kubernetes.io/hostname: {{ .Values.domain }},*.{{ .Values.domain }} spec: From aef5e7b673ec3c62db6e73bc7672d88d9eb58059 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 15 Feb 2026 08:49:04 -0600 Subject: [PATCH 05/16] chore: remove obsolete gp2 storage class action Deletes remove-gp2-default action that was only needed for troubleshooting a previous storage class issue. 
Co-Authored-By: Claude Sonnet 4.5 --- coder/actions/remove-gp2-default.toml | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 coder/actions/remove-gp2-default.toml diff --git a/coder/actions/remove-gp2-default.toml b/coder/actions/remove-gp2-default.toml deleted file mode 100644 index fffcdb1..0000000 --- a/coder/actions/remove-gp2-default.toml +++ /dev/null @@ -1,16 +0,0 @@ -# action - -name = "remove_gp2_default" -timeout = "1m" - -[[triggers]] -type = "manual" - -[[steps]] -name = "remove_gp2_default_annotation" -inline_contents = """ -#!/usr/bin/env sh -kubectl patch storageclass gp2 -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' -echo "Removed default annotation from gp2 storage class" -kubectl get storageclass -""" From c3c3d4cb98a07d44828c14102beeeb99aa12aeaf Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 15 Feb 2026 09:03:21 -0600 Subject: [PATCH 06/16] refactor: set storage class as default in sandbox, keep action as fallback Changes storage class to be default from the start rather than relying on action. Changes: - Set is_default_class=true in sandbox.tfvars - Remove automatic trigger from default-storage-class action - Keep action as manual-only for troubleshooting if needed Storage class is now default from sandbox creation, with manual action available if ever needed to re-apply. 
Co-Authored-By: Claude Sonnet 4.5 --- coder/actions/default-storage-class.toml | 4 ---- coder/sandbox.tfvars | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/coder/actions/default-storage-class.toml b/coder/actions/default-storage-class.toml index ada7984..e32fc8f 100644 --- a/coder/actions/default-storage-class.toml +++ b/coder/actions/default-storage-class.toml @@ -3,10 +3,6 @@ name = "default_storage_class" timeout = "1m" -[[triggers]] -type = "pre-deploy-component" -component_name = "coder" - [[triggers]] type = "manual" diff --git a/coder/sandbox.tfvars b/coder/sandbox.tfvars index 1581e69..918ec86 100644 --- a/coder/sandbox.tfvars +++ b/coder/sandbox.tfvars @@ -1,7 +1,7 @@ ebs_storage_class = { enabled = true name = "ebs-auto" - is_default_class = false + is_default_class = true provisioner = "ebs.csi.eks.amazonaws.com" volume_binding_mode = "WaitForFirstConsumer" reclaim_policy = "Delete" From 0560f22fe96f3aee684ece0f55b714c19c0bdaf8 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 12:19:03 -0600 Subject: [PATCH 07/16] fix: point branch refs to mm/coder-observe-kubelogstream, remove debug env var Co-Authored-By: Claude Sonnet 4.6 --- coder/actions/coder_rds_creds.toml | 2 +- coder/components/4-alb.toml | 2 +- coder/runner.toml | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/coder/actions/coder_rds_creds.toml b/coder/actions/coder_rds_creds.toml index 80ae196..87e7da1 100644 --- a/coder/actions/coder_rds_creds.toml +++ b/coder/actions/coder_rds_creds.toml @@ -16,7 +16,7 @@ command = "./import.sh" [steps.public_repo] repo = "nuonco/example-app-configs" directory = "coder/src/actions/rds_secrets" -branch = "main" +branch = "mm/coder-observe-kubelogstream" [steps.env_vars] SECRET_ARN = "{{ .nuon.components.rds_cluster_coder.outputs.db_instance_master_user_secret_arn }}" diff --git a/coder/components/4-alb.toml b/coder/components/4-alb.toml index 553176a..e1bd618 100644 --- a/coder/components/4-alb.toml 
+++ b/coder/components/4-alb.toml @@ -8,7 +8,7 @@ dependencies = ["coder"] [public_repo] repo = "nuonco/example-app-configs" directory = "coder/src/components/alb" -branch = "main" +branch = "mm/coder-observe-kubelogstream" [values] domain_certificate = "{{.nuon.components.certificate.outputs.public_domain_certificate_arn}}" diff --git a/coder/runner.toml b/coder/runner.toml index 21bdd6d..e6c02e6 100644 --- a/coder/runner.toml +++ b/coder/runner.toml @@ -4,5 +4,3 @@ runner_type = "aws" helm_driver = "configmap" init_script_url = "https://raw.githubusercontent.com/nuonco/runner/refs/heads/main/scripts/aws/init-mng-v2.sh" -[env_vars] -foo = "bar" From f3bcdd56041f112b3acd5a88da3625c4b213b4ff Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 15:10:40 -0600 Subject: [PATCH 08/16] fix: correct rds output keys in observability values Replace endpoint/port with address/db_instance_port to match actual rds_cluster_coder outputs, fixing template rendering failure. Co-Authored-By: Claude Sonnet 4.6 --- coder/components/values/observability.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index f1199b2..d41e84a 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -8,8 +8,8 @@ global: logFormat: human postgres: - hostname: "{{.nuon.components.rds_cluster_coder.outputs.endpoint}}" - port: "{{.nuon.components.rds_cluster_coder.outputs.port}}" + hostname: "{{.nuon.components.rds_cluster_coder.outputs.address}}" + port: "{{.nuon.components.rds_cluster_coder.outputs.db_instance_port}}" database: coder username: coder mountSecret: coder-db-password From 91db6f0b6d336592ead4ef6c603c790cd53ad781 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 15:50:58 -0600 Subject: [PATCH 09/16] fix: set high group.order on main ALB so sub-path ingresses take precedence Without group.order, the 
main ALB's catch-all '/' rule (priority 0) intercepts all traffic including /grafana before Grafana's ingress rule can match. Setting group.order=1000 ensures more specific paths from other ingresses (e.g. Grafana at /grafana) are evaluated first. Co-Authored-By: Claude Sonnet 4.6 --- coder/src/components/alb/templates/alb.tpl | 1 + 1 file changed, 1 insertion(+) diff --git a/coder/src/components/alb/templates/alb.tpl b/coder/src/components/alb/templates/alb.tpl index a165c7a..e7ab222 100644 --- a/coder/src/components/alb/templates/alb.tpl +++ b/coder/src/components/alb/templates/alb.tpl @@ -18,6 +18,7 @@ metadata: alb.ingress.kubernetes.io/healthy-threshold-count: '2' alb.ingress.kubernetes.io/certificate-arn: {{ .Values.domain_certificate }} alb.ingress.kubernetes.io/group.name: {{ .Values.install_name }} + alb.ingress.kubernetes.io/group.order: "1000" external-dns.alpha.kubernetes.io/hostname: {{ .Values.domain }},*.{{ .Values.domain }} spec: From 6261577dee5c03961ff17e143514e10409d5f846 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 17:50:55 -0600 Subject: [PATCH 10/16] fix: add target-type ip to grafana ingress annotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit target-type is not inherited from the group leader ingress — each ingress must declare it. Without it the controller defaults to instance mode, Grafana's ClusterIP service has no NodePort, port resolves to 0, and CreateTargetGroup fails. 
Co-Authored-By: Claude Sonnet 4.6 --- coder/components/values/observability.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index d41e84a..fecac71 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -55,6 +55,7 @@ grafana: annotations: alb.ingress.kubernetes.io/group.name: {{.nuon.install.id}} alb.ingress.kubernetes.io/group.order: "200" + alb.ingress.kubernetes.io/target-type: ip hosts: - {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} path: /grafana From a49ceec7de5b71963a8cf59fb7abee7efdaa312b Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 18:06:29 -0600 Subject: [PATCH 11/16] fix: add listen-ports HTTPS:443 to grafana ingress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this annotation the ALB controller places the /grafana rule on the HTTP:80 listener only. The main ALB ingress uses HTTPS:443, so /grafana was never matched on that listener — Coder's /* caught it first. 
Co-Authored-By: Claude Sonnet 4.6 --- coder/components/values/observability.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index fecac71..a52db0a 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -56,6 +56,7 @@ grafana: alb.ingress.kubernetes.io/group.name: {{.nuon.install.id}} alb.ingress.kubernetes.io/group.order: "200" alb.ingress.kubernetes.io/target-type: ip + alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' hosts: - {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} path: /grafana From 013bce15dec2b9d8398382c3aee46a687b15b6d9 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 19:04:29 -0600 Subject: [PATCH 12/16] fix: disable grafana password policy to allow generated passwords The upstream coder-observability chart enforces password complexity. Generated passwords (base64, no special chars) fail the policy, blocking both initial login and grafana-cli password reset. 
Co-Authored-By: Claude Sonnet 4.6 --- coder/components/values/observability.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index a52db0a..7c743da 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -66,6 +66,8 @@ grafana: domain: {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}} root_url: "https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}/grafana" serve_from_sub_path: true + security: + password_policy: false "auth.anonymous": enabled: false analytics: From 2a0b3b732e565eaa92af64793893b10dc7b68142 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 19:13:59 -0600 Subject: [PATCH 13/16] fix: override DISABLE_INITIAL_ADMIN_CREATION to allow admin user creation The upstream chart sets GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION=true which prevents Grafana from creating the admin user from the existingSecret env vars. Override to false so admin is created from grafana-admin secret on first start. Co-Authored-By: Claude Sonnet 4.6 --- coder/components/values/observability.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coder/components/values/observability.yaml b/coder/components/values/observability.yaml index 7c743da..8e98089 100644 --- a/coder/components/values/observability.yaml +++ b/coder/components/values/observability.yaml @@ -41,6 +41,8 @@ grafana-agent: grafana: enabled: true replicas: 1 + env: + GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION: "false" admin: existingSecret: grafana-admin userKey: username From 2026c750925566f3d7558521a045058a3f362466 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 19:31:52 -0600 Subject: [PATCH 14/16] refactor: move grafana secret setup to dedicated action Extract Grafana admin credential creation from rds_secrets into its own grafana_setup action, triggered post-deploy of coder component. 
This ensures the grafana-admin secret exists before observability deploys, and keeps each action focused on a single concern. Co-Authored-By: Claude Sonnet 4.6 --- coder/actions/grafana-setup.toml | 22 +++++++++++++ coder/src/actions/grafana-setup/setup.sh | 41 ++++++++++++++++++++++++ coder/src/actions/rds_secrets/import.sh | 30 ----------------- 3 files changed, 63 insertions(+), 30 deletions(-) create mode 100644 coder/actions/grafana-setup.toml create mode 100644 coder/src/actions/grafana-setup/setup.sh diff --git a/coder/actions/grafana-setup.toml b/coder/actions/grafana-setup.toml new file mode 100644 index 0000000..fed92a3 --- /dev/null +++ b/coder/actions/grafana-setup.toml @@ -0,0 +1,22 @@ +name = "grafana_setup" +timeout = "2m" + +[[triggers]] +type = "post-deploy-component" +component_name = "coder" + +[[triggers]] +type = "manual" + +[[steps]] +name = "Create Grafana Admin Secret" +command = "./setup.sh" + +[steps.public_repo] +repo = "nuonco/example-app-configs" +directory = "coder/src/actions/grafana-setup" +branch = "mm/coder-observe-kubelogstream" + +[steps.env_vars] +REGION = "{{ .nuon.install_stack.outputs.region }}" +INSTALL_ID = "{{ .nuon.install.id }}" diff --git a/coder/src/actions/grafana-setup/setup.sh b/coder/src/actions/grafana-setup/setup.sh new file mode 100644 index 0000000..da6e569 --- /dev/null +++ b/coder/src/actions/grafana-setup/setup.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail +set -u + +region="$REGION" +install_id="$INSTALL_ID" +grafana_secret_name="grafana-admin-${install_id}" +grafana_username="admin" +observability_namespace="coder-observability" + +echo "[grafana-setup] creating namespace if not exists" +kubectl create namespace "$observability_namespace" --dry-run=client -o yaml | kubectl apply -f - + +echo "[grafana-setup] checking if Grafana admin secret exists in Secrets Manager" +if aws --region "$region" secretsmanager describe-secret --secret-id="$grafana_secret_name" 2>/dev/null; then + echo 
"[grafana-setup] Grafana admin secret already exists, retrieving" + grafana_secret=$(aws --region "$region" secretsmanager get-secret-value --secret-id="$grafana_secret_name") + grafana_password=$(echo "$grafana_secret" | jq -r '.SecretString' | jq -r '.password') +else + echo "[grafana-setup] generating new Grafana admin password" + grafana_password=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32) + + echo "[grafana-setup] storing Grafana admin password in Secrets Manager" + aws --region "$region" secretsmanager create-secret \ + --name "$grafana_secret_name" \ + --description "Grafana admin credentials for Nuon install ${install_id}" \ + --secret-string "{\"username\":\"${grafana_username}\",\"password\":\"${grafana_password}\"}" \ + --tags Key=nuon-install-id,Value="${install_id}" Key=component,Value=observability +fi + +echo "[grafana-setup] creating Grafana admin secret in Kubernetes" +kubectl create -n "$observability_namespace" secret generic grafana-admin \ + --save-config \ + --dry-run=client \ + --from-literal=username="$grafana_username" \ + --from-literal=password="$grafana_password" \ + -o yaml | kubectl apply -f - + +echo "[grafana-setup] Grafana admin secret created successfully" diff --git a/coder/src/actions/rds_secrets/import.sh b/coder/src/actions/rds_secrets/import.sh index 879bcc6..c8497c8 100755 --- a/coder/src/actions/rds_secrets/import.sh +++ b/coder/src/actions/rds_secrets/import.sh @@ -58,33 +58,3 @@ kubectl create -n "$observability_namespace" secret generic "$observability_secr echo "[rds-secrets import] observability secret created successfully" -# Generate and store Grafana admin password -grafana_secret_name="grafana-admin-${INSTALL_ID}" -grafana_username="admin" - -echo "[rds-secrets import] checking if Grafana admin secret exists in Secrets Manager" -if aws --region "$region" secretsmanager describe-secret --secret-id="$grafana_secret_name" 2>/dev/null; then - echo "[rds-secrets import] Grafana admin secret already exists, 
retrieving" - grafana_secret=$(aws --region "$region" secretsmanager get-secret-value --secret-id="$grafana_secret_name") - grafana_password=$(echo "$grafana_secret" | jq -r '.SecretString' | jq -r '.password') -else - echo "[rds-secrets import] generating new Grafana admin password" - grafana_password=$(openssl rand -base64 32 | tr -d "=+/" | cut -c1-32) - - echo "[rds-secrets import] storing Grafana admin password in Secrets Manager" - aws --region "$region" secretsmanager create-secret \ - --name "$grafana_secret_name" \ - --description "Grafana admin credentials for Nuon install ${INSTALL_ID}" \ - --secret-string "{\"username\":\"${grafana_username}\",\"password\":\"${grafana_password}\"}" \ - --tags Key=nuon-install-id,Value="${INSTALL_ID}" Key=component,Value=observability -fi - -echo "[rds-secrets import] creating Grafana admin secret in Kubernetes" -kubectl create -n "$observability_namespace" secret generic grafana-admin \ - --save-config \ - --dry-run=client \ - --from-literal=username="$grafana_username" \ - --from-literal=password="$grafana_password" \ - -o yaml | kubectl apply -f - - -echo "[rds-secrets import] Grafana admin secret created successfully" From bfda8e72992c612e8d4f51203f26782e05945c2f Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 19:33:40 -0600 Subject: [PATCH 15/16] fix: use pre-deploy-component trigger for grafana setup action Trigger the action directly before the observability component deploys instead of after the coder component deploys; pre-deploy-component is the correct lifecycle hook for pre-seeding secrets.
Co-Authored-By: Claude Sonnet 4.6 --- coder/actions/grafana-setup.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coder/actions/grafana-setup.toml b/coder/actions/grafana-setup.toml index fed92a3..c8b94b3 100644 --- a/coder/actions/grafana-setup.toml +++ b/coder/actions/grafana-setup.toml @@ -2,8 +2,8 @@ name = "grafana_setup" timeout = "2m" [[triggers]] -type = "post-deploy-component" -component_name = "coder" +type = "pre-deploy-component" +component_name = "observability" [[triggers]] type = "manual" From 284b6ea4e7dd6e49542f84691df16b7b34a7d0b6 Mon Sep 17 00:00:00 2001 From: Mark Milligan Date: Sun, 22 Feb 2026 19:37:50 -0600 Subject: [PATCH 16/16] fix: add missing component dependencies - rds_cluster_coder depends on rds_subnet (uses its subnet group id) - application_load_balancer depends on certificate (uses its ARN) - observability depends on application_load_balancer (Grafana joins ALB group) Co-Authored-By: Claude Sonnet 4.6 --- coder/components/1-rds_cluster_coder.toml | 1 + coder/components/4-alb.toml | 2 +- coder/components/6-observability.toml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/coder/components/1-rds_cluster_coder.toml b/coder/components/1-rds_cluster_coder.toml index 65ec11b..3fa2d35 100644 --- a/coder/components/1-rds_cluster_coder.toml +++ b/coder/components/1-rds_cluster_coder.toml @@ -2,6 +2,7 @@ name = "rds_cluster_coder" type = "terraform_module" terraform_version = "1.13.5" +dependencies = ["rds_subnet"] [public_repo] repo = "nuonco/example-app-configs" diff --git a/coder/components/4-alb.toml b/coder/components/4-alb.toml index e1bd618..5720569 100644 --- a/coder/components/4-alb.toml +++ b/coder/components/4-alb.toml @@ -3,7 +3,7 @@ name = "application_load_balancer" type = "helm_chart" chart_name = "application-load-balancer" -dependencies = ["coder"] +dependencies = ["coder", "certificate"] [public_repo] repo = "nuonco/example-app-configs" diff --git 
a/coder/components/6-observability.toml b/coder/components/6-observability.toml index beffe49..41569b6 100644 --- a/coder/components/6-observability.toml +++ b/coder/components/6-observability.toml @@ -3,7 +3,7 @@ type = "helm_chart" chart_name = "coder-observability" namespace = "coder-observability" storage_driver = "configmap" -dependencies = ["coder"] +dependencies = ["coder", "application_load_balancer"] [helm_repo] repo_url = "https://helm.coder.com/observability"