Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,5 @@ tags
# Persistent undo
[._]*.un~

# End of https://www.toptal.com/developers/gitignore/api/osx,vim
# AI
.clauderc
41 changes: 40 additions & 1 deletion coder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,51 @@ This follows Nuon best practices for deploying public Helm charts.

> This is a development/demo installation of Coder. Do not use in production.

> A CNAME record must be manually created in Route 53 for wildcard subdomains to work. This enables features like web apps (e.g., Jupyter) and web port forwarding. For example, if your domain is `{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}`, create a CNAME record for `*.{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}` that points to the DNS name of the load balancer created by this Nuon app config. The load balancer DNS name can be found in the AWS Console.
> Wildcard DNS for workspace subdomains is automatically configured via external-dns. This enables features like web apps (e.g., Jupyter) and web port forwarding without manual DNS configuration.

### Observability & Monitoring

This app includes comprehensive monitoring and Kubernetes event streaming:

- **Observability Stack**: Prometheus, Grafana, Loki, and Alertmanager deployed in the `coder-observability` namespace for metrics collection, log aggregation, and alerting
- **Kubelogstream**: Streams Kubernetes pod events directly to Coder workspace startup logs for easier troubleshooting

**Accessing Grafana Dashboards**:

1. In the Nuon dashboard, navigate to your Coder installation
2. Go to the **Actions** tab
3. Run the `grafana_password` action (manual trigger)
4. The action output will display:
   - Grafana URL: `https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}/grafana`
   - Username: `admin`
   - Password: (randomly generated, stored in AWS Secrets Manager)
5. Open the Grafana URL in your browser and log in with the credentials

Grafana is served from the `/grafana` path on the same ALB as Coder, reducing infrastructure cost and complexity.

**Available Dashboards**:
- Coder Status - Overview of Coder health
- Coder Coderd - Control plane metrics
- Workspaces - Workspace utilization and performance
- Workspace Detail - Individual workspace deep-dive
- Provisioner - Terraform provisioner metrics
- Postgres Database - RDS performance
- Infrastructure - Node metrics

The admin password is generated once during initial deployment and persisted in AWS Secrets Manager for the lifetime of the installation.

## Coder Resources

[Coder Environment Variable docs](https://coder.com/docs/reference/cli/server)

[Coder Releases](https://github.com/coder/coder/releases/)

[Coder Monitoring](https://coder.com/docs/admin/monitoring)

[Coder Kubernetes Logs Integration](https://coder.com/docs/admin/integrations/kubernetes-logs)

[Coder Logstream Kube GitHub](https://github.com/coder/coder-logstream-kube)

[Coder Observability GitHub](https://github.com/coder/observability)

[AWS Instance Types](https://aws.amazon.com/ec2/instance-types/)
3 changes: 2 additions & 1 deletion coder/actions/coder_rds_creds.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ command = "./import.sh"
[steps.public_repo]
repo = "nuonco/example-app-configs"
directory = "coder/src/actions/rds_secrets"
branch = "main"
branch = "mm/coder-observe-kubelogstream"

[steps.env_vars]
SECRET_ARN = "{{ .nuon.components.rds_cluster_coder.outputs.db_instance_master_user_secret_arn }}"
Expand All @@ -26,3 +26,4 @@ TARGET_NAMESPACE = "coder"
DB_ADDRESS = "{{ .nuon.components.rds_cluster_coder.outputs.address }}"
DB_PORT = "{{ .nuon.components.rds_cluster_coder.outputs.db_instance_port }}"
DB_NAME = "{{ .nuon.components.rds_cluster_coder.outputs.db_instance_name }}"
INSTALL_ID = "{{ .nuon.install.id }}"
4 changes: 0 additions & 4 deletions coder/actions/default-storage-class.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
name = "default_storage_class"
timeout = "1m"

[[triggers]]
type = "pre-deploy-component"
component_name = "coder"

[[triggers]]
type = "manual"

Expand Down
18 changes: 18 additions & 0 deletions coder/actions/grafana-password.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name = "grafana_password"
timeout = "1m"

[[triggers]]
type = "manual"

[[steps]]
name = "Retrieve Grafana Admin Password"
command = "./get-password.sh"

[steps.public_repo]
repo = "nuonco/example-app-configs"
directory = "coder/src/actions/grafana-password"
branch = "mm/coder-observe-kubelogstream"

[steps.env_vars]
REGION = "{{ .nuon.install_stack.outputs.region }}"
INSTALL_ID = "{{ .nuon.install.id }}"
22 changes: 22 additions & 0 deletions coder/actions/grafana-setup.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name = "grafana_setup"
timeout = "2m"

[[triggers]]
type = "pre-deploy-component"
component_name = "observability"

[[triggers]]
type = "manual"

[[steps]]
name = "Create Grafana Admin Secret"
command = "./setup.sh"

[steps.public_repo]
repo = "nuonco/example-app-configs"
directory = "coder/src/actions/grafana-setup"
branch = "mm/coder-observe-kubelogstream"

[steps.env_vars]
REGION = "{{ .nuon.install_stack.outputs.region }}"
INSTALL_ID = "{{ .nuon.install.id }}"
16 changes: 0 additions & 16 deletions coder/actions/remove-gp2-default.toml

This file was deleted.

1 change: 1 addition & 0 deletions coder/components/1-rds_cluster_coder.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
name = "rds_cluster_coder"
type = "terraform_module"
terraform_version = "1.13.5"
dependencies = ["rds_subnet"]

[public_repo]
repo = "nuonco/example-app-configs"
Expand Down
4 changes: 2 additions & 2 deletions coder/components/4-alb.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
name = "application_load_balancer"
type = "helm_chart"
chart_name = "application-load-balancer"
dependencies = ["coder"]
dependencies = ["coder", "certificate"]

[public_repo]
repo = "nuonco/example-app-configs"
directory = "coder/src/components/alb"
branch = "main"
branch = "mm/coder-observe-kubelogstream"

[values]
domain_certificate = "{{.nuon.components.certificate.outputs.public_domain_certificate_arn}}"
Expand Down
13 changes: 13 additions & 0 deletions coder/components/5-kubelogstream.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name = "kubelogstream"
type = "helm_chart"
chart_name = "coder-logstream-kube"
namespace = "coder"
storage_driver = "configmap"
dependencies = ["coder", "application_load_balancer"]

[helm_repo]
repo_url = "https://helm.coder.com/logstream-kube"
chart = "coder-logstream-kube"

[[values_file]]
contents = "./values/kubelogstream.yaml"
13 changes: 13 additions & 0 deletions coder/components/6-observability.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name = "observability"
type = "helm_chart"
chart_name = "coder-observability"
namespace = "coder-observability"
storage_driver = "configmap"
dependencies = ["coder", "application_load_balancer"]

[helm_repo]
repo_url = "https://helm.coder.com/observability"
chart = "coder-observability"

[[values_file]]
contents = "./values/observability.yaml"
8 changes: 8 additions & 0 deletions coder/components/values/coder.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ coder:

replicaCount: "{{.nuon.inputs.inputs.replicas}}"

podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "2112"

# since tls is terminated at alb
tls:
secretNames: []
Expand Down Expand Up @@ -84,6 +88,10 @@ coder:
value: "{{.nuon.inputs.inputs.telemetry}}"
- name: CODER_PROMETHEUS_ENABLE
value: "true"
- name: CODER_PROMETHEUS_COLLECT_AGENT_STATS
value: "true"
- name: CODER_LOGGING_HUMAN
value: "/dev/stderr"
- name: CODER_UPDATE_CHECK
value: "true"

Expand Down
20 changes: 20 additions & 0 deletions coder/components/values/kubelogstream.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
url: "https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}"

namespaces: ["coder"]

image:
repo: "ghcr.io/coder/coder-logstream-kube"
pullPolicy: IfNotPresent

resources:
limits:
cpu: 200m
memory: 256Mi
requests:
cpu: 100m
memory: 128Mi

serviceAccount:
name: coder-logstream-kube
annotations: {}
labels: {}
117 changes: 117 additions & 0 deletions coder/components/values/observability.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
global:
coder:
controlPlaneNamespace: coder
externalProvisionersNamespace: coder
coderdSelector: 'pod=~`coder.*`, pod!~`.*provisioner.*`'
provisionerdSelector: 'pod=~`coder-provisioner.*`'
workspacesSelector: 'namespace=`coder`'
logFormat: human

postgres:
hostname: "{{.nuon.components.rds_cluster_coder.outputs.address}}"
port: "{{.nuon.components.rds_cluster_coder.outputs.db_instance_port}}"
database: coder
username: coder
mountSecret: coder-db-password
exporter:
enabled: true
image: "quay.io/prometheuscommunity/postgres-exporter"

telemetry:
metrics:
scrape_interval: 15s
scrape_timeout: 12s
native_histograms: false

alerts:
enabled: true
kind: "configmap"

dashboards:
enabled: true
timerange: 12h
refresh: 30s
queryTimeout: 900

grafana-agent:
enabled: true
controller:
type: daemonset

grafana:
enabled: true
replicas: 1
env:
GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION: "false"
admin:
existingSecret: grafana-admin
userKey: username
passwordKey: password
persistence:
enabled: true
size: 10Gi
storageClassName: ebs-auto
ingress:
enabled: true
ingressClassName: alb
annotations:
alb.ingress.kubernetes.io/group.name: {{.nuon.install.id}}
alb.ingress.kubernetes.io/group.order: "200"
alb.ingress.kubernetes.io/target-type: ip
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
hosts:
- {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}
path: /grafana
pathType: Prefix
"grafana.ini":
server:
domain: {{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}
root_url: "https://{{.nuon.install.sandbox.outputs.nuon_dns.public_domain.name}}/grafana"
serve_from_sub_path: true
security:
password_policy: false
"auth.anonymous":
enabled: false
analytics:
reporting_enabled: false
users:
allow_sign_up: false

prometheus:
enabled: true
server:
replicaCount: 1
persistentVolume:
enabled: true
size: 20Gi
storageClassName: ebs-auto
retentionSize: 18GB
retention: 15d
resources:
limits:
cpu: 1000m
memory: 2Gi
requests:
cpu: 500m
memory: 1Gi
alertmanager:
enabled: true
persistentVolume:
enabled: true
size: 2Gi
storageClassName: ebs-auto
kube-state-metrics:
enabled: true
prometheus-node-exporter:
enabled: true

loki:
enabled: true
backend:
replicas: 1
read:
replicas: 1
write:
replicas: 1
minio:
enabled: true
2 changes: 0 additions & 2 deletions coder/runner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,3 @@ runner_type = "aws"
helm_driver = "configmap"
init_script_url = "https://raw.githubusercontent.com/nuonco/runner/refs/heads/main/scripts/aws/init-mng-v2.sh"

[env_vars]
foo = "bar"
4 changes: 2 additions & 2 deletions coder/sandbox.tfvars
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ebs_storage_class = {
enabled = true
name = "ebs-auto"
is_default_class = false
is_default_class = true
provisioner = "ebs.csi.eks.amazonaws.com"
volume_binding_mode = "WaitForFirstConsumer"
reclaim_policy = "Delete"
Expand All @@ -13,7 +13,7 @@ ebs_storage_class = {
}


additional_namespaces = ["coder"]
additional_namespaces = ["coder", "coder-observability"]

enable_irsa = true

Expand Down
Loading