From 5056b2f1da5bd6fec01ffbf918859e409b351ec3 Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Thu, 8 Apr 2021 14:19:43 +0100 Subject: [PATCH 01/12] update kube-state-metrics to support k8s 1.17 --- kube-state-metrics/base/deployment.yaml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kube-state-metrics/base/deployment.yaml b/kube-state-metrics/base/deployment.yaml index cd21903..2722266 100644 --- a/kube-state-metrics/base/deployment.yaml +++ b/kube-state-metrics/base/deployment.yaml @@ -3,7 +3,7 @@ kind: Deployment metadata: labels: app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.5 + app.kubernetes.io/version: 2.0.0 name: kube-state-metrics spec: replicas: 1 @@ -14,10 +14,10 @@ spec: metadata: labels: app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.5 + app.kubernetes.io/version: 2.0.0 spec: containers: - - image: quay.io/coreos/kube-state-metrics:v1.9.5 + - image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0-rc.1 livenessProbe: httpGet: path: /healthz @@ -30,6 +30,13 @@ spec: name: http-metrics - containerPort: 8081 name: telemetry + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 200m + memory: 200Mi readinessProbe: httpGet: path: / From f2ec8957eee31a1079239ff19ba43cd8b61df2df Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Thu, 8 Apr 2021 15:01:48 +0100 Subject: [PATCH 02/12] install prometheus --- {observability => oms-agent}/Makefile | 0 .../container-azm-ms-agentconfig.yaml | 0 .../kustomization.yaml | 1 - prometheus/Makefile | 16 + prometheus/README.md | 10 + prometheus/generated/prometheus-cm.yaml | 274 ++++++++++ prometheus/generated/prometheus-cr.yaml | 41 ++ prometheus/generated/prometheus-crb.yaml | 19 + .../generated/prometheus-deployment.yaml | 93 ++++ prometheus/generated/prometheus-sa.yaml | 14 + prometheus/generated/prometheus-svc.yaml | 24 + prometheus/install.yaml | 475 ++++++++++++++++++ prometheus/kustomization.yaml | 
7 + prometheus/prometheus-cm.yaml | 350 +++++++++++++ .../prometheus-configmap.yaml | 0 15 files changed, 1323 insertions(+), 1 deletion(-) rename {observability => oms-agent}/Makefile (100%) rename {observability => oms-agent}/container-azm-ms-agentconfig.yaml (100%) rename {observability => oms-agent}/kustomization.yaml (62%) create mode 100644 prometheus/Makefile create mode 100644 prometheus/README.md create mode 100755 prometheus/generated/prometheus-cm.yaml create mode 100755 prometheus/generated/prometheus-cr.yaml create mode 100755 prometheus/generated/prometheus-crb.yaml create mode 100755 prometheus/generated/prometheus-deployment.yaml create mode 100755 prometheus/generated/prometheus-sa.yaml create mode 100755 prometheus/generated/prometheus-svc.yaml create mode 100644 prometheus/install.yaml create mode 100644 prometheus/kustomization.yaml create mode 100755 prometheus/prometheus-cm.yaml rename {observability => prometheus}/prometheus-configmap.yaml (100%) diff --git a/observability/Makefile b/oms-agent/Makefile similarity index 100% rename from observability/Makefile rename to oms-agent/Makefile diff --git a/observability/container-azm-ms-agentconfig.yaml b/oms-agent/container-azm-ms-agentconfig.yaml similarity index 100% rename from observability/container-azm-ms-agentconfig.yaml rename to oms-agent/container-azm-ms-agentconfig.yaml diff --git a/observability/kustomization.yaml b/oms-agent/kustomization.yaml similarity index 62% rename from observability/kustomization.yaml rename to oms-agent/kustomization.yaml index 90c5901..4b396dd 100644 --- a/observability/kustomization.yaml +++ b/oms-agent/kustomization.yaml @@ -1,3 +1,2 @@ resources: - container-azm-ms-agentconfig.yaml - - prometheus-configmap.yaml diff --git a/prometheus/Makefile b/prometheus/Makefile new file mode 100644 index 0000000..b705010 --- /dev/null +++ b/prometheus/Makefile @@ -0,0 +1,16 @@ + +.PHONY: apply +apply: ## Apply kubernetes manifests + kustomize build . 
| kubectl apply -f - + +.PHONY: restart-prometheus +restart-prometheus: ## Restart prometheus so config changes take effect + kubectl delete pods -l app=prometheus -n istio-system + +.PHONY: delete +delete: ## Delete resources + kustomize build . | kubectl delete --ignore-not-found -f - || true + +.PHONY: help +help: ## Display this help screen + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/prometheus/README.md b/prometheus/README.md new file mode 100644 index 0000000..20daa8e --- /dev/null +++ b/prometheus/README.md @@ -0,0 +1,10 @@ +# Prometheus install + +```bash +curl https://raw.githubusercontent.com/istio/istio/release-1.9/samples/addons/prometheus.yaml -o install.yaml + +# https://github.com/mogensen/kubernetes-split-yaml +go get -v github.com/mogensen/kubernetes-split-yaml + +~/go/bin/kubernetes-split-yaml install.yaml +``` diff --git a/prometheus/generated/prometheus-cm.yaml b/prometheus/generated/prometheus-cm.yaml new file mode 100755 index 0000000..b45acbf --- /dev/null +++ b/prometheus/generated/prometheus-cm.yaml @@ -0,0 +1,274 @@ +# Source: prometheus/templates/server/cm.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +data: + alerting_rules.yml: | + {} + alerts: | + {} + prometheus.yml: | + global: + evaluation_interval: 1m + scrape_interval: 15s + scrape_timeout: 10s + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml + - /etc/config/rules + - /etc/config/alerts + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-apiservers + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: 
default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes-cadvisor + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - job_name: kubernetes-service-endpoints + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: kubernetes_node + - job_name: kubernetes-service-endpoints-slow + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: kubernetes_node + scrape_interval: 5m + scrape_timeout: 30s + - honor_labels: true + job_name: prometheus-pushgateway + kubernetes_sd_configs: + - role: service + relabel_configs: + - action: keep + regex: pushgateway + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - job_name: kubernetes-services + kubernetes_sd_configs: + - role: service + metrics_path: /probe + params: + module: + - http_2xx + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - source_labels: + - __address__ + target_label: __param_target + - replacement: blackbox + target_label: __address__ + - source_labels: + - __param_target + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: 
replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + - action: drop + regex: Pending|Succeeded|Failed + source_labels: + - __meta_kubernetes_pod_phase + - job_name: kubernetes-pods-slow + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + - action: drop + regex: Pending|Succeeded|Failed + source_labels: + - __meta_kubernetes_pod_phase + scrape_interval: 5m + scrape_timeout: 30s + recording_rules.yml: | + {} + rules: | + {} diff --git a/prometheus/generated/prometheus-cr.yaml b/prometheus/generated/prometheus-cr.yaml new file mode 100755 index 0000000..d8eca84 --- /dev/null +++ b/prometheus/generated/prometheus-cr.yaml @@ -0,0 +1,41 @@ +# Source: prometheus/templates/server/clusterrole.yaml +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus +rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - ingresses + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + - "networking.k8s.io" + resources: + - ingresses/status + - ingresses + verbs: + - get + - list + - watch + - nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/prometheus/generated/prometheus-crb.yaml b/prometheus/generated/prometheus-crb.yaml new file mode 100755 index 0000000..0346d19 --- /dev/null +++ b/prometheus/generated/prometheus-crb.yaml @@ -0,0 +1,19 @@ +# Source: prometheus/templates/server/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus +subjects: + - kind: ServiceAccount + name: prometheus + namespace: istio-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus diff --git a/prometheus/generated/prometheus-deployment.yaml b/prometheus/generated/prometheus-deployment.yaml new file mode 100755 index 0000000..d4c8edf --- /dev/null +++ b/prometheus/generated/prometheus-deployment.yaml @@ -0,0 +1,93 @@ +# Source: prometheus/templates/server/deploy.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +spec: + selector: + matchLabels: + component: "server" + app: prometheus + release: prometheus + replicas: 1 + template: + metadata: + annotations: + sidecar.istio.io/inject: "false" + labels: + component: "server" + app: prometheus + release: prometheus 
+ chart: prometheus-11.16.2 + heritage: Helm + spec: + serviceAccountName: prometheus + containers: + - name: prometheus-server-configmap-reload + image: "jimmidyson/configmap-reload:v0.4.0" + imagePullPolicy: "IfNotPresent" + args: + - --volume-dir=/etc/config + - --webhook-url=http://127.0.0.1:9090/-/reload + resources: {} + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + + - name: prometheus-server + image: "prom/prometheus:v2.21.0" + imagePullPolicy: "IfNotPresent" + args: + - --storage.tsdb.retention.time=15d + - --config.file=/etc/config/prometheus.yml + - --storage.tsdb.path=/data + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + - --web.enable-lifecycle + ports: + - containerPort: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: 9090 + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 30 + failureThreshold: 3 + successThreshold: 1 + livenessProbe: + httpGet: + path: /-/healthy + port: 9090 + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 30 + failureThreshold: 3 + successThreshold: 1 + resources: {} + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: storage-volume + mountPath: /data + subPath: "" + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + terminationGracePeriodSeconds: 300 + volumes: + - name: config-volume + configMap: + name: prometheus + - name: storage-volume + emptyDir: {} diff --git a/prometheus/generated/prometheus-sa.yaml b/prometheus/generated/prometheus-sa.yaml new file mode 100755 index 0000000..7176fed --- /dev/null +++ b/prometheus/generated/prometheus-sa.yaml @@ -0,0 +1,14 @@ +--- +# Source: prometheus/templates/server/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: 
istio-system + annotations: {} diff --git a/prometheus/generated/prometheus-svc.yaml b/prometheus/generated/prometheus-svc.yaml new file mode 100755 index 0000000..5c288af --- /dev/null +++ b/prometheus/generated/prometheus-svc.yaml @@ -0,0 +1,24 @@ +# Source: prometheus/templates/server/service.yaml +apiVersion: v1 +kind: Service +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +spec: + ports: + - name: http + port: 9090 + protocol: TCP + targetPort: 9090 + selector: + component: "server" + app: prometheus + release: prometheus + sessionAffinity: None + type: "ClusterIP" diff --git a/prometheus/install.yaml b/prometheus/install.yaml new file mode 100644 index 0000000..8418a2c --- /dev/null +++ b/prometheus/install.yaml @@ -0,0 +1,475 @@ +--- +# Source: prometheus/templates/server/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system + annotations: + {} +--- +# Source: prometheus/templates/server/cm.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +data: + alerting_rules.yml: | + {} + alerts: | + {} + prometheus.yml: | + global: + evaluation_interval: 1m + scrape_interval: 15s + scrape_timeout: 10s + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml + - /etc/config/rules + - /etc/config/alerts + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-apiservers + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + 
regex: default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes-cadvisor + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - job_name: kubernetes-service-endpoints + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: kubernetes_node + - job_name: kubernetes-service-endpoints-slow + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: kubernetes_node + scrape_interval: 5m + scrape_timeout: 30s + - honor_labels: true + job_name: prometheus-pushgateway + kubernetes_sd_configs: + - role: service + relabel_configs: + - action: keep + regex: pushgateway + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - job_name: kubernetes-services + kubernetes_sd_configs: + - role: service + metrics_path: /probe + params: + module: + - http_2xx + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - source_labels: + - __address__ + target_label: __param_target + - replacement: blackbox + target_label: __address__ + - source_labels: + - __param_target + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: 
replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + - action: drop + regex: Pending|Succeeded|Failed + source_labels: + - __meta_kubernetes_pod_phase + - job_name: kubernetes-pods-slow + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + - action: drop + regex: Pending|Succeeded|Failed + source_labels: + - __meta_kubernetes_pod_phase + scrape_interval: 5m + scrape_timeout: 30s + recording_rules.yml: | + {} + rules: | + {} +--- +# Source: prometheus/templates/server/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus +rules: + - apiGroups: + - "" + resources: 
+ - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - ingresses + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + - "networking.k8s.io" + resources: + - ingresses/status + - ingresses + verbs: + - get + - list + - watch + - nonResourceURLs: + - "/metrics" + verbs: + - get +--- +# Source: prometheus/templates/server/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus +subjects: + - kind: ServiceAccount + name: prometheus + namespace: istio-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +--- +# Source: prometheus/templates/server/service.yaml +apiVersion: v1 +kind: Service +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +spec: + ports: + - name: http + port: 9090 + protocol: TCP + targetPort: 9090 + selector: + component: "server" + app: prometheus + release: prometheus + sessionAffinity: None + type: "ClusterIP" +--- +# Source: prometheus/templates/server/deploy.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +spec: + selector: + matchLabels: + component: "server" + app: prometheus + release: prometheus + replicas: 1 + template: + metadata: + annotations: + + sidecar.istio.io/inject: "false" + labels: + component: "server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + spec: + serviceAccountName: prometheus + containers: + - name: prometheus-server-configmap-reload + image: "jimmidyson/configmap-reload:v0.4.0" + imagePullPolicy: "IfNotPresent" + 
args: + - --volume-dir=/etc/config + - --webhook-url=http://127.0.0.1:9090/-/reload + resources: + {} + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + + - name: prometheus-server + image: "prom/prometheus:v2.21.0" + imagePullPolicy: "IfNotPresent" + args: + - --storage.tsdb.retention.time=15d + - --config.file=/etc/config/prometheus.yml + - --storage.tsdb.path=/data + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + - --web.enable-lifecycle + ports: + - containerPort: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: 9090 + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 30 + failureThreshold: 3 + successThreshold: 1 + livenessProbe: + httpGet: + path: /-/healthy + port: 9090 + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 30 + failureThreshold: 3 + successThreshold: 1 + resources: + {} + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: storage-volume + mountPath: /data + subPath: "" + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + terminationGracePeriodSeconds: 300 + volumes: + - name: config-volume + configMap: + name: prometheus + - name: storage-volume + emptyDir: + {} diff --git a/prometheus/kustomization.yaml b/prometheus/kustomization.yaml new file mode 100644 index 0000000..cd8f8eb --- /dev/null +++ b/prometheus/kustomization.yaml @@ -0,0 +1,7 @@ +resources: + - prometheus-cm.yaml + - generated/prometheus-cr.yaml + - generated/prometheus-crb.yaml + - generated/prometheus-deployment.yaml + - generated/prometheus-sa.yaml + - generated/prometheus-svc.yaml diff --git a/prometheus/prometheus-cm.yaml b/prometheus/prometheus-cm.yaml new file mode 100755 index 0000000..9ae95ef --- /dev/null +++ b/prometheus/prometheus-cm.yaml @@ -0,0 +1,350 @@ +# Source: prometheus/templates/server/cm.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + component: 
"server" + app: prometheus + release: prometheus + chart: prometheus-11.16.2 + heritage: Helm + name: prometheus + namespace: istio-system +data: + alerting_rules.yml: | + {} + alerts: | + {} + prometheus.yml: | + global: + evaluation_interval: 1m + scrape_interval: 15s + scrape_timeout: 10s + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml + - /etc/config/rules + - /etc/config/alerts + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-apiservers + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + job_name: kubernetes-nodes-cadvisor + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - replacement: kubernetes.default.svc:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor + source_labels: + - __meta_kubernetes_node_name + target_label: 
__metrics_path__ + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + - job_name: kubernetes-service-endpoints + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape + - action: replace + regex: (https?) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: kubernetes_node + - job_name: kubernetes-service-endpoints-slow + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow + - action: replace + regex: (https?) 
+ source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + target_label: __scheme__ + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - action: replace + source_labels: + - __meta_kubernetes_pod_node_name + target_label: kubernetes_node + scrape_interval: 5m + scrape_timeout: 30s + - honor_labels: true + job_name: prometheus-pushgateway + kubernetes_sd_configs: + - role: service + relabel_configs: + - action: keep + regex: pushgateway + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - job_name: kubernetes-services + kubernetes_sd_configs: + - role: service + metrics_path: /probe + params: + module: + - http_2xx + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + - source_labels: + - __address__ + target_label: __param_target + - replacement: blackbox + target_label: __address__ + - source_labels: + - __param_target + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - source_labels: + - __meta_kubernetes_service_name + target_label: kubernetes_name + - job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: 
replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + - action: drop + regex: Pending|Succeeded|Failed + source_labels: + - __meta_kubernetes_pod_phase + - job_name: kubernetes-pods-slow + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name + - action: drop + regex: Pending|Succeeded|Failed + source_labels: + - __meta_kubernetes_pod_phase + scrape_interval: 5m + scrape_timeout: 30s + + - job_name: 'istiod' + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - istio-system + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: istiod;http-monitoring + + - job_name: 'envoy-stats' + metrics_path: /stats/prometheus + kubernetes_sd_configs: + 
- role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_port_name] + action: keep + regex: '.*-envoy-prom' + + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + + recording_rules.yml: | + {} + rules: | + groups: + - name: traffic + rules: + - record: job:incoming_requests_per_second_per_pod:mean + expr: | + sum by (namespace, pod_name) (rate(istio_requests_total[1m])) + labels: + azure_monitor: true + + - name: request_latency + rules: + - record: job:success_response_latency_milliseconds_per_pod:mean + expr: | + sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code!~"5.."}[1m])) + / + sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) + labels: + azure_monitor: true + + - record: job:error_response_latency_milliseconds_per_pod:mean + expr: | + sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code=~"5.."}[1m])) + / + sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) + labels: + azure_monitor: true + + - name: request_error_rate + rules: + - record: job:request_error_rate_per_pod:mean + expr: | + sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) + / + sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) + labels: + azure_monitor: true + + - name: saturation + rules: + - record: job:cpu_usage_seconds_per_pod:mean + expr: | + sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{image!=""}[1m])) + labels: + azure_monitor: true + + - record: job:memory_usage_percent_per_pod:mean + expr: | + sum(container_memory_working_set_bytes) by (namespace, pod) + / + sum(label_join(kube_pod_container_resource_limits_memory_bytes, "pod", "", "pod")) by (namespace, pod) + labels: + 
azure_monitor: true diff --git a/observability/prometheus-configmap.yaml b/prometheus/prometheus-configmap.yaml similarity index 100% rename from observability/prometheus-configmap.yaml rename to prometheus/prometheus-configmap.yaml From 731ed9eb04680f944a028525161279d6bae305b1 Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Fri, 9 Apr 2021 13:58:36 +0100 Subject: [PATCH 03/12] attempt at sidecar configuration for prometheus --- prometheus/README.md | 6 + prometheus/kustomization.yaml | 2 +- prometheus/prometheus-cm.yaml | 7 + prometheus/prometheus-configmap.yaml | 711 +++++++++++++------------- prometheus/prometheus-deployment.yaml | 303 +++++++++++ 5 files changed, 672 insertions(+), 357 deletions(-) create mode 100644 prometheus/prometheus-deployment.yaml diff --git a/prometheus/README.md b/prometheus/README.md index 20daa8e..d61bb79 100644 --- a/prometheus/README.md +++ b/prometheus/README.md @@ -7,4 +7,10 @@ curl https://raw.githubusercontent.com/istio/istio/release-1.9/samples/addons/pr go get -v github.com/mogensen/kubernetes-split-yaml ~/go/bin/kubernetes-split-yaml install.yaml + +# remove annotation to prevent sidecar injection and create new manifest with a sidecar injected... 
+cat generated/prometheus-deployment.yaml \ + | grep -v sidecar.istio.io/inject \ + | istioctl kube-inject -f - \ + > prometheus-deployment.yaml ``` diff --git a/prometheus/kustomization.yaml b/prometheus/kustomization.yaml index cd8f8eb..658e1e8 100644 --- a/prometheus/kustomization.yaml +++ b/prometheus/kustomization.yaml @@ -2,6 +2,6 @@ resources: - prometheus-cm.yaml - generated/prometheus-cr.yaml - generated/prometheus-crb.yaml - - generated/prometheus-deployment.yaml + - prometheus-deployment.yaml - generated/prometheus-sa.yaml - generated/prometheus-svc.yaml diff --git a/prometheus/prometheus-cm.yaml b/prometheus/prometheus-cm.yaml index 9ae95ef..aacf408 100755 --- a/prometheus/prometheus-cm.yaml +++ b/prometheus/prometheus-cm.yaml @@ -288,6 +288,13 @@ data: - source_labels: [__meta_kubernetes_pod_container_port_name] action: keep regex: '.*-envoy-prom' + scheme: https + tls_config: + ca_file: /etc/prom-certs/root-cert.pem + cert_file: /etc/prom-certs/cert-chain.pem + key_file: /etc/prom-certs/key.pem + insecure_skip_verify: true # Prometheus does not support Istio security naming, thus skip verifying target pod certificate + - job_name: 'kube-state-metrics' static_configs: diff --git a/prometheus/prometheus-configmap.yaml b/prometheus/prometheus-configmap.yaml index b793df3..c0f9e40 100644 --- a/prometheus/prometheus-configmap.yaml +++ b/prometheus/prometheus-configmap.yaml @@ -1,356 +1,355 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - annotations: - install.operator.istio.io/chart-owner: AddonComponents - labels: - app: prometheus - install.operator.istio.io/owner-group: install.istio.io - install.operator.istio.io/owner-kind: IstioOperator - install.operator.istio.io/owner-name: istiocontrolplane - release: istio - name: prometheus - namespace: istio-system -data: - prometheus.rules.yml: | - groups: - - name: traffic - rules: - - record: job:incoming_requests_per_second_per_pod:mean - expr: | - sum by (namespace, pod_name) 
(rate(istio_requests_total[1m])) - labels: - azure_monitor: true - - - name: request_latency - rules: - - record: job:success_response_latency_milliseconds_per_pod:mean - expr: | - sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code!~"5.."}[1m])) - / - sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) - labels: - azure_monitor: true - - - record: job:error_response_latency_milliseconds_per_pod:mean - expr: | - sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code=~"5.."}[1m])) - / - sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) - labels: - azure_monitor: true - - - name: request_error_rate - rules: - - record: job:request_error_rate_per_pod:mean - expr: | - sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) - / - sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) - labels: - azure_monitor: true - - - name: saturation - rules: - - record: job:cpu_usage_seconds_per_pod:mean - expr: | - sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{image!=""}[1m])) - labels: - azure_monitor: true - - - record: job:memory_usage_percent_per_pod:mean - expr: | - sum(container_memory_working_set_bytes) by (namespace, pod) - / - sum(label_join(kube_pod_container_resource_limits_memory_bytes, "pod", "", "pod")) by (namespace, pod) - labels: - azure_monitor: true - - prometheus.yml: |- - global: - scrape_interval: 15s - - rule_files: - - "prometheus.rules.yml" - - scrape_configs: - - # Mixer scrapping. Defaults to Prometheus and mixer on same namespace. 
- # - - job_name: 'istio-mesh' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-telemetry;prometheus - - # Scrape config for envoy stats - - job_name: 'envoy-stats' - metrics_path: /stats/prometheus - kubernetes_sd_configs: - - role: pod - - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_port_name] - action: keep - regex: '.*-envoy-prom' - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:15090 - target_label: __address__ - - action: labeldrop - regex: __meta_kubernetes_pod_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: pod_name - - - job_name: 'istio-policy' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - - - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-policy;http-policy-monitoring - - - job_name: 'istio-telemetry' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-telemetry;http-monitoring - - - job_name: 'pilot' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-pilot;http-monitoring - - - job_name: 'galley' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - - relabel_configs: - - source_labels: 
[__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-galley;http-monitoring - - - job_name: 'citadel' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-citadel;http-monitoring - - - job_name: 'sidecar-injector' - - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - istio-system - - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: istio-sidecar-injector;http-monitoring - - # scrape config for API servers - - job_name: 'kubernetes-apiservers' - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - default - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - relabel_configs: - - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: kubernetes;https - - target_label: __address__ - replacement: kubernetes.default.svc:443 - - # scrape config for nodes (kubelet) - - job_name: 'kubernetes-nodes' - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: node - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - target_label: __address__ - replacement: kubernetes.default.svc:443 - - source_labels: [__meta_kubernetes_node_name] - regex: (.+) - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}/proxy/metrics - - # Scrape config for Kubelet cAdvisor. 
- # - # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics - # (those whose names begin with 'container_') have been removed from the - # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to - # retrieve those metrics. - # - # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor - # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" - # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with - # the --cadvisor-port=0 Kubelet flag). - # - # This job is not necessary and should be removed in Kubernetes 1.6 and - # earlier versions, or it will cause the metrics to be scraped twice. - - job_name: 'kubernetes-cadvisor' - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: node - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - target_label: __address__ - replacement: kubernetes.default.svc:443 - - source_labels: [__meta_kubernetes_node_name] - regex: (.+) - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor - - # scrape config for service endpoints. - - job_name: 'kubernetes-service-endpoints' - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - action: replace - target_label: __scheme__ - regex: (https?) 
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] - action: replace - target_label: __address__ - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: kubernetes_name - - - job_name: 'kubernetes-pods' - kubernetes_sd_configs: - - role: pod - relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job. - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] - action: drop - regex: (.+) - - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] - action: drop - regex: (true) - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: pod_name - - # ^ Above is istio default config - # Below are our additions - - - job_name: 'doc-index-updater' - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - doc-index-updater - relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job. 
- - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] - action: drop - regex: (.+) - - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] - action: drop - regex: (true) - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: pod_name - - - job_name: 'kube-state-metrics' - static_configs: - - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] +# apiVersion: v1 +# kind: ConfigMap +# metadata: +# annotations: +# install.operator.istio.io/chart-owner: AddonComponents +# labels: +# app: prometheus +# install.operator.istio.io/owner-group: install.istio.io +# install.operator.istio.io/owner-kind: IstioOperator +# install.operator.istio.io/owner-name: istiocontrolplane +# release: istio +# name: prometheus +# namespace: istio-system +# data: +# prometheus.rules.yml: | +# groups: +# - name: traffic +# rules: +# - record: job:incoming_requests_per_second_per_pod:mean +# expr: | +# sum by (namespace, pod_name) (rate(istio_requests_total[1m])) +# labels: +# azure_monitor: true + +# - name: request_latency +# rules: +# - record: job:success_response_latency_milliseconds_per_pod:mean +# expr: | +# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code!~"5.."}[1m])) +# / +# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) +# labels: +# 
azure_monitor: true + +# - record: job:error_response_latency_milliseconds_per_pod:mean +# expr: | +# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code=~"5.."}[1m])) +# / +# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) +# labels: +# azure_monitor: true + +# - name: request_error_rate +# rules: +# - record: job:request_error_rate_per_pod:mean +# expr: | +# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) +# / +# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) +# labels: +# azure_monitor: true + +# - name: saturation +# rules: +# - record: job:cpu_usage_seconds_per_pod:mean +# expr: | +# sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{image!=""}[1m])) +# labels: +# azure_monitor: true + +# - record: job:memory_usage_percent_per_pod:mean +# expr: | +# sum(container_memory_working_set_bytes) by (namespace, pod) +# / +# sum(label_join(kube_pod_container_resource_limits_memory_bytes, "pod", "", "pod")) by (namespace, pod) +# labels: +# azure_monitor: true + +# prometheus.yml: |- +# global: +# scrape_interval: 15s + +# rule_files: +# - "prometheus.rules.yml" + +# scrape_configs: + +# # Mixer scrapping. Defaults to Prometheus and mixer on same namespace. 
+# # +# - job_name: 'istio-mesh' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-telemetry;prometheus + +# # Scrape config for envoy stats +# - job_name: 'envoy-stats' +# metrics_path: /stats/prometheus +# kubernetes_sd_configs: +# - role: pod + +# relabel_configs: +# - source_labels: [__meta_kubernetes_pod_container_port_name] +# action: keep +# regex: '.*-envoy-prom' +# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] +# action: replace +# regex: ([^:]+)(?::\d+)?;(\d+) +# replacement: $1:15090 +# target_label: __address__ +# - action: labeldrop +# regex: __meta_kubernetes_pod_label_(.+) +# - source_labels: [__meta_kubernetes_namespace] +# action: replace +# target_label: namespace +# - source_labels: [__meta_kubernetes_pod_name] +# action: replace +# target_label: pod_name + +# - job_name: 'istio-policy' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system + +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-policy;http-policy-monitoring + +# - job_name: 'istio-telemetry' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system + +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-telemetry;http-monitoring + +# - job_name: 'pilot' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system + +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-pilot;http-monitoring + +# - job_name: 'galley' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system + 
+# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-galley;http-monitoring + +# - job_name: 'citadel' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system + +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-citadel;http-monitoring + +# - job_name: 'sidecar-injector' + +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - istio-system + +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: istio-sidecar-injector;http-monitoring + +# # scrape config for API servers +# - job_name: 'kubernetes-apiservers' +# kubernetes_sd_configs: +# - role: endpoints +# namespaces: +# names: +# - default +# scheme: https +# tls_config: +# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] +# action: keep +# regex: kubernetes;https +# - target_label: __address__ +# replacement: kubernetes.default.svc:443 + +# # scrape config for nodes (kubelet) +# - job_name: 'kubernetes-nodes' +# scheme: https +# tls_config: +# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token +# kubernetes_sd_configs: +# - role: node +# relabel_configs: +# - action: labelmap +# regex: __meta_kubernetes_node_label_(.+) +# - target_label: __address__ +# replacement: kubernetes.default.svc:443 +# - source_labels: [__meta_kubernetes_node_name] +# regex: (.+) +# target_label: __metrics_path__ +# replacement: /api/v1/nodes/${1}/proxy/metrics + +# # Scrape config for Kubelet cAdvisor. 
+# # +# # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics +# # (those whose names begin with 'container_') have been removed from the +# # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to +# # retrieve those metrics. +# # +# # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor +# # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" +# # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with +# # the --cadvisor-port=0 Kubelet flag). +# # +# # This job is not necessary and should be removed in Kubernetes 1.6 and +# # earlier versions, or it will cause the metrics to be scraped twice. +# - job_name: 'kubernetes-cadvisor' +# scheme: https +# tls_config: +# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token +# kubernetes_sd_configs: +# - role: node +# relabel_configs: +# - action: labelmap +# regex: __meta_kubernetes_node_label_(.+) +# - target_label: __address__ +# replacement: kubernetes.default.svc:443 +# - source_labels: [__meta_kubernetes_node_name] +# regex: (.+) +# target_label: __metrics_path__ +# replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + +# # scrape config for service endpoints. +# - job_name: 'kubernetes-service-endpoints' +# kubernetes_sd_configs: +# - role: endpoints +# relabel_configs: +# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] +# action: keep +# regex: true +# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] +# action: replace +# target_label: __scheme__ +# regex: (https?) 
+# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] +# action: replace +# target_label: __metrics_path__ +# regex: (.+) +# - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] +# action: replace +# target_label: __address__ +# regex: ([^:]+)(?::\d+)?;(\d+) +# replacement: $1:$2 +# - action: labelmap +# regex: __meta_kubernetes_service_label_(.+) +# - source_labels: [__meta_kubernetes_namespace] +# action: replace +# target_label: kubernetes_namespace +# - source_labels: [__meta_kubernetes_service_name] +# action: replace +# target_label: kubernetes_name + +# - job_name: 'kubernetes-pods' +# kubernetes_sd_configs: +# - role: pod +# relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job. +# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] +# action: keep +# regex: true +# - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] +# action: drop +# regex: (.+) +# - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] +# action: drop +# regex: (true) +# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] +# action: replace +# target_label: __metrics_path__ +# regex: (.+) +# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] +# action: replace +# regex: ([^:]+)(?::\d+)?;(\d+) +# replacement: $1:$2 +# target_label: __address__ +# - action: labelmap +# regex: __meta_kubernetes_pod_label_(.+) +# - source_labels: [__meta_kubernetes_namespace] +# action: replace +# target_label: namespace +# - source_labels: [__meta_kubernetes_pod_name] +# action: replace +# target_label: pod_name + +# # ^ Above is istio default config +# # Below are our additions + +# - job_name: 'doc-index-updater' +# kubernetes_sd_configs: +# - role: pod +# namespaces: +# names: +# - doc-index-updater +# relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job. 
+# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] +# action: keep +# regex: true +# - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] +# action: drop +# regex: (.+) +# - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] +# action: drop +# regex: (true) +# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] +# action: replace +# target_label: __metrics_path__ +# regex: (.+) +# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] +# action: replace +# regex: ([^:]+)(?::\d+)?;(\d+) +# replacement: $1:$2 +# target_label: __address__ +# - action: labelmap +# regex: __meta_kubernetes_pod_label_(.+) +# - source_labels: [__meta_kubernetes_namespace] +# action: replace +# target_label: namespace +# - source_labels: [__meta_kubernetes_pod_name] +# action: replace +# target_label: pod_name + +# - job_name: 'kube-state-metrics' +# static_configs: +# - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] diff --git a/prometheus/prometheus-deployment.yaml b/prometheus/prometheus-deployment.yaml new file mode 100644 index 0000000..c888152 --- /dev/null +++ b/prometheus/prometheus-deployment.yaml @@ -0,0 +1,303 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: null + labels: + app: prometheus + chart: prometheus-11.16.2 + component: server + heritage: Helm + release: prometheus + name: prometheus + namespace: istio-system +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + component: server + release: prometheus + strategy: {} + template: + metadata: + annotations: + prometheus.io/path: /stats/prometheus + prometheus.io/port: "15020" + prometheus.io/scrape: "true" + sidecar.istio.io/status: '{"initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-data","istio-podinfo","istio-token","istiod-ca-cert"],"imagePullSecrets":null}' + traffic.sidecar.istio.io/includeInboundPorts: "" # do 
not intercept any inbound ports + traffic.sidecar.istio.io/includeOutboundIPRanges: "" # do not intercept any outbound traffic + # configure an env variable `OUTPUT_CERTS` to write certificates to the given folder + proxy.istio.io/config: | + proxyMetadata: + OUTPUT_CERTS: /etc/istio-output-certs + sidecar.istio.io/userVolumeMount: '[{"name": "istio-certs", "mountPath": "/etc/istio-output-certs"}]' # mount the shared volume at sidecar proxy + creationTimestamp: null + labels: + app: prometheus + chart: prometheus-11.16.2 + component: server + heritage: Helm + istio.io/rev: default + release: prometheus + security.istio.io/tlsMode: istio + service.istio.io/canonical-name: prometheus + service.istio.io/canonical-revision: latest + spec: + containers: + - args: + - --volume-dir=/etc/config + - --webhook-url=http://127.0.0.1:9090/-/reload + image: jimmidyson/configmap-reload:v0.4.0 + imagePullPolicy: IfNotPresent + name: prometheus-server-configmap-reload + resources: {} + volumeMounts: + - mountPath: /etc/config + name: config-volume + readOnly: true + - args: + - --storage.tsdb.retention.time=15d + - --config.file=/etc/config/prometheus.yml + - --storage.tsdb.path=/data + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + - --web.enable-lifecycle + image: prom/prometheus:v2.21.0 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /app-health/prometheus-server/livez + port: 15020 + initialDelaySeconds: 30 + periodSeconds: 15 + successThreshold: 1 + timeoutSeconds: 30 + name: prometheus-server + ports: + - containerPort: 9090 + readinessProbe: + failureThreshold: 3 + httpGet: + path: /app-health/prometheus-server/readyz + port: 15020 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 30 + resources: {} + volumeMounts: + - mountPath: /etc/config + name: config-volume + - mountPath: /data + name: storage-volume + - mountPath: /etc/prom-certs/ + name: istio-certs 
+ - args: + - proxy + - sidecar + - --domain + - $(POD_NAMESPACE).svc.cluster.local + - --serviceCluster + - prometheus.$(POD_NAMESPACE) + - --proxyLogLevel=warning + - --proxyComponentLogLevel=misc:error + - --log_output_level=default:info + - --concurrency + - "2" + env: + - name: JWT_POLICY + value: third-party-jwt + - name: PILOT_CERT_PROVIDER + value: istiod + - name: CA_ADDR + value: istiod.istio-system.svc:15012 + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: INSTANCE_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: CANONICAL_SERVICE + valueFrom: + fieldRef: + fieldPath: metadata.labels['service.istio.io/canonical-name'] + - name: CANONICAL_REVISION + valueFrom: + fieldRef: + fieldPath: metadata.labels['service.istio.io/canonical-revision'] + - name: PROXY_CONFIG + value: | + {} + - name: ISTIO_META_POD_PORTS + value: |- + [ + {"containerPort":9090} + ] + - name: ISTIO_META_APP_CONTAINERS + value: prometheus-server-configmap-reload,prometheus-server + - name: ISTIO_META_CLUSTER_ID + value: Kubernetes + - name: ISTIO_META_INTERCEPTION_MODE + value: REDIRECT + - name: ISTIO_META_WORKLOAD_NAME + value: prometheus + - name: ISTIO_META_OWNER + value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/prometheus + - name: ISTIO_META_MESH_ID + value: cluster.local + - name: TRUST_DOMAIN + value: cluster.local + - name: ISTIO_KUBE_APP_PROBERS + value: '{"/app-health/prometheus-server/livez":{"httpGet":{"path":"/-/healthy","port":9090},"timeoutSeconds":30},"/app-health/prometheus-server/readyz":{"httpGet":{"path":"/-/ready","port":9090},"timeoutSeconds":30}}' + image: docker.io/istio/proxyv2:1.9.2 + imagePullPolicy: Always + name: istio-proxy + ports: + - containerPort: 
15090 + name: http-envoy-prom + protocol: TCP + readinessProbe: + failureThreshold: 30 + httpGet: + path: /healthz/ready + port: 15021 + initialDelaySeconds: 1 + periodSeconds: 2 + timeoutSeconds: 3 + resources: + limits: + cpu: "2" + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsGroup: 1337 + runAsNonRoot: true + runAsUser: 1337 + volumeMounts: + - mountPath: /var/run/secrets/istio + name: istiod-ca-cert + - mountPath: /var/lib/istio/data + name: istio-data + - mountPath: /etc/istio/proxy + name: istio-envoy + - mountPath: /var/run/secrets/tokens + name: istio-token + - mountPath: /etc/istio/pod + name: istio-podinfo + initContainers: + - args: + - istio-iptables + - -p + - "15001" + - -z + - "15006" + - -u + - "1337" + - -m + - REDIRECT + - -i + - "*" + - -x + - "" + - -b + - "*" + - -d + - 15090,15021,15020 + image: docker.io/istio/proxyv2:1.9.2 + imagePullPolicy: Always + name: istio-init + resources: + limits: + cpu: "2" + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - NET_ADMIN + - NET_RAW + drop: + - ALL + privileged: false + readOnlyRootFilesystem: false + runAsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + securityContext: + fsGroup: 1337 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: prometheus + terminationGracePeriodSeconds: 300 + volumes: + - emptyDir: + medium: Memory + name: istio-envoy + - emptyDir: + medium: Memory + name: istio-certs + - emptyDir: {} + name: istio-data + - downwardAPI: + items: + - fieldRef: + fieldPath: metadata.labels + path: labels + - fieldRef: + fieldPath: metadata.annotations + path: annotations + - path: cpu-limit + resourceFieldRef: + containerName: istio-proxy + divisor: 1m + resource: limits.cpu + - path: cpu-request + resourceFieldRef: + containerName: 
istio-proxy + divisor: 1m + resource: requests.cpu + name: istio-podinfo + - name: istio-token + projected: + sources: + - serviceAccountToken: + audience: istio-ca + expirationSeconds: 43200 + path: istio-token + - configMap: + name: istio-ca-root-cert + name: istiod-ca-cert + - configMap: + name: prometheus + name: config-volume + - emptyDir: {} + name: storage-volume +status: {} +--- From 1e7241aee9ef33eb46d6ffa626a004b739aa580b Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Fri, 9 Apr 2021 16:38:43 +0100 Subject: [PATCH 04/12] Add OUTPUT_CERTS env and volumeMount directly to istio container config --- prometheus/prometheus-deployment.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/prometheus/prometheus-deployment.yaml b/prometheus/prometheus-deployment.yaml index c888152..95e95b4 100644 --- a/prometheus/prometheus-deployment.yaml +++ b/prometheus/prometheus-deployment.yaml @@ -28,10 +28,10 @@ spec: traffic.sidecar.istio.io/includeInboundPorts: "" # do not intercept any inbound ports traffic.sidecar.istio.io/includeOutboundIPRanges: "" # do not intercept any outbound traffic # configure an env variable `OUTPUT_CERTS` to write certificates to the given folder - proxy.istio.io/config: | - proxyMetadata: - OUTPUT_CERTS: /etc/istio-output-certs - sidecar.istio.io/userVolumeMount: '[{"name": "istio-certs", "mountPath": "/etc/istio-output-certs"}]' # mount the shared volume at sidecar proxy + # proxy.istio.io/config: | + # proxyMetadata: + # OUTPUT_CERTS: /etc/istio-output-certs + # sidecar.istio.io/userVolumeMount: '[{"name": "istio-certs", "mountPath": "/etc/istio-output-certs"}]' # mount the shared volume at sidecar proxy creationTimestamp: null labels: app: prometheus @@ -164,6 +164,8 @@ spec: value: cluster.local - name: ISTIO_KUBE_APP_PROBERS value: 
'{"/app-health/prometheus-server/livez":{"httpGet":{"path":"/-/healthy","port":9090},"timeoutSeconds":30},"/app-health/prometheus-server/readyz":{"httpGet":{"path":"/-/ready","port":9090},"timeoutSeconds":30}}' + - name: OUTPUT_CERTS + value: /etc/istio-output-certs image: docker.io/istio/proxyv2:1.9.2 imagePullPolicy: Always name: istio-proxy @@ -207,6 +209,8 @@ spec: name: istio-token - mountPath: /etc/istio/pod name: istio-podinfo + - mountPath: /etc/istio-output-certs + name: istio-certs initContainers: - args: - istio-iptables From e2b49806c4b70b4536a07311c54eb4fc18d60bda Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Mon, 12 Apr 2021 16:21:17 +0100 Subject: [PATCH 05/12] Add security context for prometheus container --- prometheus/prometheus-deployment.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/prometheus/prometheus-deployment.yaml b/prometheus/prometheus-deployment.yaml index 95e95b4..90f4203 100644 --- a/prometheus/prometheus-deployment.yaml +++ b/prometheus/prometheus-deployment.yaml @@ -86,6 +86,16 @@ spec: successThreshold: 1 timeoutSeconds: 30 resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsGroup: 1337 + runAsNonRoot: true + runAsUser: 1337 volumeMounts: - mountPath: /etc/config name: config-volume From f31119f2a6a923ae7cd377d62d32c0fe415f348d Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Mon, 12 Apr 2021 18:21:25 +0100 Subject: [PATCH 06/12] use kustomize to setup manifests before sidecar injection --- prometheus/Makefile | 9 +- prometheus/README.md | 7 +- prometheus/generated/kustomization.yaml | 7 + prometheus/kustomization.yaml | 7 - prometheus/overlay/kustomization.yaml | 5 + prometheus/{ => overlay}/prometheus-cm.yaml | 0 prometheus/overlay/prometheus-deployment.yaml | 41 ++ prometheus/prometheus-configmap.yaml | 355 ------------------ prometheus/prometheus-deployment.yaml | 317 ---------------- 9 files changed, 
62 insertions(+), 686 deletions(-)
 create mode 100644 prometheus/generated/kustomization.yaml
 delete mode 100644 prometheus/kustomization.yaml
 create mode 100644 prometheus/overlay/kustomization.yaml
 rename prometheus/{ => overlay}/prometheus-cm.yaml (100%)
 create mode 100644 prometheus/overlay/prometheus-deployment.yaml
 delete mode 100644 prometheus/prometheus-configmap.yaml
 delete mode 100644 prometheus/prometheus-deployment.yaml

diff --git a/prometheus/Makefile b/prometheus/Makefile
index b705010..e2705c2 100644
--- a/prometheus/Makefile
+++ b/prometheus/Makefile
@@ -1,7 +1,9 @@
 .PHONY: apply
 apply: ## Apply kubernetes manifests
-	kustomize build . | kubectl apply -f -
+	kustomize build overlay \
+	| istioctl kube-inject -f - \
+	| kubectl apply -f -

 .PHONY: restart-prometheus
 restart-prometheus: ## Restart prometheus so config changes take effect
@@ -9,7 +11,9 @@ restart-prometheus: ## Restart prometheus so config changes take effect

 .PHONY: delete
 delete: ## Delete resources
-	kustomize build . | kubectl delete --ignore-not-found -f - || true
+	kustomize build overlay \
+	| istioctl kube-inject -f - \
+	| kubectl delete --ignore-not-found -f - || true

 .PHONY: help
 help: ## Display this help screen
diff --git a/prometheus/README.md b/prometheus/README.md
index d61bb79..f1c4ee7 100644
--- a/prometheus/README.md
+++ b/prometheus/README.md
@@ -1,5 +1,7 @@
 # Prometheus install

+refresh generated manifests...
+
 ```bash
 curl https://raw.githubusercontent.com/istio/istio/release-1.9/samples/addons/prometheus.yaml -o install.yaml

@@ -8,9 +10,4 @@
 go get -v github.com/mogensen/kubernetes-split-yaml
 ~/go/bin/kubernetes-split-yaml install.yaml

-# remove annotation to prevent sidecar injection and create new manifest with a sidecar injected...
-cat generated/prometheus-deployment.yaml \ - | grep -v sidecar.istio.io/inject \ - | istioctl kube-inject -f - \ - > prometheus-deployment.yaml ``` diff --git a/prometheus/generated/kustomization.yaml b/prometheus/generated/kustomization.yaml new file mode 100644 index 0000000..e00efe4 --- /dev/null +++ b/prometheus/generated/kustomization.yaml @@ -0,0 +1,7 @@ +resources: + - prometheus-cm.yaml + - prometheus-cr.yaml + - prometheus-crb.yaml + - prometheus-deployment.yaml + - prometheus-sa.yaml + - prometheus-svc.yaml diff --git a/prometheus/kustomization.yaml b/prometheus/kustomization.yaml deleted file mode 100644 index 658e1e8..0000000 --- a/prometheus/kustomization.yaml +++ /dev/null @@ -1,7 +0,0 @@ -resources: - - prometheus-cm.yaml - - generated/prometheus-cr.yaml - - generated/prometheus-crb.yaml - - prometheus-deployment.yaml - - generated/prometheus-sa.yaml - - generated/prometheus-svc.yaml diff --git a/prometheus/overlay/kustomization.yaml b/prometheus/overlay/kustomization.yaml new file mode 100644 index 0000000..cacd37c --- /dev/null +++ b/prometheus/overlay/kustomization.yaml @@ -0,0 +1,5 @@ +resources: + - ../generated + +patchesStrategicMerge: + - prometheus-deployment.yaml diff --git a/prometheus/prometheus-cm.yaml b/prometheus/overlay/prometheus-cm.yaml similarity index 100% rename from prometheus/prometheus-cm.yaml rename to prometheus/overlay/prometheus-cm.yaml diff --git a/prometheus/overlay/prometheus-deployment.yaml b/prometheus/overlay/prometheus-deployment.yaml new file mode 100644 index 0000000..6c37da5 --- /dev/null +++ b/prometheus/overlay/prometheus-deployment.yaml @@ -0,0 +1,41 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus + namespace: istio-system +spec: + template: + metadata: + annotations: + prometheus.io/path: /stats/prometheus + prometheus.io/port: "15020" + prometheus.io/scrape: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/status: 
'{"initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-data","istio-podinfo","istio-token","istiod-ca-cert"],"imagePullSecrets":null}' + traffic.sidecar.istio.io/includeInboundPorts: "" # do not intercept any inbound ports + traffic.sidecar.istio.io/includeOutboundIPRanges: "" # do not intercept any outbound traffic + # configure an env variable `OUTPUT_CERTS` to write certificates to the given folder + proxy.istio.io/config: | + proxyMetadata: + OUTPUT_CERTS: /etc/istio-output-certs + sidecar.istio.io/userVolumeMount: '[{"name": "istio-certs", "mountPath": "/etc/istio-output-certs"}]' # mount the shared volume at sidecar proxy + spec: + containers: + - name: prometheus-server + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsGroup: 1337 + runAsNonRoot: true + runAsUser: 1337 + volumeMounts: + - mountPath: /etc/prom-certs/ + name: istio-certs + volumes: + - emptyDir: + medium: Memory + name: istio-certs diff --git a/prometheus/prometheus-configmap.yaml b/prometheus/prometheus-configmap.yaml deleted file mode 100644 index c0f9e40..0000000 --- a/prometheus/prometheus-configmap.yaml +++ /dev/null @@ -1,355 +0,0 @@ -# apiVersion: v1 -# kind: ConfigMap -# metadata: -# annotations: -# install.operator.istio.io/chart-owner: AddonComponents -# labels: -# app: prometheus -# install.operator.istio.io/owner-group: install.istio.io -# install.operator.istio.io/owner-kind: IstioOperator -# install.operator.istio.io/owner-name: istiocontrolplane -# release: istio -# name: prometheus -# namespace: istio-system -# data: -# prometheus.rules.yml: | -# groups: -# - name: traffic -# rules: -# - record: job:incoming_requests_per_second_per_pod:mean -# expr: | -# sum by (namespace, pod_name) (rate(istio_requests_total[1m])) -# labels: -# azure_monitor: true - -# - name: request_latency -# rules: -# - record: 
job:success_response_latency_milliseconds_per_pod:mean -# expr: | -# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code!~"5.."}[1m])) -# / -# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) -# labels: -# azure_monitor: true - -# - record: job:error_response_latency_milliseconds_per_pod:mean -# expr: | -# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_sum{response_code=~"5.."}[1m])) -# / -# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) -# labels: -# azure_monitor: true - -# - name: request_error_rate -# rules: -# - record: job:request_error_rate_per_pod:mean -# expr: | -# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code=~"5.."}[1m])) -# / -# sum by (namespace, pod_name) (rate(istio_request_duration_milliseconds_count{response_code!~"5.."}[1m])) -# labels: -# azure_monitor: true - -# - name: saturation -# rules: -# - record: job:cpu_usage_seconds_per_pod:mean -# expr: | -# sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{image!=""}[1m])) -# labels: -# azure_monitor: true - -# - record: job:memory_usage_percent_per_pod:mean -# expr: | -# sum(container_memory_working_set_bytes) by (namespace, pod) -# / -# sum(label_join(kube_pod_container_resource_limits_memory_bytes, "pod", "", "pod")) by (namespace, pod) -# labels: -# azure_monitor: true - -# prometheus.yml: |- -# global: -# scrape_interval: 15s - -# rule_files: -# - "prometheus.rules.yml" - -# scrape_configs: - -# # Mixer scrapping. Defaults to Prometheus and mixer on same namespace. 
-# # -# - job_name: 'istio-mesh' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-telemetry;prometheus - -# # Scrape config for envoy stats -# - job_name: 'envoy-stats' -# metrics_path: /stats/prometheus -# kubernetes_sd_configs: -# - role: pod - -# relabel_configs: -# - source_labels: [__meta_kubernetes_pod_container_port_name] -# action: keep -# regex: '.*-envoy-prom' -# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] -# action: replace -# regex: ([^:]+)(?::\d+)?;(\d+) -# replacement: $1:15090 -# target_label: __address__ -# - action: labeldrop -# regex: __meta_kubernetes_pod_label_(.+) -# - source_labels: [__meta_kubernetes_namespace] -# action: replace -# target_label: namespace -# - source_labels: [__meta_kubernetes_pod_name] -# action: replace -# target_label: pod_name - -# - job_name: 'istio-policy' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system - -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-policy;http-policy-monitoring - -# - job_name: 'istio-telemetry' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system - -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-telemetry;http-monitoring - -# - job_name: 'pilot' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system - -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-pilot;http-monitoring - -# - job_name: 'galley' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system - 
-# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-galley;http-monitoring - -# - job_name: 'citadel' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system - -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-citadel;http-monitoring - -# - job_name: 'sidecar-injector' - -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - istio-system - -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: istio-sidecar-injector;http-monitoring - -# # scrape config for API servers -# - job_name: 'kubernetes-apiservers' -# kubernetes_sd_configs: -# - role: endpoints -# namespaces: -# names: -# - default -# scheme: https -# tls_config: -# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt -# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] -# action: keep -# regex: kubernetes;https -# - target_label: __address__ -# replacement: kubernetes.default.svc:443 - -# # scrape config for nodes (kubelet) -# - job_name: 'kubernetes-nodes' -# scheme: https -# tls_config: -# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt -# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token -# kubernetes_sd_configs: -# - role: node -# relabel_configs: -# - action: labelmap -# regex: __meta_kubernetes_node_label_(.+) -# - target_label: __address__ -# replacement: kubernetes.default.svc:443 -# - source_labels: [__meta_kubernetes_node_name] -# regex: (.+) -# target_label: __metrics_path__ -# replacement: /api/v1/nodes/${1}/proxy/metrics - -# # Scrape config for Kubelet cAdvisor. 
-# # -# # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics -# # (those whose names begin with 'container_') have been removed from the -# # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to -# # retrieve those metrics. -# # -# # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor -# # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" -# # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with -# # the --cadvisor-port=0 Kubelet flag). -# # -# # This job is not necessary and should be removed in Kubernetes 1.6 and -# # earlier versions, or it will cause the metrics to be scraped twice. -# - job_name: 'kubernetes-cadvisor' -# scheme: https -# tls_config: -# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt -# bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token -# kubernetes_sd_configs: -# - role: node -# relabel_configs: -# - action: labelmap -# regex: __meta_kubernetes_node_label_(.+) -# - target_label: __address__ -# replacement: kubernetes.default.svc:443 -# - source_labels: [__meta_kubernetes_node_name] -# regex: (.+) -# target_label: __metrics_path__ -# replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor - -# # scrape config for service endpoints. -# - job_name: 'kubernetes-service-endpoints' -# kubernetes_sd_configs: -# - role: endpoints -# relabel_configs: -# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] -# action: keep -# regex: true -# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] -# action: replace -# target_label: __scheme__ -# regex: (https?) 
-# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] -# action: replace -# target_label: __metrics_path__ -# regex: (.+) -# - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] -# action: replace -# target_label: __address__ -# regex: ([^:]+)(?::\d+)?;(\d+) -# replacement: $1:$2 -# - action: labelmap -# regex: __meta_kubernetes_service_label_(.+) -# - source_labels: [__meta_kubernetes_namespace] -# action: replace -# target_label: kubernetes_namespace -# - source_labels: [__meta_kubernetes_service_name] -# action: replace -# target_label: kubernetes_name - -# - job_name: 'kubernetes-pods' -# kubernetes_sd_configs: -# - role: pod -# relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job. -# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] -# action: keep -# regex: true -# - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] -# action: drop -# regex: (.+) -# - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] -# action: drop -# regex: (true) -# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] -# action: replace -# target_label: __metrics_path__ -# regex: (.+) -# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] -# action: replace -# regex: ([^:]+)(?::\d+)?;(\d+) -# replacement: $1:$2 -# target_label: __address__ -# - action: labelmap -# regex: __meta_kubernetes_pod_label_(.+) -# - source_labels: [__meta_kubernetes_namespace] -# action: replace -# target_label: namespace -# - source_labels: [__meta_kubernetes_pod_name] -# action: replace -# target_label: pod_name - -# # ^ Above is istio default config -# # Below are our additions - -# - job_name: 'doc-index-updater' -# kubernetes_sd_configs: -# - role: pod -# namespaces: -# names: -# - doc-index-updater -# relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job. 
-# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] -# action: keep -# regex: true -# - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status] -# action: drop -# regex: (.+) -# - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls] -# action: drop -# regex: (true) -# - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] -# action: replace -# target_label: __metrics_path__ -# regex: (.+) -# - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] -# action: replace -# regex: ([^:]+)(?::\d+)?;(\d+) -# replacement: $1:$2 -# target_label: __address__ -# - action: labelmap -# regex: __meta_kubernetes_pod_label_(.+) -# - source_labels: [__meta_kubernetes_namespace] -# action: replace -# target_label: namespace -# - source_labels: [__meta_kubernetes_pod_name] -# action: replace -# target_label: pod_name - -# - job_name: 'kube-state-metrics' -# static_configs: -# - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] diff --git a/prometheus/prometheus-deployment.yaml b/prometheus/prometheus-deployment.yaml deleted file mode 100644 index 90f4203..0000000 --- a/prometheus/prometheus-deployment.yaml +++ /dev/null @@ -1,317 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - creationTimestamp: null - labels: - app: prometheus - chart: prometheus-11.16.2 - component: server - heritage: Helm - release: prometheus - name: prometheus - namespace: istio-system -spec: - replicas: 1 - selector: - matchLabels: - app: prometheus - component: server - release: prometheus - strategy: {} - template: - metadata: - annotations: - prometheus.io/path: /stats/prometheus - prometheus.io/port: "15020" - prometheus.io/scrape: "true" - sidecar.istio.io/status: '{"initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-data","istio-podinfo","istio-token","istiod-ca-cert"],"imagePullSecrets":null}' - traffic.sidecar.istio.io/includeInboundPorts: "" 
# do not intercept any inbound ports - traffic.sidecar.istio.io/includeOutboundIPRanges: "" # do not intercept any outbound traffic - # configure an env variable `OUTPUT_CERTS` to write certificates to the given folder - # proxy.istio.io/config: | - # proxyMetadata: - # OUTPUT_CERTS: /etc/istio-output-certs - # sidecar.istio.io/userVolumeMount: '[{"name": "istio-certs", "mountPath": "/etc/istio-output-certs"}]' # mount the shared volume at sidecar proxy - creationTimestamp: null - labels: - app: prometheus - chart: prometheus-11.16.2 - component: server - heritage: Helm - istio.io/rev: default - release: prometheus - security.istio.io/tlsMode: istio - service.istio.io/canonical-name: prometheus - service.istio.io/canonical-revision: latest - spec: - containers: - - args: - - --volume-dir=/etc/config - - --webhook-url=http://127.0.0.1:9090/-/reload - image: jimmidyson/configmap-reload:v0.4.0 - imagePullPolicy: IfNotPresent - name: prometheus-server-configmap-reload - resources: {} - volumeMounts: - - mountPath: /etc/config - name: config-volume - readOnly: true - - args: - - --storage.tsdb.retention.time=15d - - --config.file=/etc/config/prometheus.yml - - --storage.tsdb.path=/data - - --web.console.libraries=/etc/prometheus/console_libraries - - --web.console.templates=/etc/prometheus/consoles - - --web.enable-lifecycle - image: prom/prometheus:v2.21.0 - imagePullPolicy: IfNotPresent - livenessProbe: - failureThreshold: 3 - httpGet: - path: /app-health/prometheus-server/livez - port: 15020 - initialDelaySeconds: 30 - periodSeconds: 15 - successThreshold: 1 - timeoutSeconds: 30 - name: prometheus-server - ports: - - containerPort: 9090 - readinessProbe: - failureThreshold: 3 - httpGet: - path: /app-health/prometheus-server/readyz - port: 15020 - periodSeconds: 5 - successThreshold: 1 - timeoutSeconds: 30 - resources: {} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - 
runAsGroup: 1337 - runAsNonRoot: true - runAsUser: 1337 - volumeMounts: - - mountPath: /etc/config - name: config-volume - - mountPath: /data - name: storage-volume - - mountPath: /etc/prom-certs/ - name: istio-certs - - args: - - proxy - - sidecar - - --domain - - $(POD_NAMESPACE).svc.cluster.local - - --serviceCluster - - prometheus.$(POD_NAMESPACE) - - --proxyLogLevel=warning - - --proxyComponentLogLevel=misc:error - - --log_output_level=default:info - - --concurrency - - "2" - env: - - name: JWT_POLICY - value: third-party-jwt - - name: PILOT_CERT_PROVIDER - value: istiod - - name: CA_ADDR - value: istiod.istio-system.svc:15012 - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: INSTANCE_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: SERVICE_ACCOUNT - valueFrom: - fieldRef: - fieldPath: spec.serviceAccountName - - name: HOST_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: CANONICAL_SERVICE - valueFrom: - fieldRef: - fieldPath: metadata.labels['service.istio.io/canonical-name'] - - name: CANONICAL_REVISION - valueFrom: - fieldRef: - fieldPath: metadata.labels['service.istio.io/canonical-revision'] - - name: PROXY_CONFIG - value: | - {} - - name: ISTIO_META_POD_PORTS - value: |- - [ - {"containerPort":9090} - ] - - name: ISTIO_META_APP_CONTAINERS - value: prometheus-server-configmap-reload,prometheus-server - - name: ISTIO_META_CLUSTER_ID - value: Kubernetes - - name: ISTIO_META_INTERCEPTION_MODE - value: REDIRECT - - name: ISTIO_META_WORKLOAD_NAME - value: prometheus - - name: ISTIO_META_OWNER - value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/prometheus - - name: ISTIO_META_MESH_ID - value: cluster.local - - name: TRUST_DOMAIN - value: cluster.local - - name: ISTIO_KUBE_APP_PROBERS - value: 
'{"/app-health/prometheus-server/livez":{"httpGet":{"path":"/-/healthy","port":9090},"timeoutSeconds":30},"/app-health/prometheus-server/readyz":{"httpGet":{"path":"/-/ready","port":9090},"timeoutSeconds":30}}' - - name: OUTPUT_CERTS - value: /etc/istio-output-certs - image: docker.io/istio/proxyv2:1.9.2 - imagePullPolicy: Always - name: istio-proxy - ports: - - containerPort: 15090 - name: http-envoy-prom - protocol: TCP - readinessProbe: - failureThreshold: 30 - httpGet: - path: /healthz/ready - port: 15021 - initialDelaySeconds: 1 - periodSeconds: 2 - timeoutSeconds: 3 - resources: - limits: - cpu: "2" - memory: 1Gi - requests: - cpu: 100m - memory: 128Mi - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsGroup: 1337 - runAsNonRoot: true - runAsUser: 1337 - volumeMounts: - - mountPath: /var/run/secrets/istio - name: istiod-ca-cert - - mountPath: /var/lib/istio/data - name: istio-data - - mountPath: /etc/istio/proxy - name: istio-envoy - - mountPath: /var/run/secrets/tokens - name: istio-token - - mountPath: /etc/istio/pod - name: istio-podinfo - - mountPath: /etc/istio-output-certs - name: istio-certs - initContainers: - - args: - - istio-iptables - - -p - - "15001" - - -z - - "15006" - - -u - - "1337" - - -m - - REDIRECT - - -i - - "*" - - -x - - "" - - -b - - "*" - - -d - - 15090,15021,15020 - image: docker.io/istio/proxyv2:1.9.2 - imagePullPolicy: Always - name: istio-init - resources: - limits: - cpu: "2" - memory: 1Gi - requests: - cpu: 100m - memory: 128Mi - securityContext: - allowPrivilegeEscalation: false - capabilities: - add: - - NET_ADMIN - - NET_RAW - drop: - - ALL - privileged: false - readOnlyRootFilesystem: false - runAsGroup: 0 - runAsNonRoot: false - runAsUser: 0 - securityContext: - fsGroup: 1337 - runAsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - serviceAccountName: prometheus - terminationGracePeriodSeconds: 300 - volumes: - - emptyDir: - 
medium: Memory - name: istio-envoy - - emptyDir: - medium: Memory - name: istio-certs - - emptyDir: {} - name: istio-data - - downwardAPI: - items: - - fieldRef: - fieldPath: metadata.labels - path: labels - - fieldRef: - fieldPath: metadata.annotations - path: annotations - - path: cpu-limit - resourceFieldRef: - containerName: istio-proxy - divisor: 1m - resource: limits.cpu - - path: cpu-request - resourceFieldRef: - containerName: istio-proxy - divisor: 1m - resource: requests.cpu - name: istio-podinfo - - name: istio-token - projected: - sources: - - serviceAccountToken: - audience: istio-ca - expirationSeconds: 43200 - path: istio-token - - configMap: - name: istio-ca-root-cert - name: istiod-ca-cert - - configMap: - name: prometheus - name: config-volume - - emptyDir: {} - name: storage-volume -status: {} ---- From 82311c476bfb3aa0b63c9686def6cb4c24c86339 Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Mon, 12 Apr 2021 18:28:49 +0100 Subject: [PATCH 07/12] add configmap --- prometheus/overlay/kustomization.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/prometheus/overlay/kustomization.yaml b/prometheus/overlay/kustomization.yaml index cacd37c..d4687d7 100644 --- a/prometheus/overlay/kustomization.yaml +++ b/prometheus/overlay/kustomization.yaml @@ -2,4 +2,5 @@ resources: - ../generated patchesStrategicMerge: + - prometheus-cm.yaml - prometheus-deployment.yaml From 64cb88a1be21493a7fe67ac861d0f195c3b5cc90 Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Tue, 13 Apr 2021 13:56:42 +0100 Subject: [PATCH 08/12] tidy up, add readme etc --- cluster-init/Makefile | 10 +-- observability/README.md | 63 +++++++++++++++++++ .../kube-state-metrics}/Makefile | 0 .../base/cluster-role-binding.yaml | 0 .../base/cluster-role.yaml | 0 .../kube-state-metrics}/base/deployment.yaml | 0 .../base/kustomization.yaml | 0 .../base/service-account.yaml | 0 .../kube-state-metrics}/base/service.yaml | 0 .../oms-agent}/Makefile | 6 +- .../container-azm-ms-agentconfig.yaml | 
0 .../oms-agent}/kustomization.yaml | 0 .../prometheus}/Makefile | 0 observability/prometheus/README.md | 16 +++++ .../prometheus}/generated/kustomization.yaml | 0 .../prometheus}/generated/prometheus-cm.yaml | 0 .../prometheus}/generated/prometheus-cr.yaml | 0 .../prometheus}/generated/prometheus-crb.yaml | 0 .../generated/prometheus-deployment.yaml | 0 .../prometheus}/generated/prometheus-sa.yaml | 0 .../prometheus}/generated/prometheus-svc.yaml | 0 .../prometheus}/install.yaml | 0 .../prometheus}/overlay/kustomization.yaml | 0 .../prometheus}/overlay/prometheus-cm.yaml | 0 .../overlay/prometheus-deployment.yaml | 0 prometheus/README.md | 13 ---- 26 files changed, 86 insertions(+), 22 deletions(-) create mode 100644 observability/README.md rename {kube-state-metrics => observability/kube-state-metrics}/Makefile (100%) rename {kube-state-metrics => observability/kube-state-metrics}/base/cluster-role-binding.yaml (100%) rename {kube-state-metrics => observability/kube-state-metrics}/base/cluster-role.yaml (100%) rename {kube-state-metrics => observability/kube-state-metrics}/base/deployment.yaml (100%) rename {kube-state-metrics => observability/kube-state-metrics}/base/kustomization.yaml (100%) rename {kube-state-metrics => observability/kube-state-metrics}/base/service-account.yaml (100%) rename {kube-state-metrics => observability/kube-state-metrics}/base/service.yaml (100%) rename {oms-agent => observability/oms-agent}/Makefile (66%) rename {oms-agent => observability/oms-agent}/container-azm-ms-agentconfig.yaml (100%) rename {oms-agent => observability/oms-agent}/kustomization.yaml (100%) rename {prometheus => observability/prometheus}/Makefile (100%) create mode 100644 observability/prometheus/README.md rename {prometheus => observability/prometheus}/generated/kustomization.yaml (100%) rename {prometheus => observability/prometheus}/generated/prometheus-cm.yaml (100%) rename {prometheus => observability/prometheus}/generated/prometheus-cr.yaml (100%) rename 
{prometheus => observability/prometheus}/generated/prometheus-crb.yaml (100%) rename {prometheus => observability/prometheus}/generated/prometheus-deployment.yaml (100%) rename {prometheus => observability/prometheus}/generated/prometheus-sa.yaml (100%) rename {prometheus => observability/prometheus}/generated/prometheus-svc.yaml (100%) rename {prometheus => observability/prometheus}/install.yaml (100%) rename {prometheus => observability/prometheus}/overlay/kustomization.yaml (100%) rename {prometheus => observability/prometheus}/overlay/prometheus-cm.yaml (100%) rename {prometheus => observability/prometheus}/overlay/prometheus-deployment.yaml (100%) delete mode 100644 prometheus/README.md diff --git a/cluster-init/Makefile b/cluster-init/Makefile index 5445519..03f61b1 100644 --- a/cluster-init/Makefile +++ b/cluster-init/Makefile @@ -2,11 +2,12 @@ overlay := non-prod .PHONY: default default: ## Deploy cluster management tools - cd ../kube-state-metrics/ && make cd ../sealed-secrets && make overlay=$(overlay) cd ../cert-manager && make overlay=$(overlay) cd ../istio && make overlay=$(overlay) - cd ../observability/ && make + cd ../observability/kube-state-metrics/ && make + cd ../observability/prometheus && make + cd ../observability/oms-agent && make cd ../egress && make overlay=$(overlay) cd ../argo-cd && make overlay=$(overlay) @@ -14,11 +15,12 @@ default: ## Deploy cluster management tools delete: ## Remove cluster management tools cd ../argo-cd && make delete overlay=$(overlay) || true cd ../egress && make delete overlay=$(overlay) || true - cd ../observability/ && make delete || true + cd ../observability/oms-agent && make delete || true + cd ../observability/prometheus && make delete || true + cd ../observability/kube-state-metrics/ && make delete || true cd ../istio && make delete overlay=$(overlay) || true cd ../cert-manager && make delete overlay=$(overlay) || true cd ../sealed-secrets && make delete overlay=$(overlay) || true - cd 
../kube-state-metrics/ && make delete || true .PHONY: help help: ## Display this help screen diff --git a/observability/README.md b/observability/README.md new file mode 100644 index 0000000..63a250d --- /dev/null +++ b/observability/README.md @@ -0,0 +1,63 @@ +# Monitoring + +## AKS + +Azure Kubernetes Service (AKS) provides good high-level monitoring of the cluster, such as the CPU and memory usage of each node in the cluster. To view this find the cluster in the Azure portal and then click on the "Insights" tab. + +## Custom dashboards + +We have custom dashboards for the doc-index-updater that can be found by searching for "Shared Dashboards" in the Azure Portal. + +They are set up in the following way: + +- [Prometheus](https://prometheus.io/) scrapes metrics from different pods in the cluster (such as [Istio](https://istio.io/) and [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics#overview)). +- [Azure's OMS agent](https://docs.microsoft.com/en-us/azure/azure-monitor/platform/log-analytics-agent) scrapes this data and adds it to the log analytics workspace for the cluster. +- The Azure Monitor dashboard runs queries against the log analytics workspace and plots the results. + +### Prometheus + +Prometheus is no longer installed by Istio, so we have a set of [manifests](./prometheus) for that. + +There are two parts to the [config](./prometheus/overlay/prometheus-cm.yaml): + +- `prometheus.yml` specifies what pods to scrape and other general settings +- `prometheus.rules.yml` specifies some [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/), basically each rule is a query that Prometheus runs regularly and stores the results as a new metric. These are what we export to the Azure Monitor (by setting the `azure_monitor: true` label for each rule, see the Azure OMS agent section below). + +Prometheus [stores its data locally on disk](https://prometheus.io/docs/prometheus/latest/storage/). 
This means that if the Prometheus pod is deleted then Prometheus's database is deleted as well. **This happens if you run `make` in the deployments repo** in order to force Prometheus to refresh its config. It is possible to make [Prometheus reload its config whilst still running](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) if you enable the `--web.enable-lifecycle` flag but I haven't figured out how to inject that into the Istio profile yet. + +### Azure OMS agent + +The OMS agent pulls logs and metrics from the Kubernetes cluster and adds them to a log analytics workspace. + +This is configured by the `oms_agent` block in terraform (in the [products](https://github.com/MHRA/products) repo): + +```terraform +resource "azurerm_kubernetes_cluster" "cluster" { + # ...other properties... + + addon_profile { + oms_agent { + enabled = true + log_analytics_workspace_id = azurerm_log_analytics_workspace.cluster.id + } + } +} +``` + +The configuration for the OMS agent lives [here](./oms-agent/container-azm-ms-agentconfig.yaml). + +In this configuration we tell the OMS agent to only scrape Prometheus metrics which have the label `azure_monitor: true` by setting the scrape URLs in `prometheus-data-collection-settings` to: + +```yaml +urls = [ +"http://prometheus.istio-system.svc.cluster.local:9090/federate?match[]={azure_monitor=%22true%22}" +] +``` + +(This uses [Prometheus federation](https://prometheus.io/docs/prometheus/latest/federation/)). + +### Azure Monitor Dashboard + +The code for the dashboard lives in terraform in [modules/cluster/dashboard.tf](../modules/cluster/dashboard.tf). The JSON code for the dashboard is pretty gnarly so if you want to make changes I would recommend making them in the UI, then exporting the dashboard as JSON and popping that into terraform (and don't forget to parametrise things like the subscription id etc). 
+ +The queries for the Azure Monitor dashboard are written using Azure's [Kusto Query Language (KQL)](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/concepts/). diff --git a/kube-state-metrics/Makefile b/observability/kube-state-metrics/Makefile similarity index 100% rename from kube-state-metrics/Makefile rename to observability/kube-state-metrics/Makefile diff --git a/kube-state-metrics/base/cluster-role-binding.yaml b/observability/kube-state-metrics/base/cluster-role-binding.yaml similarity index 100% rename from kube-state-metrics/base/cluster-role-binding.yaml rename to observability/kube-state-metrics/base/cluster-role-binding.yaml diff --git a/kube-state-metrics/base/cluster-role.yaml b/observability/kube-state-metrics/base/cluster-role.yaml similarity index 100% rename from kube-state-metrics/base/cluster-role.yaml rename to observability/kube-state-metrics/base/cluster-role.yaml diff --git a/kube-state-metrics/base/deployment.yaml b/observability/kube-state-metrics/base/deployment.yaml similarity index 100% rename from kube-state-metrics/base/deployment.yaml rename to observability/kube-state-metrics/base/deployment.yaml diff --git a/kube-state-metrics/base/kustomization.yaml b/observability/kube-state-metrics/base/kustomization.yaml similarity index 100% rename from kube-state-metrics/base/kustomization.yaml rename to observability/kube-state-metrics/base/kustomization.yaml diff --git a/kube-state-metrics/base/service-account.yaml b/observability/kube-state-metrics/base/service-account.yaml similarity index 100% rename from kube-state-metrics/base/service-account.yaml rename to observability/kube-state-metrics/base/service-account.yaml diff --git a/kube-state-metrics/base/service.yaml b/observability/kube-state-metrics/base/service.yaml similarity index 100% rename from kube-state-metrics/base/service.yaml rename to observability/kube-state-metrics/base/service.yaml diff --git a/oms-agent/Makefile b/observability/oms-agent/Makefile similarity 
index 66% rename from oms-agent/Makefile rename to observability/oms-agent/Makefile index f054a4d..b36b4dc 100644 --- a/oms-agent/Makefile +++ b/observability/oms-agent/Makefile @@ -1,15 +1,11 @@ .PHONY: default -default: apply restart-prometheus ## Create resources +default: apply ## Create resources .PHONY: apply apply: ## Apply kubernetes manifests kustomize build . | kubectl apply -f - -.PHONY: restart-prometheus -restart-prometheus: ## Restart prometheus so config changes take effect - kubectl delete pods -l app=prometheus -n istio-system - .PHONY: delete delete: ## Delete resources kustomize build . | kubectl delete --ignore-not-found -f - || true diff --git a/oms-agent/container-azm-ms-agentconfig.yaml b/observability/oms-agent/container-azm-ms-agentconfig.yaml similarity index 100% rename from oms-agent/container-azm-ms-agentconfig.yaml rename to observability/oms-agent/container-azm-ms-agentconfig.yaml diff --git a/oms-agent/kustomization.yaml b/observability/oms-agent/kustomization.yaml similarity index 100% rename from oms-agent/kustomization.yaml rename to observability/oms-agent/kustomization.yaml diff --git a/prometheus/Makefile b/observability/prometheus/Makefile similarity index 100% rename from prometheus/Makefile rename to observability/prometheus/Makefile diff --git a/observability/prometheus/README.md b/observability/prometheus/README.md new file mode 100644 index 0000000..ed97dae --- /dev/null +++ b/observability/prometheus/README.md @@ -0,0 +1,16 @@ +# Prometheus install + +The `generated` folder contains the vanilla installation manifests for Prometheus from the Istio repository. 
To refresh generated manifests, fetch the relevant source yaml (see below) and split them into the `generated` folder, like this: + +```bash + +# the url will probably change for newer versions of Istio +curl https://raw.githubusercontent.com/istio/istio/release-1.9/samples/addons/prometheus.yaml -o install.yaml + +# install https://github.com/mogensen/kubernetes-split-yaml +go get -v github.com/mogensen/kubernetes-split-yaml + +# splits the yaml into resource oriented manifests and stores them in the `generated` folder +~/go/bin/kubernetes-split-yaml install.yaml + +``` diff --git a/prometheus/generated/kustomization.yaml b/observability/prometheus/generated/kustomization.yaml similarity index 100% rename from prometheus/generated/kustomization.yaml rename to observability/prometheus/generated/kustomization.yaml diff --git a/prometheus/generated/prometheus-cm.yaml b/observability/prometheus/generated/prometheus-cm.yaml similarity index 100% rename from prometheus/generated/prometheus-cm.yaml rename to observability/prometheus/generated/prometheus-cm.yaml diff --git a/prometheus/generated/prometheus-cr.yaml b/observability/prometheus/generated/prometheus-cr.yaml similarity index 100% rename from prometheus/generated/prometheus-cr.yaml rename to observability/prometheus/generated/prometheus-cr.yaml diff --git a/prometheus/generated/prometheus-crb.yaml b/observability/prometheus/generated/prometheus-crb.yaml similarity index 100% rename from prometheus/generated/prometheus-crb.yaml rename to observability/prometheus/generated/prometheus-crb.yaml diff --git a/prometheus/generated/prometheus-deployment.yaml b/observability/prometheus/generated/prometheus-deployment.yaml similarity index 100% rename from prometheus/generated/prometheus-deployment.yaml rename to observability/prometheus/generated/prometheus-deployment.yaml diff --git a/prometheus/generated/prometheus-sa.yaml b/observability/prometheus/generated/prometheus-sa.yaml similarity index 100% rename from 
prometheus/generated/prometheus-sa.yaml rename to observability/prometheus/generated/prometheus-sa.yaml diff --git a/prometheus/generated/prometheus-svc.yaml b/observability/prometheus/generated/prometheus-svc.yaml similarity index 100% rename from prometheus/generated/prometheus-svc.yaml rename to observability/prometheus/generated/prometheus-svc.yaml diff --git a/prometheus/install.yaml b/observability/prometheus/install.yaml similarity index 100% rename from prometheus/install.yaml rename to observability/prometheus/install.yaml diff --git a/prometheus/overlay/kustomization.yaml b/observability/prometheus/overlay/kustomization.yaml similarity index 100% rename from prometheus/overlay/kustomization.yaml rename to observability/prometheus/overlay/kustomization.yaml diff --git a/prometheus/overlay/prometheus-cm.yaml b/observability/prometheus/overlay/prometheus-cm.yaml similarity index 100% rename from prometheus/overlay/prometheus-cm.yaml rename to observability/prometheus/overlay/prometheus-cm.yaml diff --git a/prometheus/overlay/prometheus-deployment.yaml b/observability/prometheus/overlay/prometheus-deployment.yaml similarity index 100% rename from prometheus/overlay/prometheus-deployment.yaml rename to observability/prometheus/overlay/prometheus-deployment.yaml diff --git a/prometheus/README.md b/prometheus/README.md deleted file mode 100644 index f1c4ee7..0000000 --- a/prometheus/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Prometheus install - -refresh generated manifests... 
- -```bash -curl https://raw.githubusercontent.com/istio/istio/release-1.9/samples/addons/prometheus.yaml -o install.yaml - -# https://github.com/mogensen/kubernetes-split-yaml -go get -v github.com/mogensen/kubernetes-split-yaml - -~/go/bin/kubernetes-split-yaml install.yaml - -``` From 5e578bdd6cf6be07b55650450c46ae6c3ae5ef14 Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Tue, 13 Apr 2021 14:47:45 +0100 Subject: [PATCH 09/12] update kube-state-metrics manifests --- observability/kube-state-metrics/Makefile | 4 +- observability/kube-state-metrics/README.md | 18 +++ .../base/cluster-role-binding.yaml | 15 -- .../kube-state-metrics/base/cluster-role.yaml | 117 -------------- .../kube-state-metrics/base/deployment.yaml | 50 ------ .../base/kustomization.yaml | 8 - .../base/service-account.yaml | 7 - .../kube-state-metrics/base/service.yaml | 18 --- .../generated/kube-state-metrics-cr.yaml | 146 ++++++++++++++++++ .../generated/kube-state-metrics-crb.yaml | 18 +++ .../kube-state-metrics-deployment.yaml | 103 ++++++++++++ .../generated/kube-state-metrics-sa.yaml | 14 ++ .../generated/kube-state-metrics-svc.yaml | 25 +++ .../generated/kustomization.yaml | 6 + observability/kube-state-metrics/install.yaml | 0 .../kube-state-metrics-deployment.yaml | 20 +++ .../overlay/kube-state-metrics-svc.yaml | 8 + .../overlay/kustomization.yaml | 6 + sealed-secrets/Makefile | 2 +- 19 files changed, 367 insertions(+), 218 deletions(-) create mode 100644 observability/kube-state-metrics/README.md delete mode 100644 observability/kube-state-metrics/base/cluster-role-binding.yaml delete mode 100644 observability/kube-state-metrics/base/cluster-role.yaml delete mode 100644 observability/kube-state-metrics/base/deployment.yaml delete mode 100644 observability/kube-state-metrics/base/kustomization.yaml delete mode 100644 observability/kube-state-metrics/base/service-account.yaml delete mode 100644 observability/kube-state-metrics/base/service.yaml create mode 100755 
observability/kube-state-metrics/generated/kube-state-metrics-cr.yaml create mode 100755 observability/kube-state-metrics/generated/kube-state-metrics-crb.yaml create mode 100755 observability/kube-state-metrics/generated/kube-state-metrics-deployment.yaml create mode 100755 observability/kube-state-metrics/generated/kube-state-metrics-sa.yaml create mode 100755 observability/kube-state-metrics/generated/kube-state-metrics-svc.yaml create mode 100644 observability/kube-state-metrics/generated/kustomization.yaml create mode 100644 observability/kube-state-metrics/install.yaml create mode 100755 observability/kube-state-metrics/overlay/kube-state-metrics-deployment.yaml create mode 100755 observability/kube-state-metrics/overlay/kube-state-metrics-svc.yaml create mode 100644 observability/kube-state-metrics/overlay/kustomization.yaml diff --git a/observability/kube-state-metrics/Makefile b/observability/kube-state-metrics/Makefile index 06ef988..ab6d756 100644 --- a/observability/kube-state-metrics/Makefile +++ b/observability/kube-state-metrics/Makefile @@ -1,10 +1,10 @@ .PHONY: default default: ## Deploy using Kustomize - kustomize build ./base | kubectl apply -f - + kustomize build ./overlay | kubectl apply -f - .PHONY: delete delete: ## Deploy using Kustomize - kustomize build ./base | kubectl delete --ignore-not-found -f - || true + kustomize build ./overlay | kubectl delete --ignore-not-found -f - || true .PHONY: help help: ## Display this help screen diff --git a/observability/kube-state-metrics/README.md b/observability/kube-state-metrics/README.md new file mode 100644 index 0000000..82cfdcb --- /dev/null +++ b/observability/kube-state-metrics/README.md @@ -0,0 +1,18 @@ +# kube-state-metrics install + +The `generated` folder contains the vanilla installation manifests for `kube-state-metrics`. 
To refresh generated manifests, fetch the helm repo (see below) and split them into the `generated` folder, like this: + +```bash + +helm repo add kube-state-metrics https://kubernetes.github.io/kube-state-metrics +helm repo update + +helm template kube-state-metrics kube-state-metrics/kube-state-metrics >install.yaml + +# install https://github.com/mogensen/kubernetes-split-yaml +go get -v github.com/mogensen/kubernetes-split-yaml + +# splits the yaml into resource oriented manifests and stores them in the `generated` folder +~/go/bin/kubernetes-split-yaml install.yaml + +``` diff --git a/observability/kube-state-metrics/base/cluster-role-binding.yaml b/observability/kube-state-metrics/base/cluster-role-binding.yaml deleted file mode 100644 index 7363054..0000000 --- a/observability/kube-state-metrics/base/cluster-role-binding.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - labels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.5 - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kube-state-metrics -subjects: - - kind: ServiceAccount - name: kube-state-metrics - namespace: kube-system diff --git a/observability/kube-state-metrics/base/cluster-role.yaml b/observability/kube-state-metrics/base/cluster-role.yaml deleted file mode 100644 index 67fd5b7..0000000 --- a/observability/kube-state-metrics/base/cluster-role.yaml +++ /dev/null @@ -1,117 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.5 - name: kube-state-metrics -rules: - - apiGroups: - - "" - resources: - - configmaps - - secrets - - nodes - - pods - - services - - resourcequotas - - replicationcontrollers - - limitranges - - persistentvolumeclaims - - persistentvolumes - - namespaces - - endpoints - verbs: - - list - - watch - - apiGroups: - 
- extensions - resources: - - daemonsets - - deployments - - replicasets - - ingresses - verbs: - - list - - watch - - apiGroups: - - apps - resources: - - statefulsets - - daemonsets - - deployments - - replicasets - verbs: - - list - - watch - - apiGroups: - - batch - resources: - - cronjobs - - jobs - verbs: - - list - - watch - - apiGroups: - - autoscaling - resources: - - horizontalpodautoscalers - verbs: - - list - - watch - - apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create - - apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create - - apiGroups: - - policy - resources: - - poddisruptionbudgets - verbs: - - list - - watch - - apiGroups: - - certificates.k8s.io - resources: - - certificatesigningrequests - verbs: - - list - - watch - - apiGroups: - - storage.k8s.io - resources: - - storageclasses - - volumeattachments - verbs: - - list - - watch - - apiGroups: - - admissionregistration.k8s.io - resources: - - mutatingwebhookconfigurations - - validatingwebhookconfigurations - verbs: - - list - - watch - - apiGroups: - - networking.k8s.io - resources: - - networkpolicies - verbs: - - list - - watch - - apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - list - - watch diff --git a/observability/kube-state-metrics/base/deployment.yaml b/observability/kube-state-metrics/base/deployment.yaml deleted file mode 100644 index 2722266..0000000 --- a/observability/kube-state-metrics/base/deployment.yaml +++ /dev/null @@ -1,50 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.0.0 - name: kube-state-metrics -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: kube-state-metrics - template: - metadata: - labels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.0.0 - spec: - containers: - - image: 
k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0-rc.1 - livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - timeoutSeconds: 5 - name: kube-state-metrics - ports: - - containerPort: 8080 - name: http-metrics - - containerPort: 8081 - name: telemetry - resources: - limits: - cpu: 500m - memory: 500Mi - requests: - cpu: 200m - memory: 200Mi - readinessProbe: - httpGet: - path: / - port: 8081 - initialDelaySeconds: 5 - timeoutSeconds: 5 - securityContext: - runAsUser: 65534 - nodeSelector: - kubernetes.io/os: linux - serviceAccountName: kube-state-metrics diff --git a/observability/kube-state-metrics/base/kustomization.yaml b/observability/kube-state-metrics/base/kustomization.yaml deleted file mode 100644 index df94033..0000000 --- a/observability/kube-state-metrics/base/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -namespace: kube-system - -resources: - - cluster-role-binding.yaml - - cluster-role.yaml - - deployment.yaml - - service-account.yaml - - service.yaml diff --git a/observability/kube-state-metrics/base/service-account.yaml b/observability/kube-state-metrics/base/service-account.yaml deleted file mode 100644 index 2357dc2..0000000 --- a/observability/kube-state-metrics/base/service-account.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.5 - name: kube-state-metrics diff --git a/observability/kube-state-metrics/base/service.yaml b/observability/kube-state-metrics/base/service.yaml deleted file mode 100644 index e6346ba..0000000 --- a/observability/kube-state-metrics/base/service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.5 - name: kube-state-metrics -spec: - clusterIP: None - ports: - - name: http-metrics - port: 8080 - targetPort: http-metrics - - name: telemetry - port: 
8081 - targetPort: telemetry - selector: - app.kubernetes.io/name: kube-state-metrics diff --git a/observability/kube-state-metrics/generated/kube-state-metrics-cr.yaml b/observability/kube-state-metrics/generated/kube-state-metrics-cr.yaml new file mode 100755 index 0000000..1f96287 --- /dev/null +++ b/observability/kube-state-metrics/generated/kube-state-metrics-cr.yaml @@ -0,0 +1,146 @@ +# Source: kube-state-metrics/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + helm.sh/chart: kube-state-metrics-2.13.2 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: kube-state-metrics + name: kube-state-metrics +rules: + +- apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - configmaps + verbs: ["list", "watch"] + +- apiGroups: ["batch"] + resources: + - cronjobs + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "apps"] + resources: + - daemonsets + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "apps"] + resources: + - deployments + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - endpoints + verbs: ["list", "watch"] + +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "networking.k8s.io"] + resources: + - ingresses + verbs: ["list", "watch"] + +- apiGroups: ["batch"] + resources: + - jobs + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - limitranges + verbs: ["list", "watch"] + +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch"] + +- apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch"] + +- 
apiGroups: [""] + resources: + - persistentvolumeclaims + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - persistentvolumes + verbs: ["list", "watch"] + +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - pods + verbs: ["list", "watch"] + +- apiGroups: ["extensions", "apps"] + resources: + - replicasets + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - replicationcontrollers + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - resourcequotas + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - secrets + verbs: ["list", "watch"] + +- apiGroups: [""] + resources: + - services + verbs: ["list", "watch"] + +- apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] + +- apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] + +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - validatingwebhookconfigurations + verbs: ["list", "watch"] + +- apiGroups: ["storage.k8s.io"] + resources: + - volumeattachments + verbs: ["list", "watch"] diff --git a/observability/kube-state-metrics/generated/kube-state-metrics-crb.yaml b/observability/kube-state-metrics/generated/kube-state-metrics-crb.yaml new file mode 100755 index 0000000..47d58d0 --- /dev/null +++ b/observability/kube-state-metrics/generated/kube-state-metrics-crb.yaml @@ -0,0 +1,18 @@ +# Source: kube-state-metrics/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + helm.sh/chart: kube-state-metrics-2.13.2 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: kube-state-metrics + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system diff --git 
a/observability/kube-state-metrics/generated/kube-state-metrics-deployment.yaml b/observability/kube-state-metrics/generated/kube-state-metrics-deployment.yaml new file mode 100755 index 0000000..a903dec --- /dev/null +++ b/observability/kube-state-metrics/generated/kube-state-metrics-deployment.yaml @@ -0,0 +1,103 @@ +# Source: kube-state-metrics/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + app.kubernetes.io/name: kube-state-metrics + helm.sh/chart: "kube-state-metrics-2.13.2" + app.kubernetes.io/instance: "kube-state-metrics" + app.kubernetes.io/managed-by: "Helm" + app.kubernetes.io/version: "1.9.8" +spec: + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/instance: "kube-state-metrics" + spec: + hostNetwork: false + serviceAccountName: kube-state-metrics + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsUser: 65534 + containers: + - name: kube-state-metrics + args: + - --collectors=certificatesigningrequests + + - --collectors=configmaps + + - --collectors=cronjobs + + - --collectors=daemonsets + + - --collectors=deployments + + - --collectors=endpoints + + - --collectors=horizontalpodautoscalers + + - --collectors=ingresses + + - --collectors=jobs + + - --collectors=limitranges + + - --collectors=mutatingwebhookconfigurations + + - --collectors=namespaces + + - --collectors=networkpolicies + + - --collectors=nodes + + - --collectors=persistentvolumeclaims + + - --collectors=persistentvolumes + + - --collectors=poddisruptionbudgets + + - --collectors=pods + + - --collectors=replicasets + + - --collectors=replicationcontrollers + + - --collectors=resourcequotas + + - --collectors=secrets + + - --collectors=services + + - --collectors=statefulsets + + - --collectors=storageclasses + + - 
--collectors=validatingwebhookconfigurations + + - --collectors=volumeattachments + + - --telemetry-port=8081 + imagePullPolicy: IfNotPresent + image: "k8s.gcr.io/kube-state-metrics/kube-state-metrics:v1.9.8" + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 diff --git a/observability/kube-state-metrics/generated/kube-state-metrics-sa.yaml b/observability/kube-state-metrics/generated/kube-state-metrics-sa.yaml new file mode 100755 index 0000000..7d2951d --- /dev/null +++ b/observability/kube-state-metrics/generated/kube-state-metrics-sa.yaml @@ -0,0 +1,14 @@ +--- +# Source: kube-state-metrics/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + helm.sh/chart: kube-state-metrics-2.13.2 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: kube-state-metrics + name: kube-state-metrics + namespace: kube-system +imagePullSecrets: + [] diff --git a/observability/kube-state-metrics/generated/kube-state-metrics-svc.yaml b/observability/kube-state-metrics/generated/kube-state-metrics-svc.yaml new file mode 100755 index 0000000..20d5d7a --- /dev/null +++ b/observability/kube-state-metrics/generated/kube-state-metrics-svc.yaml @@ -0,0 +1,25 @@ +# Source: kube-state-metrics/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system + labels: + app.kubernetes.io/name: kube-state-metrics + helm.sh/chart: "kube-state-metrics-2.13.2" + app.kubernetes.io/instance: "kube-state-metrics" + app.kubernetes.io/managed-by: "Helm" + annotations: + prometheus.io/scrape: "true" +spec: + type: "ClusterIP" + clusterIP: None + ports: + - name: "http" + protocol: TCP + port: 8080 + targetPort: 8080 + + selector: + app.kubernetes.io/name: kube-state-metrics + 
app.kubernetes.io/instance: kube-state-metrics diff --git a/observability/kube-state-metrics/generated/kustomization.yaml b/observability/kube-state-metrics/generated/kustomization.yaml new file mode 100644 index 0000000..60a92ba --- /dev/null +++ b/observability/kube-state-metrics/generated/kustomization.yaml @@ -0,0 +1,6 @@ +resources: + - kube-state-metrics-cr.yaml + - kube-state-metrics-crb.yaml + - kube-state-metrics-deployment.yaml + - kube-state-metrics-sa.yaml + - kube-state-metrics-svc.yaml diff --git a/observability/kube-state-metrics/install.yaml b/observability/kube-state-metrics/install.yaml new file mode 100644 index 0000000..e69de29 diff --git a/observability/kube-state-metrics/overlay/kube-state-metrics-deployment.yaml b/observability/kube-state-metrics/overlay/kube-state-metrics-deployment.yaml new file mode 100755 index 0000000..78790b1 --- /dev/null +++ b/observability/kube-state-metrics/overlay/kube-state-metrics-deployment.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: kube-system + # labels: + # app.kubernetes.io/version: "2.0.0" +spec: + template: + spec: + containers: + - name: kube-state-metrics + # image: "k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.0.0" + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 200m + memory: 200Mi diff --git a/observability/kube-state-metrics/overlay/kube-state-metrics-svc.yaml b/observability/kube-state-metrics/overlay/kube-state-metrics-svc.yaml new file mode 100755 index 0000000..d2f2eb2 --- /dev/null +++ b/observability/kube-state-metrics/overlay/kube-state-metrics-svc.yaml @@ -0,0 +1,8 @@ +# Source: kube-state-metrics/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system +spec: + clusterIP: None diff --git a/observability/kube-state-metrics/overlay/kustomization.yaml b/observability/kube-state-metrics/overlay/kustomization.yaml new file mode 100644 index 
0000000..5a7f2dd --- /dev/null +++ b/observability/kube-state-metrics/overlay/kustomization.yaml @@ -0,0 +1,6 @@ +resources: + - ../generated + +patchesStrategicMerge: + - kube-state-metrics-deployment.yaml + - kube-state-metrics-svc.yaml diff --git a/sealed-secrets/Makefile b/sealed-secrets/Makefile index 0eb5b40..696c178 100644 --- a/sealed-secrets/Makefile +++ b/sealed-secrets/Makefile @@ -10,7 +10,7 @@ default: ## Create resources [ $$(kubectl get secret \ -n kube-system \ --selector "sealedsecrets.bitnami.com/sealed-secrets-key=active" \ - --no-headers | wc -l) -eq 1 ] || ( \ + --no-headers | wc -l) -gt 0 ] || ( \ az keyvault secret show \ --vault-name $(vault-$(overlay)) \ --name sealing-key \ From 52f5741af153b79af99e44cbcf5ced42d9044dfc Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Tue, 13 Apr 2021 14:50:58 +0100 Subject: [PATCH 10/12] remove install.yaml files --- observability/kube-state-metrics/install.yaml | 0 observability/prometheus/install.yaml | 475 ------------------ 2 files changed, 475 deletions(-) delete mode 100644 observability/kube-state-metrics/install.yaml delete mode 100644 observability/prometheus/install.yaml diff --git a/observability/kube-state-metrics/install.yaml b/observability/kube-state-metrics/install.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/observability/prometheus/install.yaml b/observability/prometheus/install.yaml deleted file mode 100644 index 8418a2c..0000000 --- a/observability/prometheus/install.yaml +++ /dev/null @@ -1,475 +0,0 @@ ---- -# Source: prometheus/templates/server/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - component: "server" - app: prometheus - release: prometheus - chart: prometheus-11.16.2 - heritage: Helm - name: prometheus - namespace: istio-system - annotations: - {} ---- -# Source: prometheus/templates/server/cm.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - labels: - component: "server" - app: prometheus - release: prometheus - 
chart: prometheus-11.16.2 - heritage: Helm - name: prometheus - namespace: istio-system -data: - alerting_rules.yml: | - {} - alerts: | - {} - prometheus.yml: | - global: - evaluation_interval: 1m - scrape_interval: 15s - scrape_timeout: 10s - rule_files: - - /etc/config/recording_rules.yml - - /etc/config/alerting_rules.yml - - /etc/config/rules - - /etc/config/alerts - scrape_configs: - - job_name: prometheus - static_configs: - - targets: - - localhost:9090 - - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-apiservers - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - action: keep - regex: default;kubernetes;https - source_labels: - - __meta_kubernetes_namespace - - __meta_kubernetes_service_name - - __meta_kubernetes_endpoint_port_name - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-nodes - kubernetes_sd_configs: - - role: node - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - replacement: kubernetes.default.svc:443 - target_label: __address__ - - regex: (.+) - replacement: /api/v1/nodes/$1/proxy/metrics - source_labels: - - __meta_kubernetes_node_name - target_label: __metrics_path__ - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-nodes-cadvisor - kubernetes_sd_configs: - - role: node - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - replacement: kubernetes.default.svc:443 - target_label: __address__ - - regex: (.+) - replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor - source_labels: - - __meta_kubernetes_node_name - target_label: __metrics_path__ - scheme: https - tls_config: - 
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecure_skip_verify: true - - job_name: kubernetes-service-endpoints - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - action: keep - regex: true - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_scrape - - action: replace - regex: (https?) - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_scheme - target_label: __scheme__ - - action: replace - regex: (.+) - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_path - target_label: __metrics_path__ - - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - source_labels: - - __address__ - - __meta_kubernetes_service_annotation_prometheus_io_port - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - action: replace - source_labels: - - __meta_kubernetes_namespace - target_label: kubernetes_namespace - - action: replace - source_labels: - - __meta_kubernetes_service_name - target_label: kubernetes_name - - action: replace - source_labels: - - __meta_kubernetes_pod_node_name - target_label: kubernetes_node - - job_name: kubernetes-service-endpoints-slow - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - action: keep - regex: true - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow - - action: replace - regex: (https?) 
- source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_scheme - target_label: __scheme__ - - action: replace - regex: (.+) - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_path - target_label: __metrics_path__ - - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - source_labels: - - __address__ - - __meta_kubernetes_service_annotation_prometheus_io_port - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - action: replace - source_labels: - - __meta_kubernetes_namespace - target_label: kubernetes_namespace - - action: replace - source_labels: - - __meta_kubernetes_service_name - target_label: kubernetes_name - - action: replace - source_labels: - - __meta_kubernetes_pod_node_name - target_label: kubernetes_node - scrape_interval: 5m - scrape_timeout: 30s - - honor_labels: true - job_name: prometheus-pushgateway - kubernetes_sd_configs: - - role: service - relabel_configs: - - action: keep - regex: pushgateway - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_probe - - job_name: kubernetes-services - kubernetes_sd_configs: - - role: service - metrics_path: /probe - params: - module: - - http_2xx - relabel_configs: - - action: keep - regex: true - source_labels: - - __meta_kubernetes_service_annotation_prometheus_io_probe - - source_labels: - - __address__ - target_label: __param_target - - replacement: blackbox - target_label: __address__ - - source_labels: - - __param_target - target_label: instance - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: - - __meta_kubernetes_namespace - target_label: kubernetes_namespace - - source_labels: - - __meta_kubernetes_service_name - target_label: kubernetes_name - - job_name: kubernetes-pods - kubernetes_sd_configs: - - role: pod - relabel_configs: - - action: keep - regex: true - source_labels: - - __meta_kubernetes_pod_annotation_prometheus_io_scrape - - action: 
replace - regex: (.+) - source_labels: - - __meta_kubernetes_pod_annotation_prometheus_io_path - target_label: __metrics_path__ - - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - source_labels: - - __address__ - - __meta_kubernetes_pod_annotation_prometheus_io_port - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - action: replace - source_labels: - - __meta_kubernetes_namespace - target_label: kubernetes_namespace - - action: replace - source_labels: - - __meta_kubernetes_pod_name - target_label: kubernetes_pod_name - - action: drop - regex: Pending|Succeeded|Failed - source_labels: - - __meta_kubernetes_pod_phase - - job_name: kubernetes-pods-slow - kubernetes_sd_configs: - - role: pod - relabel_configs: - - action: keep - regex: true - source_labels: - - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow - - action: replace - regex: (.+) - source_labels: - - __meta_kubernetes_pod_annotation_prometheus_io_path - target_label: __metrics_path__ - - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - source_labels: - - __address__ - - __meta_kubernetes_pod_annotation_prometheus_io_port - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - action: replace - source_labels: - - __meta_kubernetes_namespace - target_label: kubernetes_namespace - - action: replace - source_labels: - - __meta_kubernetes_pod_name - target_label: kubernetes_pod_name - - action: drop - regex: Pending|Succeeded|Failed - source_labels: - - __meta_kubernetes_pod_phase - scrape_interval: 5m - scrape_timeout: 30s - recording_rules.yml: | - {} - rules: | - {} ---- -# Source: prometheus/templates/server/clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - component: "server" - app: prometheus - release: prometheus - chart: prometheus-11.16.2 - heritage: Helm - name: prometheus -rules: - - apiGroups: - - "" - resources: 
- - nodes - - nodes/proxy - - nodes/metrics - - services - - endpoints - - pods - - ingresses - - configmaps - verbs: - - get - - list - - watch - - apiGroups: - - "extensions" - - "networking.k8s.io" - resources: - - ingresses/status - - ingresses - verbs: - - get - - list - - watch - - nonResourceURLs: - - "/metrics" - verbs: - - get ---- -# Source: prometheus/templates/server/clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - labels: - component: "server" - app: prometheus - release: prometheus - chart: prometheus-11.16.2 - heritage: Helm - name: prometheus -subjects: - - kind: ServiceAccount - name: prometheus - namespace: istio-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus ---- -# Source: prometheus/templates/server/service.yaml -apiVersion: v1 -kind: Service -metadata: - labels: - component: "server" - app: prometheus - release: prometheus - chart: prometheus-11.16.2 - heritage: Helm - name: prometheus - namespace: istio-system -spec: - ports: - - name: http - port: 9090 - protocol: TCP - targetPort: 9090 - selector: - component: "server" - app: prometheus - release: prometheus - sessionAffinity: None - type: "ClusterIP" ---- -# Source: prometheus/templates/server/deploy.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - component: "server" - app: prometheus - release: prometheus - chart: prometheus-11.16.2 - heritage: Helm - name: prometheus - namespace: istio-system -spec: - selector: - matchLabels: - component: "server" - app: prometheus - release: prometheus - replicas: 1 - template: - metadata: - annotations: - - sidecar.istio.io/inject: "false" - labels: - component: "server" - app: prometheus - release: prometheus - chart: prometheus-11.16.2 - heritage: Helm - spec: - serviceAccountName: prometheus - containers: - - name: prometheus-server-configmap-reload - image: "jimmidyson/configmap-reload:v0.4.0" - imagePullPolicy: "IfNotPresent" - 
args: - - --volume-dir=/etc/config - - --webhook-url=http://127.0.0.1:9090/-/reload - resources: - {} - volumeMounts: - - name: config-volume - mountPath: /etc/config - readOnly: true - - - name: prometheus-server - image: "prom/prometheus:v2.21.0" - imagePullPolicy: "IfNotPresent" - args: - - --storage.tsdb.retention.time=15d - - --config.file=/etc/config/prometheus.yml - - --storage.tsdb.path=/data - - --web.console.libraries=/etc/prometheus/console_libraries - - --web.console.templates=/etc/prometheus/consoles - - --web.enable-lifecycle - ports: - - containerPort: 9090 - readinessProbe: - httpGet: - path: /-/ready - port: 9090 - initialDelaySeconds: 0 - periodSeconds: 5 - timeoutSeconds: 30 - failureThreshold: 3 - successThreshold: 1 - livenessProbe: - httpGet: - path: /-/healthy - port: 9090 - initialDelaySeconds: 30 - periodSeconds: 15 - timeoutSeconds: 30 - failureThreshold: 3 - successThreshold: 1 - resources: - {} - volumeMounts: - - name: config-volume - mountPath: /etc/config - - name: storage-volume - mountPath: /data - subPath: "" - securityContext: - fsGroup: 65534 - runAsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - terminationGracePeriodSeconds: 300 - volumes: - - name: config-volume - configMap: - name: prometheus - - name: storage-volume - emptyDir: - {} From fdcd676f30b8fc0a0cd19ed66f431e0b6375a7db Mon Sep 17 00:00:00 2001 From: Stuart Harris Date: Tue, 13 Apr 2021 14:51:24 +0100 Subject: [PATCH 11/12] ignore install.yaml --- observability/kube-state-metrics/.gitignore | 1 + observability/prometheus/.gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 observability/kube-state-metrics/.gitignore create mode 100644 observability/prometheus/.gitignore diff --git a/observability/kube-state-metrics/.gitignore b/observability/kube-state-metrics/.gitignore new file mode 100644 index 0000000..6eb86f1 --- /dev/null +++ b/observability/kube-state-metrics/.gitignore @@ -0,0 +1 @@ +install.yaml diff --git 
a/observability/prometheus/.gitignore b/observability/prometheus/.gitignore new file mode 100644 index 0000000..6eb86f1 --- /dev/null +++ b/observability/prometheus/.gitignore @@ -0,0 +1 @@ +install.yaml From 0c614e06ec3b8eddcc438bd32896e2e3ab412f3a Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Fri, 23 Apr 2021 08:50:20 +0100 Subject: [PATCH 12/12] Add restart proxies make command and remove telemetry from profile --- istio/Makefile | 11 +++++++++++ istio/init-1/profile.yaml | 33 --------------------------------- 2 files changed, 11 insertions(+), 33 deletions(-) diff --git a/istio/Makefile b/istio/Makefile index f1eed03..6da47aa 100644 --- a/istio/Makefile +++ b/istio/Makefile @@ -37,6 +37,17 @@ init1: ## Install SSL certs and Istio profile init2: ## Install custom manifests kustomize build overlays-2/$(overlay) | kubectl apply -f - +.PHONY: restart_proxies +restart_proxies: ## Restarts all istio dataplane proxies, can be used when rolling out upgrade + kubectl rollout restart deployment/argocd-application-controller -n argocd + kubectl rollout restart deployment/argocd-dex-server -n argocd + kubectl rollout restart deployment/argocd-redis -n argocd + kubectl rollout restart deployment/argocd-repo-server -n argocd + kubectl rollout restart deployment/argocd-server -n argocd + kubectl rollout restart deployment/doc-index-updater -n doc-index-updater + kubectl rollout restart deployment/medicines-api -n medicines-api + cd ../observability/prometheus && make + .PHONY: delete delete: ## Remove Istio kubectl delete istiooperators.install.istio.io -n istio-system istiocontrolplane --ignore-not-found || true diff --git a/istio/init-1/profile.yaml b/istio/init-1/profile.yaml index 796a92c..7d8428f 100644 --- a/istio/init-1/profile.yaml +++ b/istio/init-1/profile.yaml @@ -7,15 +7,6 @@ spec: meshConfig: outboundTrafficPolicy: mode: REGISTRY_ONLY - addonComponents: - kiali: - enabled: true - grafana: - enabled: false - prometheus: - enabled: true - tracing: - enabled: true 
components: pilot: enabled: true @@ -31,24 +22,6 @@ spec: patches: - path: spec.minReplicas value: 2 - telemetry: - enabled: true - k8s: - resources: - requests: - cpu: "200m" - memory: "500M" - overlays: - - kind: Deployment - name: istio-telemetry - patches: - - path: spec.replicas - value: 2 - - kind: HorizontalPodAutoscaler - name: istio-telemetry - patches: - - path: spec.minReplicas - value: 2 ingressGateways: - name: istio-ingressgateway enabled: true @@ -58,9 +31,3 @@ spec: values: sidecarInjectorWebhook: rewriteAppHTTPProbe: true - telemetry: - enabled: true - v1: - enabled: false - v2: - enabled: true