diff --git a/cmd/controller/main.go b/cmd/controller/main.go index be50a1f22..40e1978b8 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -8,8 +8,10 @@ import ( "os/signal" "syscall" + "github.com/prometheus/client_golang/prometheus" internalclient "github.com/skupperproject/skupper/internal/kube/client" "github.com/skupperproject/skupper/internal/kube/controller" + "github.com/skupperproject/skupper/internal/kube/metrics" "github.com/skupperproject/skupper/internal/version" ) @@ -73,6 +75,15 @@ func main() { log.Fatal("Error getting new site controller ", err.Error()) } + if !config.MetricsConfig.Disabled { + reg := prometheus.NewRegistry() + metrics.MustRegisterClientGoMetrics(reg) + srv := metrics.NewServer(config.MetricsConfig, reg) + if err := srv.Start(stopCh); err != nil { + log.Fatalf("Error starting metrics server: %s", err) + } + } + if err = controller.Run(stopCh); err != nil { log.Fatal("Error running site controller: ", err.Error()) } diff --git a/cmd/kube-adaptor/main.go b/cmd/kube-adaptor/main.go index 761c28843..179028749 100644 --- a/cmd/kube-adaptor/main.go +++ b/cmd/kube-adaptor/main.go @@ -11,9 +11,11 @@ import ( "time" "github.com/cenkalti/backoff/v4" + "github.com/prometheus/client_golang/prometheus" iflag "github.com/skupperproject/skupper/internal/flag" "github.com/skupperproject/skupper/internal/kube/adaptor" internalclient "github.com/skupperproject/skupper/internal/kube/client" + "github.com/skupperproject/skupper/internal/kube/metrics" "github.com/skupperproject/skupper/internal/qdr" "github.com/skupperproject/skupper/internal/version" ) @@ -53,6 +55,11 @@ func main() { // if -version used, report and exit isVersion := flags.Bool("version", false, "Report the version of Config Sync") isInit := flags.Bool("init", false, "Downloads configuration and ssl profile artefacts") + + metricsConfig, err := metrics.BoundConfig(flags) + if err != nil { + log.Fatalf("Error reading metrics configuration: %s", err) + } flags.Parse(os.Args[1:]) if *isVersion { fmt.Println(version.Version) @@ -77,6 +84,15 @@ func main() { os.Exit(0) } + if !metricsConfig.Disabled { + reg := prometheus.NewRegistry() + metrics.MustRegisterClientGoMetrics(reg) + srv := metrics.NewServer(metricsConfig, reg) + if err := srv.Start(stopCh); err != nil { + log.Fatalf("Error starting metrics server: %s", err) + } + } + log.Println("Waiting for Skupper router to be ready") if err := waitForAMQPConnection("amqp://localhost:5672", time.Second*180, time.Second*5); err != nil { log.Fatal("Error waiting for router ", err.Error()) diff --git a/internal/kube/controller/config.go b/internal/kube/controller/config.go index 334c3be60..29aa5ded4 100644 --- a/internal/kube/controller/config.go +++ b/internal/kube/controller/config.go @@ -7,12 +7,14 @@ import ( iflag "github.com/skupperproject/skupper/internal/flag" "github.com/skupperproject/skupper/internal/kube/grants" + "github.com/skupperproject/skupper/internal/kube/metrics" "github.com/skupperproject/skupper/internal/kube/securedaccess" ) type Config struct { GrantConfig *grants.GrantConfig SecuredAccessConfig *securedaccess.Config + MetricsConfig *metrics.Config Namespace string Kubeconfig string WatchNamespace string @@ -39,9 +41,14 @@ func BoundConfig(flags *flag.FlagSet) (*Config, error) { } else if err := securedAccessConfig.Verify(); err != nil { return nil, err } + metricsConfig, err := metrics.BoundConfig(flags) + if err != nil { + return nil, err + } c := &Config{ GrantConfig: grantConfig, SecuredAccessConfig: securedAccessConfig, + MetricsConfig: metricsConfig, } iflag.StringVar(flags, &c.Namespace, "namespace", "NAMESPACE", "", "The Kubernetes namespace scope for the controller") iflag.StringVar(flags, &c.Kubeconfig, "kubeconfig", "KUBECONFIG", "", "A path to the kubeconfig file to use") diff --git a/internal/kube/metrics/client.go b/internal/kube/metrics/client.go new file mode 100644 index 000000000..c6c6af5a7 --- /dev/null +++ b/internal/kube/metrics/client.go @@ -0,0 +1,77 @@ +package metrics + +import ( + "context" + "net/url" + "time" + + "github.com/prometheus/client_golang/prometheus" + "k8s.io/client-go/tools/metrics" +) + +// MustRegisterClientGoMetrics registers a set of metrics exposed from the +// k8s.io/client-go/tools/metrics package with the prometheus registry. +func MustRegisterClientGoMetrics(registry *prometheus.Registry) { + httpMetrics := &clientGoHttpMetrics{ + latency: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "skupper", + Subsystem: "kubernetes_client", + Name: "http_request_duration_seconds", + Help: "Latency of kubernetes client requests in seconds by endpoint.", + }, []string{"method", "endpoint"}), + results: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "skupper", + Subsystem: "kubernetes_client", + Name: "http_requests_total", + Help: "Total number of kubernetes client requests by status code.", + }, []string{"method", "status_code"}), + retries: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "skupper", + Subsystem: "kubernetes_client", + Name: "http_retries_total", + Help: "Total number of kubernetes client requests retried by status code.", + }, []string{"method", "status_code"}), + } + rateLimiterMetrics := &clientGoRateLimiterMetrics{ + latency: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "skupper", + Subsystem: "kubernetes_client", + Name: "rate_limiter_duration_seconds", + Help: "Latency of kubernetes client side rate limiting in seconds by endpoint.", + }, []string{"method", "endpoint"}), + } + + registry.MustRegister(httpMetrics.latency, httpMetrics.results, httpMetrics.retries, rateLimiterMetrics.latency) + metrics.Register(metrics.RegisterOpts{ + RequestLatency: httpMetrics, + RequestResult: httpMetrics, + RequestRetry: httpMetrics, + + RateLimiterLatency: rateLimiterMetrics, + }) +} + +type clientGoHttpMetrics struct { + latency *prometheus.HistogramVec + results *prometheus.CounterVec + retries *prometheus.CounterVec +} + +func (m *clientGoHttpMetrics) Observe(ctx context.Context, verb string, url url.URL, latency time.Duration) { + m.latency.WithLabelValues(verb, url.EscapedPath()).Observe(latency.Seconds()) +} + +func (m *clientGoHttpMetrics) Increment(ctx context.Context, code string, method string, host string) { + m.results.WithLabelValues(method, code).Inc() +} +func (m *clientGoHttpMetrics) IncrementRetry(ctx context.Context, code string, method string, _ string) { + m.retries.WithLabelValues(method, code).Inc() +} + +type clientGoRateLimiterMetrics struct { + latency *prometheus.HistogramVec +} + +func (m *clientGoRateLimiterMetrics) Observe(ctx context.Context, verb string, url url.URL, latency time.Duration) { + m.latency.WithLabelValues(verb, url.EscapedPath()).Observe(latency.Seconds()) +} diff --git a/internal/kube/metrics/server.go b/internal/kube/metrics/server.go new file mode 100644 index 000000000..ca98535a4 --- /dev/null +++ b/internal/kube/metrics/server.go @@ -0,0 +1,74 @@ +package metrics + +import ( + "context" + "flag" + "fmt" + "log" + "net" + "net/http" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + iflag "github.com/skupperproject/skupper/internal/flag" +) + +const metricsPath = "/metrics" + +type Config struct { + Disabled bool + Address string +} + +func BoundConfig(flags *flag.FlagSet) (*Config, error) { + cfg := &Config{} + err := iflag.BoolVar(flags, &cfg.Disabled, "disable-metrics", "SKUPPER_METRICS_DISABLE", false, "Set to disable metrics.") + iflag.StringVar(flags, &cfg.Address, "metrics-address", "SKUPPER_METRICS_ADDRESS", ":9000", "The address for the metrics http server to listen on.") + return cfg, err +} + +func NewServer(cfg *Config, registry *prometheus.Registry) *Server { + mux := http.NewServeMux() + mux.Handle(metricsPath, promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) + srv := &http.Server{ + ReadTimeout: 5 * time.Second, + WriteTimeout: 5 * time.Second, + IdleTimeout: 120 * time.Second, + Handler: mux, + } + return &Server{ + config: *cfg, + server: srv, + } +} + +type Server struct { + config Config + server *http.Server +} + +func (s *Server) Start(stopCh <-chan struct{}) error { + listenCtx, cancel := context.WithCancel(context.Background()) + go func() { + select { + case <-stopCh: + cancel() + case <-listenCtx.Done(): + } + }() + defer cancel() + + var lc net.ListenConfig + ln, err := lc.Listen(listenCtx, "tcp", s.config.Address) + if err != nil { + return fmt.Errorf("failed to start listener: %s", err) + } + go func() { + if err := s.server.Serve(ln); err != nil { + log.Printf("metrics server error: %s", err) + } + }() + log.Printf("Started metrics server at: %s", s.config.Address) + return nil +} diff --git a/internal/kube/site/resources/skupper-router-deployment.yaml b/internal/kube/site/resources/skupper-router-deployment.yaml index f9c4590b0..ca64c34f1 100644 --- a/internal/kube/site/resources/skupper-router-deployment.yaml +++ b/internal/kube/site/resources/skupper-router-deployment.yaml @@ -135,6 +135,10 @@ spec: periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 + ports: + - containerPort: 9000 + name: metrics + protocol: TCP volumeMounts: - mountPath: /etc/skupper-router-certs name: skupper-router-certs diff --git a/scripts/skupper-deployment-generator.sh b/scripts/skupper-deployment-generator.sh index 2001881dd..3e1a211af 100755 --- a/scripts/skupper-deployment-generator.sh +++ b/scripts/skupper-deployment-generator.sh @@ -119,6 +119,9 @@ spec: imagePullPolicy: Always command: ["/app/controller"] args: ["-enable-grants", "-grant-server-autoconfigure"] + ports: + - name: metrics + containerPort: 9000 env: - name: SKUPPER_KUBE_ADAPTOR_IMAGE value: ${SKUPPER_KUBE_ADAPTOR_IMAGE} @@ -181,6 +184,9 @@ spec: imagePullPolicy: Always command: ["/app/controller"] args: ["-enable-grants", "-grant-server-autoconfigure"] + ports: + - name: metrics + containerPort: 9000 env: - name: WATCH_NAMESPACE valueFrom: