From 3d6a34a91f34d9c1a04e8ce0ef3e706a4a8c6428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Tue, 20 Jan 2026 16:37:01 +0100 Subject: [PATCH 01/17] Add heartbeat client & reporter to verifier --- build/devenv/fakes/go.mod | 1 + build/devenv/fakes/go.sum | 2 + cmd/verifier/committee/main.go | 28 +++ .../pkg/heartbeatclient/heartbeatclient.go | 76 ++++++++ .../observed_heartbeat_client.go | 87 +++++++++ verifier/heartbeat_reporter.go | 184 ++++++++++++++++++ verifier/interfaces.go | 17 ++ verifier/pkg/monitoring/metrics.go | 102 ++++++++++ verifier/pkg/monitoring/monitoring.go | 14 ++ verifier/verification_coordinator.go | 49 +++++ 10 files changed, 560 insertions(+) create mode 100644 integration/pkg/heartbeatclient/heartbeatclient.go create mode 100644 integration/pkg/heartbeatclient/observed_heartbeat_client.go create mode 100644 verifier/heartbeat_reporter.go diff --git a/build/devenv/fakes/go.mod b/build/devenv/fakes/go.mod index 9c02c55ed..4249917df 100644 --- a/build/devenv/fakes/go.mod +++ b/build/devenv/fakes/go.mod @@ -77,6 +77,7 @@ require ( github.com/sirupsen/logrus v1.9.3 // indirect github.com/smartcontractkit/chain-selectors v1.0.79 // indirect github.com/smartcontractkit/chainlink-common v0.9.6-0.20260114190811-74301cd99dc3 // indirect + github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat v0.0.0-20260115142640-f6b99095c12e // indirect github.com/smartcontractkit/chainlink-protos/chainlink-ccv/verifier v0.0.0-20251211142334-5c3421fe2c8d // indirect github.com/smartcontractkit/chainlink-testing-framework/framework v0.12.6 // indirect github.com/smartcontractkit/libocr v0.0.0-20250912173940-f3ab0246e23d // indirect diff --git a/build/devenv/fakes/go.sum b/build/devenv/fakes/go.sum index 4fbf42445..75e0a5d0c 100644 --- a/build/devenv/fakes/go.sum +++ b/build/devenv/fakes/go.sum @@ -246,6 +246,8 @@ github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10 h1:FJAFgXS9 
github.com/smartcontractkit/chainlink-common/pkg/chipingress v0.0.10/go.mod h1:oiDa54M0FwxevWwyAX773lwdWvFYYlYHHQV1LQ5HpWY= github.com/smartcontractkit/chainlink-protos/chainlink-ccv/committee-verifier v0.0.0-20251211142334-5c3421fe2c8d h1:VYoBBNnQpZ5p+enPTl8SkKBRaubqyGpO0ul3B1np++I= github.com/smartcontractkit/chainlink-protos/chainlink-ccv/committee-verifier v0.0.0-20251211142334-5c3421fe2c8d/go.mod h1:oNFoKHRIerxuaANa8ASNejtHrdsG26LqGtQ2XhSac2g= +github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat v0.0.0-20260115142640-f6b99095c12e h1:c7vgdeidC0LMtV1a01B/rPL4fEC/cnPanRDflRijXCM= +github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat v0.0.0-20260115142640-f6b99095c12e/go.mod h1:rZV/gLc1wlSp2r5oXN09iOrlyZPFX4iK+cqoSW2k5dc= github.com/smartcontractkit/chainlink-protos/chainlink-ccv/verifier v0.0.0-20251211142334-5c3421fe2c8d h1:AJy55QJ/pBhXkZjc7N+ATnWfxrcjq9BI9DmdtdjwDUQ= github.com/smartcontractkit/chainlink-protos/chainlink-ccv/verifier v0.0.0-20251211142334-5c3421fe2c8d/go.mod h1:5JdppgngCOUS76p61zCinSCgOhPeYQ+OcDUuome5THQ= github.com/smartcontractkit/chainlink-testing-framework/framework v0.12.6 h1:+Pjg5HsFo+AG6Id/iN/VHHHuU1HRfXNLBc/HHu30yjg= diff --git a/cmd/verifier/committee/main.go b/cmd/verifier/committee/main.go index 2a77b9da6..29699222f 100644 --- a/cmd/verifier/committee/main.go +++ b/cmd/verifier/committee/main.go @@ -18,6 +18,7 @@ import ( cmd "github.com/smartcontractkit/chainlink-ccv/cmd/verifier" ccvcommon "github.com/smartcontractkit/chainlink-ccv/common" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/integration/storageaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-ccv/protocol/common/hmac" @@ -221,6 +222,31 @@ func main() { verifierMonitoring, ) + // TODO: make heartbeat interval configurable + heartbeatInterval := 10 * time.Second + heartbeatClient, err := 
heartbeatclient.NewHeartbeatClient( + config.AggregatorAddress, + lggr, + hmacConfig, + config.InsecureAggregatorConnection, + ) + if err != nil { + lggr.Errorw("Failed to create heartbeat client", "error", err) + os.Exit(1) + } + defer func() { + if heartbeatClient != nil { + _ = heartbeatClient.Close() + } + }() + + observedHeartbeatClient := heartbeatclient.NewObservedHeartbeatClient( + heartbeatClient, + config.VerifierID, + lggr, + verifierMonitoring, + ) + messageTracker := monitoring.NewMessageLatencyTracker( lggr, config.VerifierID, @@ -238,6 +264,8 @@ func main() { messageTracker, verifierMonitoring, chainStatusManager, + observedHeartbeatClient, + heartbeatInterval, ) if err != nil { lggr.Errorw("Failed to create verification coordinator", "error", err) diff --git a/integration/pkg/heartbeatclient/heartbeatclient.go b/integration/pkg/heartbeatclient/heartbeatclient.go new file mode 100644 index 000000000..efe2f8bf4 --- /dev/null +++ b/integration/pkg/heartbeatclient/heartbeatclient.go @@ -0,0 +1,76 @@ +package heartbeatclient + +import ( + "context" + "crypto/tls" + "fmt" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + insecuregrpc "google.golang.org/grpc/credentials/insecure" + + "github.com/smartcontractkit/chainlink-ccv/protocol/common/hmac" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" +) + +const ( + MinTLSVersion = tls.VersionTLS12 +) + +// HeartbeatClient provides methods to send heartbeats to the aggregator service. +type HeartbeatClient struct { + client heartbeatpb.HeartbeatServiceClient + conn *grpc.ClientConn + lggr logger.Logger +} + +// NewHeartbeatClient creates a new heartbeat client that communicates with the aggregator. +// If insecure is true, TLS verification is disabled (only for testing). 
+func NewHeartbeatClient(address string, lggr logger.Logger, hmacConfig *hmac.ClientConfig, insecure bool) (*HeartbeatClient, error) { + var dialOptions []grpc.DialOption + if insecure { + dialOptions = append(dialOptions, grpc.WithTransportCredentials(insecuregrpc.NewCredentials())) + } else { + dialOptions = append(dialOptions, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{MinVersion: MinTLSVersion}))) + } + + if hmacConfig != nil { + dialOptions = append(dialOptions, grpc.WithUnaryInterceptor(hmac.NewClientInterceptor(hmacConfig))) + } + + conn, err := grpc.NewClient( + address, + dialOptions..., + ) + if err != nil { + return nil, err + } + + lggr.Infof("Created HeartbeatClient connecting to %s", address) + + return &HeartbeatClient{ + client: heartbeatpb.NewHeartbeatServiceClient(conn), + conn: conn, + lggr: logger.With(lggr, "service", "heartbeat_client", "aggregatorAddress", address), + }, nil +} + +// SendHeartbeat sends a heartbeat request to the aggregator. +func (hc *HeartbeatClient) SendHeartbeat(ctx context.Context, req *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { + resp, err := hc.client.SendHeartbeat(ctx, req, opts...) + if err != nil { + hc.lggr.Errorw("Failed to send heartbeat", "error", err) + return nil, fmt.Errorf("failed to send heartbeat: %w", err) + } + hc.lggr.Debugw("Heartbeat sent successfully", "timestamp", req.SendTimestamp) + return resp, nil +} + +// Close closes the gRPC connection to the aggregator server. 
+func (hc *HeartbeatClient) Close() error { + if hc.conn != nil { + return hc.conn.Close() + } + return nil +} diff --git a/integration/pkg/heartbeatclient/observed_heartbeat_client.go b/integration/pkg/heartbeatclient/observed_heartbeat_client.go new file mode 100644 index 000000000..124dc477c --- /dev/null +++ b/integration/pkg/heartbeatclient/observed_heartbeat_client.go @@ -0,0 +1,87 @@ +package heartbeatclient + +import ( + "context" + "fmt" + "time" + + "google.golang.org/grpc" + + "github.com/smartcontractkit/chainlink-ccv/verifier" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" +) + +// ObservedHeartbeatClient wraps a HeartbeatClient with observability. +type ObservedHeartbeatClient struct { + delegate *HeartbeatClient + verifierID string + lggr logger.Logger + monitoring verifier.Monitoring +} + +// NewObservedHeartbeatClient creates a new observed heartbeat client. +func NewObservedHeartbeatClient( + delegate *HeartbeatClient, + verifierID string, + lggr logger.Logger, + monitoring verifier.Monitoring, +) *ObservedHeartbeatClient { + return &ObservedHeartbeatClient{ + delegate: delegate, + verifierID: verifierID, + lggr: lggr, + monitoring: monitoring, + } +} + +// SendHeartbeat sends a heartbeat request with observability. +func (o *ObservedHeartbeatClient) SendHeartbeat(ctx context.Context, req *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { + start := time.Now() + + resp, err := o.delegate.SendHeartbeat(ctx, req, opts...) + + duration := time.Since(start) + + metrics := o.monitoring.Metrics().With("verifier_id", o.verifierID) + metrics.RecordHeartbeatDuration(ctx, duration) + + // Record what we're sending in the request. It will be used for monitoring of the lag. 
+ for chainSelector, blockHeight := range req.ChainDetails.BlockHeightsByChain { + chainMetrics := metrics.With("chain_selector", fmt.Sprintf("%d", chainSelector)) + chainMetrics.SetVerifierHeartbeatSentChainHeads(ctx, blockHeight) + } + + if err != nil { + metrics.IncrementHeartbeatsFailed(ctx) + o.lggr.Errorw("Heartbeat failed", + "error", err, + "duration", duration, + ) + return nil, err + } + + metrics.IncrementHeartbeatsSent(ctx) + + metrics.SetVerifierHeartbeatTimestamp(ctx, resp.Timestamp) + + // Record per-chain benchmarks from the response. + for chainSelector, benchmark := range resp.ChainBenchmarks { + chainMetrics := metrics.With("chain_selector", fmt.Sprintf("%d", chainSelector)) + chainMetrics.SetVerifierHeartbeatChainHeads(ctx, benchmark.BlockHeight) + chainMetrics.SetVerifierHeartbeatScore(ctx, float64(benchmark.Score)) + } + + o.lggr.Debugw("Heartbeat succeeded", + "duration", duration, + "chainCount", len(req.ChainDetails.BlockHeightsByChain), + "chainBenchmarkCount", len(resp.ChainBenchmarks), + ) + + return resp, nil +} + +// Close closes the underlying heartbeat client. +func (o *ObservedHeartbeatClient) Close() error { + return o.delegate.Close() +} diff --git a/verifier/heartbeat_reporter.go b/verifier/heartbeat_reporter.go new file mode 100644 index 000000000..c39f744da --- /dev/null +++ b/verifier/heartbeat_reporter.go @@ -0,0 +1,184 @@ +package verifier + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/smartcontractkit/chainlink-ccv/protocol" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" +) + +const ( + // DefaultHeartbeatInterval is how often to send heartbeat with chain statuses to aggregator. + DefaultHeartbeatInterval = 10 * time.Second +) + +// HeartbeatReporter periodically reads chain statuses and sends them to the aggregator via heartbeat. 
+type HeartbeatReporter struct { + services.StateMachine + stopCh services.StopChan + wg sync.WaitGroup + + logger logger.Logger + chainStatusManager protocol.ChainStatusManager + heartbeatClient heartbeatpb.HeartbeatServiceClient + allSelectors []protocol.ChainSelector + verifierID string + interval time.Duration +} + +// NewHeartbeatReporter creates a new heartbeat reporter service. +func NewHeartbeatReporter( + lggr logger.Logger, + chainStatusManager protocol.ChainStatusManager, + heartbeatClient heartbeatpb.HeartbeatServiceClient, + allSelectors []protocol.ChainSelector, + verifierID string, + interval time.Duration, +) (*HeartbeatReporter, error) { + if lggr == nil { + return nil, fmt.Errorf("logger cannot be nil") + } + if chainStatusManager == nil { + return nil, fmt.Errorf("chainStatusManager cannot be nil") + } + if heartbeatClient == nil { + return nil, fmt.Errorf("heartbeatClient cannot be nil") + } + if len(allSelectors) == 0 { + return nil, fmt.Errorf("allSelectors cannot be empty") + } + if verifierID == "" { + return nil, fmt.Errorf("verifierID cannot be empty") + } + + if interval == 0 { + interval = DefaultHeartbeatInterval + } + + return &HeartbeatReporter{ + stopCh: make(chan struct{}), + logger: lggr, + chainStatusManager: chainStatusManager, + heartbeatClient: heartbeatClient, + allSelectors: allSelectors, + verifierID: verifierID, + interval: interval, + }, nil +} + +// Start begins the heartbeat reporter service. +func (hr *HeartbeatReporter) Start(ctx context.Context) error { + return hr.StartOnce(hr.Name(), func() error { + hr.logger.Infow("Starting heartbeat reporter", "interval", hr.interval) + hr.wg.Add(1) + go hr.reportLoop(ctx) + return nil + }) +} + +// Close stops the heartbeat reporter service. 
+func (hr *HeartbeatReporter) Close() error { + return hr.StopOnce(hr.Name(), func() error { + hr.logger.Infow("Stopping heartbeat reporter") + close(hr.stopCh) + hr.wg.Wait() + hr.logger.Infow("Heartbeat reporter stopped") + return nil + }) +} + +// Name returns the name of the service. +func (hr *HeartbeatReporter) Name() string { + return fmt.Sprintf("verifier.HeartbeatReporter[%s]", hr.verifierID) +} + +// HealthReport returns a health report for the heartbeat reporter. +func (hr *HeartbeatReporter) HealthReport() map[string]error { + report := make(map[string]error) + report[hr.Name()] = hr.Ready() + return report +} + +// reportLoop is the main loop that periodically sends heartbeats with chain statuses. +func (hr *HeartbeatReporter) reportLoop(ctx context.Context) { + defer hr.wg.Done() + + ticker := time.NewTicker(hr.interval) + defer ticker.Stop() + + // Send initial heartbeat immediately. + hr.sendHeartbeat(ctx) + + for { + select { + case <-hr.stopCh: + hr.logger.Infow("Heartbeat reporter loop stopped") + return + case <-ctx.Done(): + hr.logger.Infow("Heartbeat reporter context cancelled") + return + case <-ticker.C: + hr.sendHeartbeat(ctx) + } + } +} + +// sendHeartbeat reads chain statuses and sends them to the aggregator. +func (hr *HeartbeatReporter) sendHeartbeat(ctx context.Context) { + // Read chain statuses for all selectors. + statusMap, err := hr.chainStatusManager.ReadChainStatuses(ctx, hr.allSelectors) + if err != nil { + hr.logger.Errorw("Failed to read chain statuses", "error", err) + return + } + + // Build block heights map for heartbeat. + blockHeightsByChain := make(map[uint64]uint64) + for _, selector := range hr.allSelectors { + status, ok := statusMap[selector] + if !ok { + hr.logger.Debugw("Chain status not found", "chainSelector", selector) + continue + } + + // Add block height for this chain if available. + // TODO: change to use latest seen block height instead of finalized when available. 
+ if status.FinalizedBlockHeight != nil { + blockHeightsByChain[uint64(selector)] = status.FinalizedBlockHeight.Uint64() + } + } + + // Create and send heartbeat request. + req := &heartbeatpb.HeartbeatRequest{ + SendTimestamp: time.Now().Unix(), + ChainDetails: &heartbeatpb.ChainHealthDetails{ + BlockHeightsByChain: blockHeightsByChain, + }, + } + + resp, err := hr.heartbeatClient.SendHeartbeat(ctx, req) + if err != nil { + hr.logger.Errorw("Failed to send heartbeat", "error", err) + return + } + + hr.logger.Infow("Heartbeat sent successfully", + "verifierId", hr.verifierID, + "aggregatorId", resp.AggregatorId, + "chainCount", len(blockHeightsByChain), + ) + hr.logger.Debugw("Heartbeat details", + "verifierId", hr.verifierID, + "blockHeightsByChain", blockHeightsByChain, + "chainBenchmarks", resp.ChainBenchmarks, + "aggregatorId", resp.AggregatorId, + "respTimestamp", resp.Timestamp, + ) +} + +var _ services.Service = (*HeartbeatReporter)(nil) diff --git a/verifier/interfaces.go b/verifier/interfaces.go index b2780417e..b79055ed8 100644 --- a/verifier/interfaces.go +++ b/verifier/interfaces.go @@ -75,6 +75,23 @@ type MetricLabeler interface { // IncrementStorageWriteErrors increments the counter for storage write errors. IncrementStorageWriteErrors(ctx context.Context) + // Heartbeat tracking + + // IncrementHeartbeatsSent increments the counter for successfully sent heartbeats. + IncrementHeartbeatsSent(ctx context.Context) + // IncrementHeartbeatsFailed increments the counter for failed heartbeat attempts. + IncrementHeartbeatsFailed(ctx context.Context) + // RecordHeartbeatDuration records the duration of a heartbeat request. + RecordHeartbeatDuration(ctx context.Context, duration time.Duration) + // SetVerifierHeartbeatTimestamp sets the timestamp from the heartbeat response. + SetVerifierHeartbeatTimestamp(ctx context.Context, timestamp int64) + // SetVerifierHeartbeatSentChainHeads sets the block height sent in the heartbeat request for a chain. 
+ SetVerifierHeartbeatSentChainHeads(ctx context.Context, blockHeight uint64) + // SetVerifierHeartbeatChainHeads sets the block height for a chain from the heartbeat response. + SetVerifierHeartbeatChainHeads(ctx context.Context, blockHeight uint64) + // SetVerifierHeartbeatScore sets the score for a chain from the heartbeat response. + SetVerifierHeartbeatScore(ctx context.Context, score float64) + // Chain state tracking (for multi-chain monitoring) // RecordSourceChainLatestBlock records the latest block number for a source chain. diff --git a/verifier/pkg/monitoring/metrics.go b/verifier/pkg/monitoring/metrics.go index ef35f1f6d..65d14741e 100644 --- a/verifier/pkg/monitoring/metrics.go +++ b/verifier/pkg/monitoring/metrics.go @@ -35,6 +35,15 @@ type VerifierMetrics struct { // Error Tracking storageWriteErrorsCounter metric.Int64Counter + // Heartbeat Tracking + heartbeatsSentCounter metric.Int64Counter + heartbeatsFailedCounter metric.Int64Counter + heartbeatDurationSeconds metric.Float64Histogram + verifierHeartbeatTimestamp metric.Float64Gauge + verifierHeartbeatSentChainHeads metric.Int64Gauge + verifierHeartbeatChainHeads metric.Int64Gauge + verifierHeartbeatScore metric.Float64Gauge + // Chain State sourceChainLatestBlockGauge metric.Int64Gauge sourceChainFinalizedBlockGauge metric.Int64Gauge @@ -129,6 +138,64 @@ func InitMetrics() (*VerifierMetrics, error) { return nil, fmt.Errorf("failed to register storage write errors counter: %w", err) } + // Heartbeat Tracking + vm.heartbeatsSentCounter, err = beholder.GetMeter().Int64Counter( + "verifier_heartbeats_sent_total", + metric.WithDescription("Total number of successfully sent heartbeats"), + ) + if err != nil { + return nil, fmt.Errorf("failed to register heartbeats sent counter: %w", err) + } + + vm.heartbeatsFailedCounter, err = beholder.GetMeter().Int64Counter( + "verifier_heartbeats_failed_total", + metric.WithDescription("Total number of failed heartbeat attempts"), + ) + if err != nil { + return 
nil, fmt.Errorf("failed to register heartbeats failed counter: %w", err) + } + + vm.heartbeatDurationSeconds, err = beholder.GetMeter().Float64Histogram( + "verifier_heartbeat_duration_seconds", + metric.WithDescription("Duration of heartbeat requests"), + metric.WithUnit("seconds"), + ) + if err != nil { + return nil, fmt.Errorf("failed to register heartbeat duration histogram: %w", err) + } + + vm.verifierHeartbeatTimestamp, err = beholder.GetMeter().Float64Gauge( + "verifier_heartbeat_timestamp", + metric.WithDescription("Timestamp from the heartbeat response"), + ) + if err != nil { + return nil, fmt.Errorf("failed to register verifier heartbeat timestamp gauge: %w", err) + } + + vm.verifierHeartbeatSentChainHeads, err = beholder.GetMeter().Int64Gauge( + "verifier_heartbeat_sent_chain_heads", + metric.WithDescription("Block height sent in the heartbeat request for a chain"), + ) + if err != nil { + return nil, fmt.Errorf("failed to register verifier heartbeat sent chain heads gauge: %w", err) + } + + vm.verifierHeartbeatChainHeads, err = beholder.GetMeter().Int64Gauge( + "verifier_heartbeat_chain_heads", + metric.WithDescription("Block height for a chain from the heartbeat response"), + ) + if err != nil { + return nil, fmt.Errorf("failed to register verifier heartbeat chain heads gauge: %w", err) + } + + vm.verifierHeartbeatScore, err = beholder.GetMeter().Float64Gauge( + "verifier_heartbeat_score", + metric.WithDescription("Score for a chain from the heartbeat response"), + ) + if err != nil { + return nil, fmt.Errorf("failed to register verifier heartbeat score gauge: %w", err) + } + // Chain State vm.sourceChainLatestBlockGauge, err = beholder.GetMeter().Int64Gauge( "verifier_source_chain_latest_block", @@ -256,6 +323,41 @@ func (v *VerifierMetricLabeler) IncrementStorageWriteErrors(ctx context.Context) v.vm.storageWriteErrorsCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...)) } +func (v *VerifierMetricLabeler) IncrementHeartbeatsSent(ctx 
context.Context) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.heartbeatsSentCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...)) +} + +func (v *VerifierMetricLabeler) IncrementHeartbeatsFailed(ctx context.Context) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.heartbeatsFailedCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...)) +} + +func (v *VerifierMetricLabeler) RecordHeartbeatDuration(ctx context.Context, duration time.Duration) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.heartbeatDurationSeconds.Record(ctx, duration.Seconds(), metric.WithAttributes(otelLabels...)) +} + +func (v *VerifierMetricLabeler) SetVerifierHeartbeatTimestamp(ctx context.Context, timestamp int64) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.verifierHeartbeatTimestamp.Record(ctx, float64(timestamp), metric.WithAttributes(otelLabels...)) +} + +func (v *VerifierMetricLabeler) SetVerifierHeartbeatSentChainHeads(ctx context.Context, blockHeight uint64) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.verifierHeartbeatSentChainHeads.Record(ctx, int64(blockHeight), metric.WithAttributes(otelLabels...)) // #nosec G115 -- block heights are within int64 range +} + +func (v *VerifierMetricLabeler) SetVerifierHeartbeatChainHeads(ctx context.Context, blockHeight uint64) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.verifierHeartbeatChainHeads.Record(ctx, int64(blockHeight), metric.WithAttributes(otelLabels...)) // #nosec G115 -- block heights are within int64 range +} + +func (v *VerifierMetricLabeler) SetVerifierHeartbeatScore(ctx context.Context, score float64) { + otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() + v.vm.verifierHeartbeatScore.Record(ctx, score, metric.WithAttributes(otelLabels...)) +} + func (v *VerifierMetricLabeler) RecordSourceChainLatestBlock(ctx 
context.Context, blockNum int64) { otelLabels := beholder.OtelAttributes(v.Labels).AsStringAttributes() v.vm.sourceChainLatestBlockGauge.Record(ctx, blockNum, metric.WithAttributes(otelLabels...)) diff --git a/verifier/pkg/monitoring/monitoring.go b/verifier/pkg/monitoring/monitoring.go index 25915e0c5..dc3fb6729 100644 --- a/verifier/pkg/monitoring/monitoring.go +++ b/verifier/pkg/monitoring/monitoring.go @@ -98,6 +98,20 @@ func (f *FakeVerifierMetricLabeler) IncrementMessagesProcessed(context.Context) func (f *FakeVerifierMetricLabeler) IncrementMessagesVerificationFailed(context.Context) {} +func (f *FakeVerifierMetricLabeler) IncrementHeartbeatsSent(context.Context) {} + +func (f *FakeVerifierMetricLabeler) IncrementHeartbeatsFailed(context.Context) {} + +func (f *FakeVerifierMetricLabeler) RecordHeartbeatDuration(context.Context, time.Duration) {} + +func (f *FakeVerifierMetricLabeler) SetVerifierHeartbeatTimestamp(context.Context, int64) {} + +func (f *FakeVerifierMetricLabeler) SetVerifierHeartbeatSentChainHeads(context.Context, uint64) {} + +func (f *FakeVerifierMetricLabeler) SetVerifierHeartbeatChainHeads(context.Context, uint64) {} + +func (f *FakeVerifierMetricLabeler) SetVerifierHeartbeatScore(context.Context, float64) {} + func (f *FakeVerifierMetricLabeler) RecordFinalityWaitDuration(context.Context, time.Duration) {} func (f *FakeVerifierMetricLabeler) RecordMessageVerificationDuration(context.Context, time.Duration) { diff --git a/verifier/verification_coordinator.go b/verifier/verification_coordinator.go index e461c0ebe..7c9f25368 100644 --- a/verifier/verification_coordinator.go +++ b/verifier/verification_coordinator.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "maps" + "time" "github.com/smartcontractkit/chainlink-ccv/common" cursecheckerimpl "github.com/smartcontractkit/chainlink-ccv/integration/pkg/cursechecker" @@ -12,6 +13,7 @@ import ( "github.com/smartcontractkit/chainlink-ccv/protocol" 
"github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) type Coordinator struct { @@ -30,6 +32,8 @@ type Coordinator struct { taskVerifierProcessor *TaskVerifierProcessor // 3rd step processor: storage writer storageWriterProcessor *StorageWriterProcessor + // Heartbeat reporter: periodically sends chain statuses to aggregator + heartbeatReporter *HeartbeatReporter } func NewCoordinator( @@ -42,6 +46,8 @@ func NewCoordinator( messageTracker MessageLatencyTracker, monitoring Monitoring, chainStatusManager protocol.ChainStatusManager, + heartbeatClient heartbeatpb.HeartbeatServiceClient, + heartbeatInterval time.Duration, ) (*Coordinator, error) { return NewCoordinatorWithDetector( ctx, @@ -54,6 +60,8 @@ func NewCoordinator( monitoring, chainStatusManager, nil, + heartbeatClient, + heartbeatInterval, ) } @@ -68,6 +76,8 @@ func NewCoordinatorWithDetector( monitoring Monitoring, chainStatusManager protocol.ChainStatusManager, detector common.CurseCheckerService, + heartbeatClient heartbeatpb.HeartbeatServiceClient, + heartbeatInterval time.Duration, ) (*Coordinator, error) { enabledSourceReaders, err := filterOnlyEnabledSourceReaders(ctx, lggr, config, sourceReaders, chainStatusManager) if err != nil { @@ -100,12 +110,36 @@ func NewCoordinatorWithDetector( return nil, fmt.Errorf("failed to create or/and start task verifier service: %w", err) } + var heartbeatReporter *HeartbeatReporter + + if heartbeatClient != nil { + // Collect all chain selectors from source readers. 
+ allSelectors := make([]protocol.ChainSelector, 0, len(sourceReaders)) + for selector := range sourceReaders { + allSelectors = append(allSelectors, selector) + } + + heartbeatReporter, err = NewHeartbeatReporter( + logger.With(lggr, "component", "HeartbeatReporter"), + chainStatusManager, + heartbeatClient, + allSelectors, + config.VerifierID, + heartbeatInterval, + ) + if err != nil { + return nil, fmt.Errorf("failed to create heartbeat reporter: %w", err) + } + } + return &Coordinator{ lggr: lggr, + verifierID: config.VerifierID, sourceReadersServices: sourceReaderServices, curseDetector: curseDetector, storageWriterProcessor: storageWriterProcessor, taskVerifierProcessor: taskVerifierProcessor, + heartbeatReporter: heartbeatReporter, }, nil } @@ -146,6 +180,13 @@ func (vc *Coordinator) Start(_ context.Context) error { } } + if vc.heartbeatReporter != nil { + if err := vc.heartbeatReporter.Start(ctx); err != nil { + vc.lggr.Errorw("Failed to start heartbeat reporter", "error", err) + return fmt.Errorf("failed to start heartbeat reporter: %w", err) + } + } + vc.lggr.Infow("Coordinator started successfully") return nil }) @@ -238,6 +279,14 @@ func (vc *Coordinator) Close() error { vc.cancel() errs := make([]error, 0) + + if vc.heartbeatReporter != nil { + if err := vc.heartbeatReporter.Close(); err != nil { + vc.lggr.Errorw("Failed to stop heartbeat reporter", "error", err) + errs = append(errs, fmt.Errorf("failed to stop heartbeat reporter: %w", err)) + } + } + if vc.curseDetector != nil { if err := vc.curseDetector.Close(); err != nil { vc.lggr.Errorw("Failed to stop curse detector", "error", err) From 79976b1207451c3ea7759c8c350a94786ed7b793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Tue, 20 Jan 2026 17:25:49 +0100 Subject: [PATCH 02/17] Fix tests --- cmd/verifier/token/main.go | 4 ++++ integration/pkg/constructors/committee_verifier.go | 2 ++ verifier/helpers_test.go | 7 +++++++ verifier/verification_coordinator_cctp_test.go | 2 
++ verifier/verification_coordinator_curse_test.go | 2 ++ verifier/verification_coordinator_finality_test.go | 2 ++ verifier/verification_coordinator_lbtc_test.go | 2 ++ verifier/verification_coordinator_test.go | 2 ++ 8 files changed, 23 insertions(+) diff --git a/cmd/verifier/token/main.go b/cmd/verifier/token/main.go index 04381877a..2b1bba408 100644 --- a/cmd/verifier/token/main.go +++ b/cmd/verifier/token/main.go @@ -218,6 +218,8 @@ func createCCTPCoordinator( messageTracker, verifierMonitoring, storage.NewChainStatusManager(), + nil, // heartbeatClient - not used for token verifiers yet + 10*time.Second, // heartbeatInterval ) if err != nil { lggr.Errorw("Failed to create verification coordinator for cctp", "error", err) @@ -264,6 +266,8 @@ func createLBTCCoordinator( messageTracker, verifierMonitoring, storage.NewChainStatusManager(), + nil, // heartbeatClient - not used for token verifiers + 10*time.Second, // heartbeatInterval ) if err != nil { lggr.Errorw("Failed to create verification coordinator for lbtc", "error", err) diff --git a/integration/pkg/constructors/committee_verifier.go b/integration/pkg/constructors/committee_verifier.go index 51232713b..e7827df53 100644 --- a/integration/pkg/constructors/committee_verifier.go +++ b/integration/pkg/constructors/committee_verifier.go @@ -161,6 +161,8 @@ func NewVerificationCoordinator( messageTracker, verifierMonitoring, chainStatusManager, + nil, // heartbeatClient - not used in integration tests + 10*time.Second, // heartbeatInterval ) if err != nil { lggr.Errorw("Failed to create verification coordinator", "error", err) diff --git a/verifier/helpers_test.go b/verifier/helpers_test.go index 33d289055..916b005cd 100644 --- a/verifier/helpers_test.go +++ b/verifier/helpers_test.go @@ -158,6 +158,13 @@ func (m *noopMetricLabeler) IncrementStorageWriteErrors(ctx context.Context) func (m *noopMetricLabeler) RecordSourceChainLatestBlock(ctx context.Context, blockNum int64) {} func (m *noopMetricLabeler) 
RecordSourceChainFinalizedBlock(ctx context.Context, blockNum int64) {} func (m *noopMetricLabeler) RecordReorgTrackedSeqNums(ctx context.Context, count int64) {} +func (m *noopMetricLabeler) IncrementHeartbeatsSent(ctx context.Context) {} +func (m *noopMetricLabeler) IncrementHeartbeatsFailed(ctx context.Context) {} +func (m *noopMetricLabeler) RecordHeartbeatDuration(ctx context.Context, duration time.Duration) {} +func (m *noopMetricLabeler) SetVerifierHeartbeatTimestamp(ctx context.Context, timestamp int64) {} +func (m *noopMetricLabeler) SetVerifierHeartbeatSentChainHeads(ctx context.Context, height uint64) {} +func (m *noopMetricLabeler) SetVerifierHeartbeatChainHeads(ctx context.Context, height uint64) {} +func (m *noopMetricLabeler) SetVerifierHeartbeatScore(ctx context.Context, score float64) {} type NoopLatencyTracker struct{} diff --git a/verifier/verification_coordinator_cctp_test.go b/verifier/verification_coordinator_cctp_test.go index d36e83a08..4dac30301 100644 --- a/verifier/verification_coordinator_cctp_test.go +++ b/verifier/verification_coordinator_cctp_test.go @@ -477,6 +477,8 @@ func createCCTPCoordinator( noopLatencyTracker, noopMonitoring, ts.chainStatusManager, + nil, // heartbeatClient - not used in tests + 10*time.Second, // heartbeatInterval ) } diff --git a/verifier/verification_coordinator_curse_test.go b/verifier/verification_coordinator_curse_test.go index 4d3d3e5ee..5f87c1784 100644 --- a/verifier/verification_coordinator_curse_test.go +++ b/verifier/verification_coordinator_curse_test.go @@ -127,6 +127,8 @@ func setupCurseTest(t *testing.T, sourceChain, destChain protocol.ChainSelector, &noopMonitoring{}, setup.chainStatusManager, setup.mockCurseChecker, + nil, // heartbeatClient - not used in curse detection tests + 10*time.Second, // heartbeatInterval ) require.NoError(t, err) setup.coordinator = coordinator diff --git a/verifier/verification_coordinator_finality_test.go b/verifier/verification_coordinator_finality_test.go index 
1968dbd2d..ed2981e38 100644 --- a/verifier/verification_coordinator_finality_test.go +++ b/verifier/verification_coordinator_finality_test.go @@ -355,6 +355,8 @@ func initializeCoordinator(t *testing.T, verifierID string) *coordinatorTestSetu &NoopLatencyTracker{}, &noopMonitoring{}, mockChainStatusManager, + nil, // heartbeatClient - not used in finality tests + 10*time.Second, // heartbeatInterval ) require.NoError(t, err) diff --git a/verifier/verification_coordinator_lbtc_test.go b/verifier/verification_coordinator_lbtc_test.go index a0c9a1135..716a23363 100644 --- a/verifier/verification_coordinator_lbtc_test.go +++ b/verifier/verification_coordinator_lbtc_test.go @@ -276,6 +276,8 @@ func createLBTCCoordinator( noopLatencyTracker, noopMonitoring, ts.chainStatusManager, + nil, // heartbeatClient - not used in tests + 10*time.Second, // heartbeatInterval ) } diff --git a/verifier/verification_coordinator_test.go b/verifier/verification_coordinator_test.go index 12bc704ff..0fa250bef 100644 --- a/verifier/verification_coordinator_test.go +++ b/verifier/verification_coordinator_test.go @@ -150,6 +150,8 @@ func createVerificationCoordinator( noopLatencyTracker, noopMonitoring, ts.chainStatusManager, + nil, // heartbeatClient - not used in tests + 10*time.Second, // heartbeatInterval ) } From a0f3942b9704ddfedd632983baa43120774de4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Tue, 20 Jan 2026 18:05:50 +0100 Subject: [PATCH 03/17] Add basic unit tests --- .../heartbeatclient/heartbeatclient_test.go | 151 ++++++ .../observed_heartbeat_client_test.go | 88 +++ verifier/heartbeat_reporter_test.go | 509 ++++++++++++++++++ 3 files changed, 748 insertions(+) create mode 100644 integration/pkg/heartbeatclient/heartbeatclient_test.go create mode 100644 integration/pkg/heartbeatclient/observed_heartbeat_client_test.go create mode 100644 verifier/heartbeat_reporter_test.go diff --git a/integration/pkg/heartbeatclient/heartbeatclient_test.go 
b/integration/pkg/heartbeatclient/heartbeatclient_test.go new file mode 100644 index 000000000..e34a04713 --- /dev/null +++ b/integration/pkg/heartbeatclient/heartbeatclient_test.go @@ -0,0 +1,151 @@ +package heartbeatclient_test + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" + "github.com/smartcontractkit/chainlink-ccv/protocol/common/hmac" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" +) + +func TestNewHeartbeatClient_InvalidAddress(t *testing.T) { + lggr := logger.Test(t) + + // Test with invalid address that can't be reached + client, err := heartbeatclient.NewHeartbeatClient("invalid://address", lggr, nil, true) + // Connection succeeds but will fail on actual send + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() +} + +func TestHeartbeatClient_SendHeartbeat_Success(t *testing.T) { + lggr := logger.Test(t) + + // Test basic client construction + client, err := heartbeatclient.NewHeartbeatClient("localhost:50051", lggr, nil, true) + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() +} + +func TestHeartbeatClient_SendHeartbeat_WithHMAC(t *testing.T) { + lggr := logger.Test(t) + + // Create HMAC config + hmacConfig := &hmac.ClientConfig{ + APIKey: "test-verifier", + Secret: "test-secret-key-1234567890ab", + } + + // Client should be created successfully with HMAC config + client, err := heartbeatclient.NewHeartbeatClient("localhost:50051", lggr, hmacConfig, true) + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() +} + +func TestHeartbeatClient_Close(t *testing.T) { + lggr := logger.Test(t) + + client, err := heartbeatclient.NewHeartbeatClient("localhost:50051", lggr, nil, true) + require.NoError(t, err) 
+ require.NotNil(t, client) + + // Close should not error + err = client.Close() + // Note: Close() may return an error if there are pending operations + if err != nil { + t.Logf("First close returned error (expected): %v", err) + } + + // Closing again - gRPC connections may error on second close + err = client.Close() + if err != nil { + t.Logf("Second close returned error (expected): %v", err) + } +} + +func TestHeartbeatClient_SendHeartbeat_Timeout(t *testing.T) { + lggr := logger.Test(t) + + client, err := heartbeatclient.NewHeartbeatClient("localhost:50051", lggr, nil, true) + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() + + // Create a context that times out immediately + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Nanosecond) + defer cancel() + + // This should fail due to timeout (since the server isn't actually running) + req := &heartbeatpb.HeartbeatRequest{ + SendTimestamp: time.Now().Unix(), + ChainDetails: &heartbeatpb.ChainHealthDetails{ + BlockHeightsByChain: map[uint64]uint64{42: 100}, + }, + } + + // We expect an error (either deadline exceeded or connection refused) + _, err = client.SendHeartbeat(ctx, req) + assert.Error(t, err) +} + +func TestHeartbeatClient_SendHeartbeat_NilRequest(t *testing.T) { + lggr := logger.Test(t) + + client, err := heartbeatclient.NewHeartbeatClient("localhost:50051", lggr, nil, true) + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + // Sending nil request should fail + _, err = client.SendHeartbeat(ctx, nil) + assert.Error(t, err) +} + +// mockHeartbeatServer is a mock implementation of HeartbeatServiceServer for testing +type mockHeartbeatServer struct { + sendHeartbeatFunc func(ctx context.Context, req *heartbeatpb.HeartbeatRequest) (*heartbeatpb.HeartbeatResponse, error) +} + +func (m *mockHeartbeatServer) SendHeartbeat(ctx context.Context, 
req *heartbeatpb.HeartbeatRequest) (*heartbeatpb.HeartbeatResponse, error) { + if m.sendHeartbeatFunc != nil { + return m.sendHeartbeatFunc(ctx, req) + } + return &heartbeatpb.HeartbeatResponse{Timestamp: time.Now().Unix()}, nil +} + +// TestHeartbeatClient_WithCallOptions tests that call options are properly passed through +func TestHeartbeatClient_WithCallOptions(t *testing.T) { + lggr := logger.Test(t) + + client, err := heartbeatclient.NewHeartbeatClient("localhost:50051", lggr, nil, true) + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + req := &heartbeatpb.HeartbeatRequest{ + SendTimestamp: time.Now().Unix(), + ChainDetails: &heartbeatpb.ChainHealthDetails{ + BlockHeightsByChain: map[uint64]uint64{42: 100}, + }, + } + + // Pass call options (will fail to connect but options should be accepted) + _, err = client.SendHeartbeat(ctx, req, grpc.WaitForReady(false)) + assert.Error(t, err) +} diff --git a/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go b/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go new file mode 100644 index 000000000..57cb650d7 --- /dev/null +++ b/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go @@ -0,0 +1,88 @@ +package heartbeatclient_test + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" + "github.com/smartcontractkit/chainlink-ccv/verifier/pkg/monitoring" + "github.com/smartcontractkit/chainlink-common/pkg/logger" +) + +func TestObservedHeartbeatClient_Close(t *testing.T) { + lggr := logger.Test(t) + fakeMonitoring := monitoring.NewFakeVerifierMonitoring() + + delegateClient := &heartbeatclient.HeartbeatClient{} + + observedClient := heartbeatclient.NewObservedHeartbeatClient( + delegateClient, + "test-verifier", + 
lggr, + fakeMonitoring, + ) + + // Close should not error + err := observedClient.Close() + assert.NoError(t, err) +} + +func TestObservedHeartbeatClient_FakeMonitoring(t *testing.T) { + lggr := logger.Test(t) + ctx := context.Background() + + // Use real FakeVerifierMonitoring to test integration + fakeMonitoring := monitoring.NewFakeVerifierMonitoring() + + delegateClient := &heartbeatclient.HeartbeatClient{} + observedClient := heartbeatclient.NewObservedHeartbeatClient( + delegateClient, + "test-verifier", + lggr, + fakeMonitoring, + ) + require.NotNil(t, observedClient) + + // These should not panic with real monitoring + metrics := fakeMonitoring.Metrics() + assert.NotNil(t, metrics) + + // Verify we can call metric methods without error + metrics.RecordHeartbeatDuration(ctx, 100*time.Millisecond) + metrics.IncrementHeartbeatsSent(ctx) + metrics.IncrementHeartbeatsFailed(ctx) + metrics.SetVerifierHeartbeatTimestamp(ctx, time.Now().Unix()) + metrics.SetVerifierHeartbeatSentChainHeads(ctx, 100) + metrics.SetVerifierHeartbeatChainHeads(ctx, 200) + metrics.SetVerifierHeartbeatScore(ctx, 0.95) +} + +func TestObservedHeartbeatClient_WithChainSelector(t *testing.T) { + lggr := logger.Test(t) + + // Use real FakeVerifierMonitoring, following the codebase pattern + fakeMonitoring := monitoring.NewFakeVerifierMonitoring() + + delegateClient := &heartbeatclient.HeartbeatClient{} + + observedClient := heartbeatclient.NewObservedHeartbeatClient( + delegateClient, + "test-verifier", + lggr, + fakeMonitoring, + ) + require.NotNil(t, observedClient) + + metrics := fakeMonitoring.Metrics() + + // Verify that With() returns a metric labeler that can be used for chain-specific metrics + chainMetrics := metrics.With("chain_selector", "42") + assert.NotNil(t, chainMetrics) + + chainMetrics = metrics.With("chain_selector", "100") + assert.NotNil(t, chainMetrics) +} diff --git a/verifier/heartbeat_reporter_test.go b/verifier/heartbeat_reporter_test.go new file mode 100644 index 
000000000..df6d2623a --- /dev/null +++ b/verifier/heartbeat_reporter_test.go @@ -0,0 +1,509 @@ +package verifier_test + +import ( + "context" + "errors" + "math/big" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + + "github.com/smartcontractkit/chainlink-ccv/internal/mocks" + "github.com/smartcontractkit/chainlink-ccv/protocol" + "github.com/smartcontractkit/chainlink-ccv/verifier" + "github.com/smartcontractkit/chainlink-common/pkg/logger" + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" +) + +// mockHeartbeatClient is a mock implementation of HeartbeatServiceClient for testing +// Note: This is a gRPC client interface from protobuf, so we mock it manually rather than using mockery +type mockHeartbeatClient struct { + mock.Mock +} + +func (m *mockHeartbeatClient) SendHeartbeat(ctx context.Context, in *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { + args := m.Called(ctx, in, opts) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*heartbeatpb.HeartbeatResponse), args.Error(1) +} + +func TestNewHeartbeatReporter_Success(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1, 10, 100} + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 10*time.Second, + ) + require.NoError(t, err) + require.NotNil(t, reporter) +} + +func TestNewHeartbeatReporter_NilLogger(t *testing.T) { + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + selectors := []protocol.ChainSelector{1} + + _, err := verifier.NewHeartbeatReporter( + nil, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 10*time.Second, 
+ ) + require.Error(t, err) + assert.Contains(t, err.Error(), "logger cannot be nil") +} + +func TestNewHeartbeatReporter_NilChainStatusManager(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + selectors := []protocol.ChainSelector{1} + + _, err := verifier.NewHeartbeatReporter( + lggr, + nil, + mockClient, + selectors, + "test-verifier", + 10*time.Second, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "chainStatusManager cannot be nil") +} + +func TestNewHeartbeatReporter_NilHeartbeatClient(t *testing.T) { + lggr := logger.Test(t) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + selectors := []protocol.ChainSelector{1} + + _, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + nil, + selectors, + "test-verifier", + 10*time.Second, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "heartbeatClient cannot be nil") +} + +func TestNewHeartbeatReporter_EmptySelectors(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + _, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + []protocol.ChainSelector{}, + "test-verifier", + 10*time.Second, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "allSelectors cannot be empty") +} + +func TestNewHeartbeatReporter_EmptyVerifierID(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + selectors := []protocol.ChainSelector{1} + + _, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "", + 10*time.Second, + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "verifierID cannot be empty") +} + +func TestNewHeartbeatReporter_DefaultInterval(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + selectors := 
[]protocol.ChainSelector{1} + + // Create with 0 interval - should use default + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 0, + ) + require.NoError(t, err) + require.NotNil(t, reporter) +} + +func TestHeartbeatReporter_StartAndStop(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1, 10} + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // Setup mock responses + chainStatusInfo := &protocol.ChainStatusInfo{ + ChainSelector: 1, + FinalizedBlockHeight: big.NewInt(100), + Disabled: false, + } + + mockStatusMgr.On("ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors).Return(map[protocol.ChainSelector]*protocol.ChainStatusInfo{ + 1: chainStatusInfo, + 10: chainStatusInfo, + }, nil) + + mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { + return req.SendTimestamp > 0 && len(req.ChainDetails.BlockHeightsByChain) > 0 + }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + Timestamp: time.Now().Unix(), + AggregatorId: "test-aggregator", + ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + }, nil) + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 50*time.Millisecond, // Short interval for testing + ) + require.NoError(t, err) + + err = reporter.Start(ctx) + require.NoError(t, err) + + // Wait a bit for the reporter to send a heartbeat + time.Sleep(100 * time.Millisecond) + + // Stop the reporter + err = reporter.Close() + require.NoError(t, err) +} + +func TestHeartbeatReporter_SendHeartbeatFailure(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + 
mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1} + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + chainStatusInfo := &protocol.ChainStatusInfo{ + ChainSelector: 1, + FinalizedBlockHeight: big.NewInt(100), + Disabled: false, + } + + mockStatusMgr.On("ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors).Return(map[protocol.ChainSelector]*protocol.ChainStatusInfo{ + 1: chainStatusInfo, + }, nil) + + // Mock client returns error + mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { + return req.SendTimestamp > 0 + }), mock.Anything).Return(nil, errors.New("connection refused")) + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 50*time.Millisecond, + ) + require.NoError(t, err) + + err = reporter.Start(ctx) + require.NoError(t, err) + + time.Sleep(100 * time.Millisecond) + + err = reporter.Close() + require.NoError(t, err) +} + +func TestHeartbeatReporter_ChainStatusReadError(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1} + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + // Mock status manager returns error + mockStatusMgr.On("ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors).Return(nil, errors.New("database error")) + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 50*time.Millisecond, + ) + require.NoError(t, err) + + err = reporter.Start(ctx) + require.NoError(t, err) + + time.Sleep(100 * time.Millisecond) + + err = reporter.Close() + 
require.NoError(t, err) + + // Verify that ReadChainStatuses was called at least once + mockStatusMgr.AssertCalled(t, "ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors) +} + +func TestHeartbeatReporter_MultipleChains(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1, 10, 100, 1000} + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + // Setup chain statuses for all selectors + statusMap := make(map[protocol.ChainSelector]*protocol.ChainStatusInfo) + for i, selector := range selectors { + statusMap[selector] = &protocol.ChainStatusInfo{ + ChainSelector: selector, + FinalizedBlockHeight: big.NewInt(int64(100 + i*100)), + Disabled: false, + } + } + + mockStatusMgr.On("ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors).Return(statusMap, nil) + + // Verify the request has all chain heights + mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { + if req == nil || req.ChainDetails == nil { + return false + } + return len(req.ChainDetails.BlockHeightsByChain) == len(selectors) + }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + Timestamp: time.Now().Unix(), + AggregatorId: "test-aggregator", + ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + }, nil) + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 50*time.Millisecond, + ) + require.NoError(t, err) + + err = reporter.Start(ctx) + require.NoError(t, err) + + time.Sleep(100 * time.Millisecond) + + err = reporter.Close() + require.NoError(t, err) + + // Verify SendHeartbeat was called with all chains + mockClient.AssertCalled(t, "SendHeartbeat", 
mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { + return len(req.ChainDetails.BlockHeightsByChain) == len(selectors) + }), mock.Anything) +} + +func TestHeartbeatReporter_Name(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + selectors := []protocol.ChainSelector{1} + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "my-verifier", + 10*time.Second, + ) + require.NoError(t, err) + + name := reporter.Name() + assert.Contains(t, name, "my-verifier") + assert.Contains(t, name, "HeartbeatReporter") +} + +func TestHeartbeatReporter_HealthReport(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + selectors := []protocol.ChainSelector{1} + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 10*time.Second, + ) + require.NoError(t, err) + + report := reporter.HealthReport() + assert.NotNil(t, report) + assert.Greater(t, len(report), 0) +} + +func TestHeartbeatReporter_ContextCancellation(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1} + ctx, cancel := context.WithCancel(context.Background()) + + chainStatusInfo := &protocol.ChainStatusInfo{ + ChainSelector: 1, + FinalizedBlockHeight: big.NewInt(100), + Disabled: false, + } + + mockStatusMgr.On("ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors).Return(map[protocol.ChainSelector]*protocol.ChainStatusInfo{ + 1: chainStatusInfo, + }, nil) + + mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), mock.MatchedBy(func(req 
*heartbeatpb.HeartbeatRequest) bool { + return req.SendTimestamp > 0 + }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + Timestamp: time.Now().Unix(), + AggregatorId: "test-aggregator", + ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + }, nil) + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, + mockClient, + selectors, + "test-verifier", + 50*time.Millisecond, + ) + require.NoError(t, err) + + err = reporter.Start(ctx) + require.NoError(t, err) + + time.Sleep(50 * time.Millisecond) + + // Cancel context - should stop the reporter + cancel() + + time.Sleep(100 * time.Millisecond) + + err = reporter.Close() + require.NoError(t, err) +} + +func TestHeartbeatReporter_MissingChainStatus(t *testing.T) { + lggr := logger.Test(t) + mockClient := new(mockHeartbeatClient) + mockStatusMgr := mocks.NewMockChainStatusManager(t) + + selectors := []protocol.ChainSelector{1, 10} + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + // Only return status for one chain (not the other) + statusMap := map[protocol.ChainSelector]*protocol.ChainStatusInfo{ + 1: { + ChainSelector: 1, + FinalizedBlockHeight: big.NewInt(100), + Disabled: false, + }, + } + + mockStatusMgr.On("ReadChainStatuses", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), selectors).Return(statusMap, nil) + + // Should send heartbeat with only the available chain + mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { + return c != nil + }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { + // Should only have 1 chain since the other one is missing + return len(req.ChainDetails.BlockHeightsByChain) == 1 + }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + Timestamp: time.Now().Unix(), + AggregatorId: "test-aggregator", + ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + }, nil) + + reporter, err := verifier.NewHeartbeatReporter( + lggr, + mockStatusMgr, 
+ mockClient, + selectors, + "test-verifier", + 50*time.Millisecond, + ) + require.NoError(t, err) + + err = reporter.Start(ctx) + require.NoError(t, err) + + time.Sleep(100 * time.Millisecond) + + err = reporter.Close() + require.NoError(t, err) +} From 97f8e475586aae793222f81221b621a7a6fc86a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Tue, 20 Jan 2026 18:22:04 +0100 Subject: [PATCH 04/17] Fix linter issues --- .../pkg/heartbeatclient/heartbeatclient_test.go | 14 +------------- verifier/heartbeat_reporter_test.go | 4 ++-- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/integration/pkg/heartbeatclient/heartbeatclient_test.go b/integration/pkg/heartbeatclient/heartbeatclient_test.go index e34a04713..0c5acbb21 100644 --- a/integration/pkg/heartbeatclient/heartbeatclient_test.go +++ b/integration/pkg/heartbeatclient/heartbeatclient_test.go @@ -114,19 +114,7 @@ func TestHeartbeatClient_SendHeartbeat_NilRequest(t *testing.T) { assert.Error(t, err) } -// mockHeartbeatServer is a mock implementation of HeartbeatServiceServer for testing -type mockHeartbeatServer struct { - sendHeartbeatFunc func(ctx context.Context, req *heartbeatpb.HeartbeatRequest) (*heartbeatpb.HeartbeatResponse, error) -} - -func (m *mockHeartbeatServer) SendHeartbeat(ctx context.Context, req *heartbeatpb.HeartbeatRequest) (*heartbeatpb.HeartbeatResponse, error) { - if m.sendHeartbeatFunc != nil { - return m.sendHeartbeatFunc(ctx, req) - } - return &heartbeatpb.HeartbeatResponse{Timestamp: time.Now().Unix()}, nil -} - -// TestHeartbeatClient_WithCallOptions tests that call options are properly passed through +// TestHeartbeatClient_WithCallOptions tests that call options are properly passed through. 
func TestHeartbeatClient_WithCallOptions(t *testing.T) { lggr := logger.Test(t) diff --git a/verifier/heartbeat_reporter_test.go b/verifier/heartbeat_reporter_test.go index df6d2623a..925de3c14 100644 --- a/verifier/heartbeat_reporter_test.go +++ b/verifier/heartbeat_reporter_test.go @@ -19,8 +19,8 @@ import ( heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) -// mockHeartbeatClient is a mock implementation of HeartbeatServiceClient for testing -// Note: This is a gRPC client interface from protobuf, so we mock it manually rather than using mockery +// mockHeartbeatClient is a mock implementation of HeartbeatServiceClient for testing. +// Note: This is a gRPC client interface from protobuf, so we mock it manually rather than using mockery. type mockHeartbeatClient struct { mock.Mock } From 0b12d5b9369c31acd2a33dc461279e171911bbae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Wed, 21 Jan 2026 08:19:52 +0100 Subject: [PATCH 05/17] Fixes --- cmd/verifier/token/main.go | 4 ++-- integration/pkg/constructors/committee_verifier.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/verifier/token/main.go b/cmd/verifier/token/main.go index 2b1bba408..edd092123 100644 --- a/cmd/verifier/token/main.go +++ b/cmd/verifier/token/main.go @@ -266,8 +266,8 @@ func createLBTCCoordinator( messageTracker, verifierMonitoring, storage.NewChainStatusManager(), - nil, // heartbeatClient - not used for token verifiers - 10*time.Second, // heartbeatInterval + nil, // heartbeatClient - not used for token verifiers + 0*time.Second, // heartbeatInterval ) if err != nil { lggr.Errorw("Failed to create verification coordinator for lbtc", "error", err) diff --git a/integration/pkg/constructors/committee_verifier.go b/integration/pkg/constructors/committee_verifier.go index e7827df53..bbb42596d 100644 --- a/integration/pkg/constructors/committee_verifier.go +++ 
b/integration/pkg/constructors/committee_verifier.go @@ -161,8 +161,8 @@ func NewVerificationCoordinator( messageTracker, verifierMonitoring, chainStatusManager, - nil, // heartbeatClient - not used in integration tests - 10*time.Second, // heartbeatInterval + nil, // heartbeatClient - not used yet + 0*time.Second, // not used ) if err != nil { lggr.Errorw("Failed to create verification coordinator", "error", err) From 5bb8dbe06fa1e920b31c6caec0b62658269e83d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Wed, 21 Jan 2026 15:44:18 +0100 Subject: [PATCH 06/17] Use NoopHeartbeatClient instead of nil --- cmd/verifier/token/main.go | 5 ++-- .../pkg/constructors/committee_verifier.go | 3 +- .../pkg/heartbeatclient/heartbeatclient.go | 2 +- integration/pkg/heartbeatclient/noop.go | 29 +++++++++++++++++++ .../verification_coordinator_cctp_test.go | 4 ++- .../verification_coordinator_curse_test.go | 4 ++- .../verification_coordinator_finality_test.go | 3 +- .../verification_coordinator_lbtc_test.go | 3 +- verifier/verification_coordinator_test.go | 4 ++- 9 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 integration/pkg/heartbeatclient/noop.go diff --git a/cmd/verifier/token/main.go b/cmd/verifier/token/main.go index edd092123..56fef8328 100644 --- a/cmd/verifier/token/main.go +++ b/cmd/verifier/token/main.go @@ -14,6 +14,7 @@ import ( "go.uber.org/zap/zapcore" cmd "github.com/smartcontractkit/chainlink-ccv/cmd/verifier" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-ccv/protocol/common/logging" @@ -218,7 +219,7 @@ func createCCTPCoordinator( messageTracker, verifierMonitoring, storage.NewChainStatusManager(), - nil, // heartbeatClient - not used for token verifiers yet + heartbeatclient.NewNoopHeartbeatClient(), 10*time.Second, // heartbeatInterval 
) if err != nil { @@ -266,7 +267,7 @@ func createLBTCCoordinator( messageTracker, verifierMonitoring, storage.NewChainStatusManager(), - nil, // heartbeatClient - not used for token verifiers + heartbeatclient.NewNoopHeartbeatClient(), 0*time.Second, // heartbeatInterval ) if err != nil { diff --git a/integration/pkg/constructors/committee_verifier.go b/integration/pkg/constructors/committee_verifier.go index bbb42596d..e713695ae 100644 --- a/integration/pkg/constructors/committee_verifier.go +++ b/integration/pkg/constructors/committee_verifier.go @@ -9,6 +9,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/smartcontractkit/chainlink-ccip/ccv/chains/evm/gobindings/generated/latest/onramp" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/integration/pkg/sourcereader" "github.com/smartcontractkit/chainlink-ccv/integration/storageaccess" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" @@ -161,7 +162,7 @@ func NewVerificationCoordinator( messageTracker, verifierMonitoring, chainStatusManager, - nil, // heartbeatClient - not used yet + heartbeatclient.NewNoopHeartbeatClient(), 0*time.Second, // not used ) if err != nil { diff --git a/integration/pkg/heartbeatclient/heartbeatclient.go b/integration/pkg/heartbeatclient/heartbeatclient.go index efe2f8bf4..5f439261a 100644 --- a/integration/pkg/heartbeatclient/heartbeatclient.go +++ b/integration/pkg/heartbeatclient/heartbeatclient.go @@ -15,7 +15,7 @@ import ( ) const ( - MinTLSVersion = tls.VersionTLS12 + MinTLSVersion = tls.VersionTLS13 ) // HeartbeatClient provides methods to send heartbeats to the aggregator service. 
diff --git a/integration/pkg/heartbeatclient/noop.go b/integration/pkg/heartbeatclient/noop.go new file mode 100644 index 000000000..915c34715 --- /dev/null +++ b/integration/pkg/heartbeatclient/noop.go @@ -0,0 +1,29 @@ +package heartbeatclient + +import ( + "context" + "time" + + "google.golang.org/grpc" + + heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" +) + +// NoopHeartbeatClient is a no-op implementation of HeartbeatServiceClient. +type NoopHeartbeatClient struct{} + +// NewNoopHeartbeatClient creates a new no-op heartbeat client. +func NewNoopHeartbeatClient() *NoopHeartbeatClient { + return &NoopHeartbeatClient{} +} + +// SendHeartbeat is a no-op implementation that returns a dummy response. +func (n *NoopHeartbeatClient) SendHeartbeat(ctx context.Context, in *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { + return &heartbeatpb.HeartbeatResponse{ + Timestamp: time.Now().Unix(), + AggregatorId: "noop", + ChainBenchmarks: make(map[uint64]*heartbeatpb.ChainBenchmark), + }, nil +} + +var _ heartbeatpb.HeartbeatServiceClient = (*NoopHeartbeatClient)(nil) diff --git a/verifier/verification_coordinator_cctp_test.go b/verifier/verification_coordinator_cctp_test.go index 4dac30301..c5c0cb281 100644 --- a/verifier/verification_coordinator_cctp_test.go +++ b/verifier/verification_coordinator_cctp_test.go @@ -9,6 +9,8 @@ import ( "testing" "time" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -477,7 +479,7 @@ func createCCTPCoordinator( noopLatencyTracker, noopMonitoring, ts.chainStatusManager, - nil, // heartbeatClient - not used in tests + heartbeatclient.NewNoopHeartbeatClient(), 10*time.Second, // heartbeatInterval ) } diff --git a/verifier/verification_coordinator_curse_test.go b/verifier/verification_coordinator_curse_test.go index 5f87c1784..50ae5349c 100644 --- 
a/verifier/verification_coordinator_curse_test.go +++ b/verifier/verification_coordinator_curse_test.go @@ -9,6 +9,8 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" + "github.com/smartcontractkit/chainlink-ccv/internal/mocks" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" @@ -127,7 +129,7 @@ func setupCurseTest(t *testing.T, sourceChain, destChain protocol.ChainSelector, &noopMonitoring{}, setup.chainStatusManager, setup.mockCurseChecker, - nil, // heartbeatClient - not used in curse detection tests + heartbeatclient.NewNoopHeartbeatClient(), 10*time.Second, // heartbeatInterval ) require.NoError(t, err) diff --git a/verifier/verification_coordinator_finality_test.go b/verifier/verification_coordinator_finality_test.go index ed2981e38..22342b9e5 100644 --- a/verifier/verification_coordinator_finality_test.go +++ b/verifier/verification_coordinator_finality_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/zap" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/internal/mocks" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" @@ -355,7 +356,7 @@ func initializeCoordinator(t *testing.T, verifierID string) *coordinatorTestSetu &NoopLatencyTracker{}, &noopMonitoring{}, mockChainStatusManager, - nil, // heartbeatClient - not used in finality tests + heartbeatclient.NewNoopHeartbeatClient(), 10*time.Second, // heartbeatInterval ) require.NoError(t, err) diff --git a/verifier/verification_coordinator_lbtc_test.go b/verifier/verification_coordinator_lbtc_test.go index 716a23363..83260771b 100644 --- a/verifier/verification_coordinator_lbtc_test.go +++ b/verifier/verification_coordinator_lbtc_test.go @@ -7,6 +7,7 @@ import ( 
"testing" "time" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" "github.com/smartcontractkit/chainlink-ccv/verifier" "github.com/smartcontractkit/chainlink-ccv/verifier/pkg/monitoring" @@ -276,7 +277,7 @@ func createLBTCCoordinator( noopLatencyTracker, noopMonitoring, ts.chainStatusManager, - nil, // heartbeatClient - not used in tests + heartbeatclient.NewNoopHeartbeatClient(), 10*time.Second, // heartbeatInterval ) } diff --git a/verifier/verification_coordinator_test.go b/verifier/verification_coordinator_test.go index 0fa250bef..bd5b38152 100644 --- a/verifier/verification_coordinator_test.go +++ b/verifier/verification_coordinator_test.go @@ -9,6 +9,8 @@ import ( "testing" "time" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" + "github.com/ethereum/go-ethereum/crypto" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -150,7 +152,7 @@ func createVerificationCoordinator( noopLatencyTracker, noopMonitoring, ts.chainStatusManager, - nil, // heartbeatClient - not used in tests + heartbeatclient.NewNoopHeartbeatClient(), 10*time.Second, // heartbeatInterval ) } From 812dfc94b242c77940de3d29e05e109579cfc85a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Wed, 21 Jan 2026 16:14:56 +0100 Subject: [PATCH 07/17] Pass heartbeat interval setting through coordinator config --- cmd/verifier/committee/main.go | 6 ++---- cmd/verifier/token/main.go | 2 -- integration/pkg/constructors/committee_verifier.go | 2 +- verifier/types.go | 1 + verifier/verification_coordinator.go | 8 ++------ verifier/verification_coordinator_cctp_test.go | 1 - verifier/verification_coordinator_curse_test.go | 1 - verifier/verification_coordinator_finality_test.go | 1 - verifier/verification_coordinator_lbtc_test.go | 1 - verifier/verification_coordinator_test.go | 1 - 10 files changed, 6 insertions(+), 18 deletions(-) diff --git 
a/cmd/verifier/committee/main.go b/cmd/verifier/committee/main.go index 29699222f..b8ed3f672 100644 --- a/cmd/verifier/committee/main.go +++ b/cmd/verifier/committee/main.go @@ -183,7 +183,8 @@ func main() { StorageBatchSize: 50, StorageBatchTimeout: 100 * time.Millisecond, StorageRetryDelay: 2 * time.Second, - CursePollInterval: 2 * time.Second, // Poll RMN Remotes for curse status every 2s + CursePollInterval: 2 * time.Second, // Poll RMN Remotes for curse status every 2s + HeartbeatInterval: 10 * time.Second, // Send heartbeat to aggregator every 10s } pk := os.Getenv(PkEnvVar) @@ -222,8 +223,6 @@ func main() { verifierMonitoring, ) - // TODO: make heartbeat interval configurable - heartbeatInterval := 10 * time.Second heartbeatClient, err := heartbeatclient.NewHeartbeatClient( config.AggregatorAddress, lggr, @@ -265,7 +264,6 @@ func main() { verifierMonitoring, chainStatusManager, observedHeartbeatClient, - heartbeatInterval, ) if err != nil { lggr.Errorw("Failed to create verification coordinator", "error", err) diff --git a/cmd/verifier/token/main.go b/cmd/verifier/token/main.go index 56fef8328..cc57f6cae 100644 --- a/cmd/verifier/token/main.go +++ b/cmd/verifier/token/main.go @@ -220,7 +220,6 @@ func createCCTPCoordinator( verifierMonitoring, storage.NewChainStatusManager(), heartbeatclient.NewNoopHeartbeatClient(), - 10*time.Second, // heartbeatInterval ) if err != nil { lggr.Errorw("Failed to create verification coordinator for cctp", "error", err) @@ -268,7 +267,6 @@ func createLBTCCoordinator( verifierMonitoring, storage.NewChainStatusManager(), heartbeatclient.NewNoopHeartbeatClient(), - 0*time.Second, // heartbeatInterval ) if err != nil { lggr.Errorw("Failed to create verification coordinator for lbtc", "error", err) diff --git a/integration/pkg/constructors/committee_verifier.go b/integration/pkg/constructors/committee_verifier.go index e713695ae..e1bab05f4 100644 --- a/integration/pkg/constructors/committee_verifier.go +++ 
b/integration/pkg/constructors/committee_verifier.go @@ -135,6 +135,7 @@ func NewVerificationCoordinator( StorageBatchSize: 50, StorageBatchTimeout: 100 * time.Millisecond, StorageRetryDelay: 2 * time.Second, + HeartbeatInterval: 0, // Disabled by default } // Create commit verifier (with ECDSA signer) @@ -163,7 +164,6 @@ func NewVerificationCoordinator( verifierMonitoring, chainStatusManager, heartbeatclient.NewNoopHeartbeatClient(), - 0*time.Second, // not used ) if err != nil { lggr.Errorw("Failed to create verification coordinator", "error", err) diff --git a/verifier/types.go b/verifier/types.go index 91dae48cb..fb89eb736 100644 --- a/verifier/types.go +++ b/verifier/types.go @@ -39,6 +39,7 @@ type CoordinatorConfig struct { StorageBatchTimeout time.Duration `json:"storage_batch_timeout"` // Maximum duration to wait before flushing incomplete storage batch (default: 100ms) StorageRetryDelay time.Duration `json:"storage_retry_delay"` // Delay before retrying failed storage writes (default: 2s) CursePollInterval time.Duration `json:"curse_poll_interval"` // How often to poll RMN Remote contracts for curse status (default: 2s) + HeartbeatInterval time.Duration `json:"heartbeat_interval"` // How often to send heartbeat to aggregator (default: 10s, 0 disables heartbeat) } // VerificationError represents an error that occurred during message verification. 
diff --git a/verifier/verification_coordinator.go b/verifier/verification_coordinator.go index 7c9f25368..433529478 100644 --- a/verifier/verification_coordinator.go +++ b/verifier/verification_coordinator.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "maps" - "time" "github.com/smartcontractkit/chainlink-ccv/common" cursecheckerimpl "github.com/smartcontractkit/chainlink-ccv/integration/pkg/cursechecker" @@ -47,7 +46,6 @@ func NewCoordinator( monitoring Monitoring, chainStatusManager protocol.ChainStatusManager, heartbeatClient heartbeatpb.HeartbeatServiceClient, - heartbeatInterval time.Duration, ) (*Coordinator, error) { return NewCoordinatorWithDetector( ctx, @@ -61,7 +59,6 @@ func NewCoordinator( chainStatusManager, nil, heartbeatClient, - heartbeatInterval, ) } @@ -77,7 +74,6 @@ func NewCoordinatorWithDetector( chainStatusManager protocol.ChainStatusManager, detector common.CurseCheckerService, heartbeatClient heartbeatpb.HeartbeatServiceClient, - heartbeatInterval time.Duration, ) (*Coordinator, error) { enabledSourceReaders, err := filterOnlyEnabledSourceReaders(ctx, lggr, config, sourceReaders, chainStatusManager) if err != nil { @@ -112,7 +108,7 @@ func NewCoordinatorWithDetector( var heartbeatReporter *HeartbeatReporter - if heartbeatClient != nil { + if heartbeatClient != nil && config.HeartbeatInterval > 0 { // Collect all chain selectors from source readers. 
allSelectors := make([]protocol.ChainSelector, 0, len(sourceReaders)) for selector := range sourceReaders { @@ -125,7 +121,7 @@ func NewCoordinatorWithDetector( heartbeatClient, allSelectors, config.VerifierID, - heartbeatInterval, + config.HeartbeatInterval, ) if err != nil { return nil, fmt.Errorf("failed to create heartbeat reporter: %w", err) diff --git a/verifier/verification_coordinator_cctp_test.go b/verifier/verification_coordinator_cctp_test.go index c5c0cb281..a0a6e580c 100644 --- a/verifier/verification_coordinator_cctp_test.go +++ b/verifier/verification_coordinator_cctp_test.go @@ -480,7 +480,6 @@ func createCCTPCoordinator( noopMonitoring, ts.chainStatusManager, heartbeatclient.NewNoopHeartbeatClient(), - 10*time.Second, // heartbeatInterval ) } diff --git a/verifier/verification_coordinator_curse_test.go b/verifier/verification_coordinator_curse_test.go index 50ae5349c..386bc1a38 100644 --- a/verifier/verification_coordinator_curse_test.go +++ b/verifier/verification_coordinator_curse_test.go @@ -130,7 +130,6 @@ func setupCurseTest(t *testing.T, sourceChain, destChain protocol.ChainSelector, setup.chainStatusManager, setup.mockCurseChecker, heartbeatclient.NewNoopHeartbeatClient(), - 10*time.Second, // heartbeatInterval ) require.NoError(t, err) setup.coordinator = coordinator diff --git a/verifier/verification_coordinator_finality_test.go b/verifier/verification_coordinator_finality_test.go index 22342b9e5..d19e8e4e0 100644 --- a/verifier/verification_coordinator_finality_test.go +++ b/verifier/verification_coordinator_finality_test.go @@ -357,7 +357,6 @@ func initializeCoordinator(t *testing.T, verifierID string) *coordinatorTestSetu &noopMonitoring{}, mockChainStatusManager, heartbeatclient.NewNoopHeartbeatClient(), - 10*time.Second, // heartbeatInterval ) require.NoError(t, err) diff --git a/verifier/verification_coordinator_lbtc_test.go b/verifier/verification_coordinator_lbtc_test.go index 83260771b..33bbdcb19 100644 --- 
a/verifier/verification_coordinator_lbtc_test.go +++ b/verifier/verification_coordinator_lbtc_test.go @@ -278,7 +278,6 @@ func createLBTCCoordinator( noopMonitoring, ts.chainStatusManager, heartbeatclient.NewNoopHeartbeatClient(), - 10*time.Second, // heartbeatInterval ) } diff --git a/verifier/verification_coordinator_test.go b/verifier/verification_coordinator_test.go index bd5b38152..b6d3d1e36 100644 --- a/verifier/verification_coordinator_test.go +++ b/verifier/verification_coordinator_test.go @@ -153,7 +153,6 @@ func createVerificationCoordinator( noopMonitoring, ts.chainStatusManager, heartbeatclient.NewNoopHeartbeatClient(), - 10*time.Second, // heartbeatInterval ) } From 9b6fef05593adb8ef266dacc74d352b3707d92f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Wed, 21 Jan 2026 16:20:19 +0100 Subject: [PATCH 08/17] Update .gitignore to include some dashboards --- build/devenv/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build/devenv/.gitignore b/build/devenv/.gitignore index 3580cf8b0..3342c4891 100644 --- a/build/devenv/.gitignore +++ b/build/devenv/.gitignore @@ -51,5 +51,7 @@ dctl job-distributor/ env-*out.toml dashboards/ +!dashboards/ccv.json +!dashboards/heartbeat*.json fakes/tmp tests/e2e/logs From cdeb10f0d908fc5a74799d136f3ada73f6916b20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Wed, 21 Jan 2026 16:22:38 +0100 Subject: [PATCH 09/17] Add initial dashboards for observing heartbeats --- build/devenv/dashboards/heartbeat_agg.json | 807 ++++++++++++++++++ .../devenv/dashboards/heartbeat_verifier.json | 592 +++++++++++++ 2 files changed, 1399 insertions(+) create mode 100644 build/devenv/dashboards/heartbeat_agg.json create mode 100644 build/devenv/dashboards/heartbeat_verifier.json diff --git a/build/devenv/dashboards/heartbeat_agg.json b/build/devenv/dashboards/heartbeat_agg.json new file mode 100644 index 000000000..6c5b23fba --- /dev/null +++ 
b/build/devenv/dashboards/heartbeat_agg.json @@ -0,0 +1,807 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 12, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 7, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 98 + }, + { + "color": "green", + "value": 100 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "round(\n clamp_max(\n (rate(aggregator_heartbeat_verifier_heartbeats_total[$__range]) / 0.1) * 100,\n 100\n )\n)", + "instant": true, + "legendFormat": "{{caller_id}}", + "range": false, + "refId": "A" + } + ], + "title": "Heartbeat uptime per Verifier", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 98 + }, + { + "color": "green", + "value": 100 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "round(\n clamp_max(\n (rate(aggregator_heartbeat_verifier_heartbeats_total[$__range]) / 0.1) * 100,\n 100\n )\n)", + "hide": false, + "legendFormat": "{{caller_id}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "aggregator_heartbeat_verifier_heartbeats_total", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Heartbeat uptime per Verifier", + "type": "timeseries" + } + ], + "title": "Heartbeat uptime overview", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 8, + "panels": [], + "title": "Head report & Benchmark Score overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.90, aggregator_heartbeat_verifier_score) by (caller_id)", + "legendFormat": "{{caller_id}}", + "range": true, + "refId": "A" + } + ], + "title": "NOP Score P90", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "aggregator_heartbeat_verifier_score{}", + "legendFormat": "{{caller_id}}: {{chain_selector}}", + "range": true, + "refId": "A" + } + ], + "title": "Verifier Score over time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "aggregator_heartbeat_verifier_chain_heads", + "legendFormat": "{{caller_id}}: {{chain_selector}}", + "range": true, + "refId": "A" + } + ], + "title": "Verifier chain heads over time", + "type": "timeseries" +
}, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 12, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "aggregator_heartbeat_verifier_chain_heads", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Verifier chain heads (latest)", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "extractFields", + "options": { + "source": "Metric" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric": true, + "Time": true, + "__name__": true, + "csa_public_key": true, + "exported_job": true, + "host_name": true, + "instance": true, + "job": true, + "os_description": true, + "os_type": true, + "service_name": true, + "telemetry_sdk_language": true, + "telemetry_sdk_name": true, + "telemetry_sdk_version": true + }, + "indexByName": { + "Metric": 1, + "Time": 0, + "Value": 4, + "__name__": 5, + "caller_id": 2, + "chain_selector": 3, + "csa_public_key": 6, + "exported_job": 7, + "host_name": 8, + "instance": 9, + "job": 10, + "os_description": 11, + "os_type": 12, + "service_name": 13, + "telemetry_sdk_language": 14, + "telemetry_sdk_name": 15, + "telemetry_sdk_version": 16 + }, + "renameByName": {} + } + } + ], + "type": "table" + },
+ { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 3, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "time() - max by(caller_id) (aggregator_heartbeat_verifier_heartbeat_timestamp{})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Time since last Heartbeat", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, 
+ { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "time() - max by(caller_id) (aggregator_heartbeat_verifier_heartbeat_timestamp{})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Time since last Heartbeat", + "type": "timeseries" + } + ], + "title": "Time since last Heartbeat", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Health overview (Aggregator View)", + "uid": "c0968deb-5dbc-4635-b8d0-3937ce801c76", + "version": 5, + "weekStart": "" +} diff --git a/build/devenv/dashboards/heartbeat_verifier.json b/build/devenv/dashboards/heartbeat_verifier.json new file mode 100644 index 000000000..aa2b852f7 --- /dev/null +++ b/build/devenv/dashboards/heartbeat_verifier.json @@ -0,0 +1,592 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "title": "Heartbeat chain score & benchmark", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode":
"thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 0, + "y": 1 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": " sum by (chain_selector) (verifier_heartbeat_chain_heads{host_name=\"$hostname\"}) - sum by (chain_selector) (verifier_heartbeat_sent_chain_heads{host_name=\"$hostname\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Blocks behind vs benchmark", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 6, + "w": 14, + "x": 10, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": " sum by (chain_selector) (verifier_heartbeat_chain_heads{host_name=\"$hostname\"}) - sum by (chain_selector) (verifier_heartbeat_sent_chain_heads{host_name=\"$hostname\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (chain_selector) (verifier_heartbeat_chain_heads{host_name=\"$hostname\"})", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Blocks behind vs benchmark", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 7 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "verifier_heartbeat_score{host_name=\"$hostname\"}", + "instant": true, + "legendFormat": "{{chain_selector}}", + "range": false, + "refId": "A" + } + ], + "title":
"Heartbeat benchmark score", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 14, + "x": 10, + "y": 7 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "verifier_heartbeat_score{host_name=\"$hostname\"}", + "legendFormat": "{{chain_selector}}", + "range": true, + "refId": "A" + } + ], + "title": "Heartbeat benchmark score", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 3, + "title": "Heartbeat uptime overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": 
"orange", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "round(\n clamp_max(\n (rate(verifier_heartbeat_duration_seconds_count[$__range]) / 0.1) * 100,\n 100\n )\n)", + "legendFormat": "{{verifier_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Heartbeat uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": 
"single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "round(\n clamp_max(\n (rate(verifier_heartbeat_duration_seconds_count[$__range]) / 0.1) * 100,\n 100\n )\n)", + "legendFormat": "{{verifier_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Heartbeat uptime over time", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "06aab07a7a9f", + "value": "06aab07a7a9f" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(verifier_heartbeats_sent_total,host_name)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "hostname", + "options": [], + "query": { + "query": "label_values(verifier_heartbeats_sent_total,host_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Heartbeat overview (Verifier View)", + "uid": "b62c6293-c01b-4b43-8705-bb3f918e679c", + "version": 1, + "weekStart": "" +} From 0436cb7abf79a1b3330a56ad46d07298fe09b177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Wed, 21 Jan 2026 18:30:27 +0100 Subject: [PATCH 10/17] Add dashboards for heartbeat overview --- build/devenv/.gitignore | 2 +- build/devenv/dashboards/heartbeat_agg.json | 1581 ++++++++++++----- .../devenv/dashboards/heartbeat_verifier.json | 15 +- 3 files changed, 1193 insertions(+), 405 deletions(-) diff --git a/build/devenv/.gitignore b/build/devenv/.gitignore index 3342c4891..47a529807 100644 --- a/build/devenv/.gitignore +++ b/build/devenv/.gitignore @@ -50,7 +50,7 @@ pods.tmp.yml dctl job-distributor/ env-*out.toml -dashboards/ 
+dashboards/* !dashboards/ccv.json !dashboards/heartbeat*.json fakes/tmp diff --git a/build/devenv/dashboards/heartbeat_agg.json b/build/devenv/dashboards/heartbeat_agg.json index 6c5b23fba..fcb5e2c12 100644 --- a/build/devenv/dashboards/heartbeat_agg.json +++ b/build/devenv/dashboards/heartbeat_agg.json @@ -12,18 +12,27 @@ "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" + }, + { + "datasource": { + "type": "loki", + "uid": "P8E80F9AEF21F6940" + }, + "enable": true, + "iconColor": "red", + "name": "New annotation" } ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 12, + "id": 15, "links": [], "liveNow": false, "panels": [ { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -31,208 +40,191 @@ "y": 0 }, "id": 7, - "panels": [ + "panels": [], + "title": "Heartbeat uptime overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 98 - }, - { - "color": "green", - "value": 100 - } - ] - }, - "unit": "percent" + "editorMode": "code", + "exemplar": false, 
+ "expr": "count(\n last_over_time(aggregator_heartbeat_verifier_heartbeats_total[30s])\n)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Verifiers Up", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "overrides": [] + "thresholdsStyle": { + "mode": "off" + } }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 9 + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "round(\n clamp_max(\n (rate(aggregator_heartbeat_verifier_heartbeats_total[$__range]) / 0.1) * 100,\n 100\n )\n)", - "instant": true, - "legendFormat": "{{caller_id}}", - "range": false, - "refId": "A" - } - ], - "title": "Heartbeat uptime per Verifier", - "type": "stat" + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 4, + "y": 1 + }, + "id": 14, + "options": { + "legend": { + 
"calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 98 - }, - { - "color": "green", - "value": 100 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "10.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "round(\n clamp_max(\n (rate(aggregator_heartbeat_verifier_heartbeats_total[$__range]) / 0.1) * 100,\n 100\n )\n)", - "hide": false, - "legendFormat": "{{caller_id}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "aggregator_heartbeat_verifier_heartbeats_total", - "hide": true, - "instant": false, - 
"legendFormat": "__auto", - "range": true, - "refId": "B" - } - ], - "title": "Heartbeat uptime per Verifier", - "type": "timeseries" + "editorMode": "code", + "exemplar": false, + "expr": "count (aggregator_heartbeat_verifier_heartbeats_total{})", + "instant": false, + "legendFormat": "Up", + "range": true, + "refId": "A" } ], - "title": "Heartbeat uptime overview", - "type": "row" + "title": "Verifiers Up", + "type": "timeseries" }, { - "collapsed": false, + "datasource": { + "type": "loki", + "uid": "P8E80F9AEF21F6940" + }, "gridPos": { - "h": 1, - "w": 24, - "x": 0, + "h": 5, + "w": 14, + "x": 10, "y": 1 }, - "id": 8, - "panels": [], - "title": "Head report & Benchmark Score overview", - "type": "row" + "id": 19, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": " ", + "mode": "markdown" + }, + "pluginVersion": "10.1.0", + "transparent": true, + "type": "text" }, { "datasource": { @@ -249,25 +241,31 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, { - "color": "red", - "value": 80 + "color": "orange", + "value": 98 + }, + { + "color": "green", + "value": 100 } ] - } + }, + "unit": "percent" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 24, + "h": 3, + "w": 4, "x": 0, - "y": 2 + "y": 6 }, - "id": 10, + "id": 4, + "maxPerRow": 8, "options": { "colorMode": "value", "graphMode": "area", @@ -278,9 +276,754 @@ "lastNotNull" ], "fields": "", - "values": false + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "repeat": "verifier", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(round(\n clamp_max(\n (rate(aggregator_heartbeat_verifier_heartbeats_total{caller_id=\"$verifier\"}[$__range]) / 0.1) * 100,\n 100\n )\n))", + "instant": true, + "legendFormat": "{{caller_id}}", + "range": false, + 
"refId": "A" + } + ], + "title": "Heartbeat uptime for $verifier", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 98 + }, + { + "color": "green", + "value": 100 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "round(\n clamp_max(\n (rate(aggregator_heartbeat_verifier_heartbeats_total[$__range]) / 0.1) * 100,\n 100\n )\n)", + "hide": false, + "legendFormat": "{{caller_id}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "aggregator_heartbeat_verifier_heartbeats_total", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Heartbeat uptime per 
Verifier", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 3, + "panels": [], + "title": "Heartbeat - Time since last heartbeat", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 20 + }, + "id": 2, + "maxPerRow": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.1.0", + "repeat": "verifier", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "time() - max by(caller_id) (aggregator_heartbeat_verifier_heartbeat_timestamp{caller_id=~\"$verifier\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "$verifier", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": 
"A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "time() - max by(caller_id) (aggregator_heartbeat_verifier_heartbeat_timestamp{})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Time since last Heartbeat", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 8, + "panels": [], + "title": "Heartbeat - head report & benchmark score overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "color-background" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 2 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 10, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.90, 
aggregator_heartbeat_verifier_score{caller_id=~\"$verifier\"}) by (caller_id)", + "instant": true, + "legendFormat": "{{caller_id}}", + "range": false, + "refId": "A" + } + ], + "title": "P90 Score $verifier", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "rowsToFields", + "options": {} + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "color-background" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 2 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 26, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.95, aggregator_heartbeat_verifier_score{caller_id=~\"$verifier\"}) by (caller_id)", + "instant": true, + "legendFormat": "{{caller_id}}", + "range": false, + "refId": "A" + } + ], + "title": "P95 Score $verifier", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "rowsToFields", + "options": {} + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "color-background" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 2 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 27, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.99, aggregator_heartbeat_verifier_score{caller_id=~\"$verifier\"}) by (caller_id)", + "instant": true, + "legendFormat": "{{caller_id}}", + "range": false, + "refId": "A" + } + ], + "title": "P99 Score $verifier", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "rowsToFields", + "options": {} + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": 
[], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 0, + "y": 50 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "aggregator_heartbeat_verifier_score{}", + "legendFormat": "{{caller_id}}: {{chain_selector}}", + "range": true, + "refId": "A" + } + ], + "title": "Verifier Score over time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 50 + }, + "id": 40, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false }, - "textMode": "auto" + "showHeader": true }, "pluginVersion": "10.1.0", "targets": [ @@ -290,14 +1033,72 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "quantile(0.90, aggregator_heartbeat_verifier_score) by (caller_id)", - "legendFormat": "{{caller_id}}", + "expr": "aggregator_heartbeat_verifier_score{}", + "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "NOP Score P90", - "type": "stat" + "title": "Verifier Score over time", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "extractFields", + 
"options": { + "source": "Metric" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric": true, + "Time": true, + "__name__": true, + "chain_selector": false, + "csa_public_key": true, + "exported_job": true, + "host_name": true, + "instance": true, + "job": true, + "os_description": true, + "os_type": true, + "service_name": true, + "telemetry_sdk_language": true, + "telemetry_sdk_name": true, + "telemetry_sdk_version": true + }, + "indexByName": { + "Metric": 1, + "Time": 0, + "Value": 8, + "__name__": 2, + "caller_id": 3, + "chain_selector": 4, + "csa_public_key": 5, + "exported_job": 6, + "host_name": 7, + "instance": 9, + "job": 10, + "os_description": 11, + "os_type": 12, + "service_name": 13, + "telemetry_sdk_language": 14, + "telemetry_sdk_name": 15, + "telemetry_sdk_version": 16 + }, + "renameByName": { + "Value": "Score", + "caller_id": "Verifier", + "chain_selector": "ChainSelector" + } + } + } + ], + "type": "table" }, { "datasource": { @@ -359,11 +1160,11 @@ }, "gridPos": { "h": 9, - "w": 24, + "w": 16, "x": 0, - "y": 11 + "y": 59 }, - "id": 9, + "id": 11, "options": { "legend": { "calcs": [], @@ -383,15 +1184,143 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "aggregator_heartbeat_verifier_score{}", + "expr": "aggregator_heartbeat_verifier_reported_chain_heads", "legendFormat": "{{caller_id}}: {{chain_selector}}", "range": true, "refId": "A" } ], - "title": "Verifier Score over time", + "title": "Verifier reported chain head over time", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + 
"overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 59 + }, + "id": 12, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "aggregator_heartbeat_verifier_reported_chain_heads", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Verifiers reported head details", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "extractFields", + "options": { + "source": "Metric" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Metric": true, + "Time": true, + "__name__": true, + "csa_public_key": true, + "exported_job": true, + "host_name": true, + "instance": true, + "job": true, + "os_description": true, + "os_type": true, + "service_name": true, + "telemetry_sdk_language": true, + "telemetry_sdk_name": true, + "telemetry_sdk_version": true + }, + "indexByName": { + "Metric": 1, + "Time": 0, + "Value": 4, + "__name__": 5, + "caller_id": 2, + "chain_selector": 3, + "csa_public_key": 6, + "exported_job": 7, + "host_name": 8, + "instance": 9, + "job": 10, + "os_description": 11, + "os_type": 12, + "service_name": 13, + "telemetry_sdk_language": 14, + "telemetry_sdk_name": 15, + "telemetry_sdk_version": 16 + }, + "renameByName": { + "Value": "Reported chain head" + } + } + } + ], + "type": "table" + }, { "datasource": { "type": "prometheus", @@ -452,18 +1381,16 @@ }, "gridPos": { "h": 9, - "w": 24, + "w": 16, "x": 0, - "y": 20 + "y": 68 }, - "id": 11, + "id": 38, "options": { "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", + "calcs": [], + "displayMode": "list", + "placement": "bottom", "showLegend": true }, "tooltip": { @@ 
-478,13 +1405,13 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "aggregator_heartbeat_verifier_chain_heads", - "legendFormat": "{{caller_id}}: {{chain_selector}}", + "expr": "max by (chain_selector, caller_id) (aggregator_heartbeat_verifier_reported_chain_heads)\n- on(chain_selector) group_left()\nmax by (chain_selector) (aggregator_heartbeat_verifier_current_max_chain_head)", + "legendFormat": "", "range": true, "refId": "A" } ], - "title": "Verifier Score over time", + "title": "Verifier reported chain gap from max ", "type": "timeseries" }, { @@ -502,6 +1429,7 @@ "cellOptions": { "type": "auto" }, + "filterable": true, "inspect": false }, "mappings": [], @@ -523,11 +1451,11 @@ }, "gridPos": { "h": 9, - "w": 24, - "x": 0, - "y": 29 + "w": 8, + "x": 16, + "y": 68 }, - "id": 12, + "id": 39, "options": { "cellHeight": "sm", "footer": { @@ -549,14 +1477,14 @@ }, "editorMode": "code", "exemplar": false, - "expr": "aggregator_heartbeat_verifier_chain_heads", + "expr": "max by (chain_selector, caller_id) (aggregator_heartbeat_verifier_reported_chain_heads)\n- on(chain_selector) group_left()\nmax by (chain_selector) (aggregator_heartbeat_verifier_current_max_chain_head)", "instant": true, "legendFormat": "__auto", "range": false, "refId": "A" } ], - "title": "Verifier Score over time", + "title": "Verifiers reported head details", "transformations": [ { "id": "seriesToRows", @@ -606,185 +1534,13 @@ "telemetry_sdk_name": 15, "telemetry_sdk_version": 16 }, - "renameByName": {} + "renameByName": { + "Value": "Chain head lag" + } } } ], "type": "table" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 38 - }, - "id": 3, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - 
"value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "10.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "time() - max by(caller_id) (aggregator_heartbeat_verifier_heartbeat_timestamp{})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Time since last Heartbeat", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - 
"uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "time() - max by(caller_id) (aggregator_heartbeat_verifier_heartbeat_timestamp{})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Time since last Heartbeat", - "type": "timeseries" - } - ], - "title": "Row title", - "type": "row" } ], "refresh": "", @@ -792,7 +1548,38 @@ "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(aggregator_heartbeat_verifier_heartbeats_total,caller_id)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "verifier", + "options": [], + "query": { + "query": "label_values(aggregator_heartbeat_verifier_heartbeats_total,caller_id)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-5m", @@ -801,7 +1588,7 @@ "timepicker": {}, "timezone": "", "title": "Health overview (Aggregator View)", - "uid": "c0968deb-5dbc-4635-b8d0-3937ce801c76", - "version": 5, + "uid": "c0968deb-5dbc-4635-b8d0-3937ce801c71", + "version": 13, "weekStart": "" } diff --git a/build/devenv/dashboards/heartbeat_verifier.json b/build/devenv/dashboards/heartbeat_verifier.json index aa2b852f7..3b8722e9e 100644 --- a/build/devenv/dashboards/heartbeat_verifier.json +++ b/build/devenv/dashboards/heartbeat_verifier.json @@ -227,7 +227,7 @@ }, { "color": "red", - "value": 80 + "value": 3 } ] }, @@ -441,7 +441,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "round(\n clamp_max(\n (rate(verifier_heartbeat_duration_seconds_count[$__range]) / 0.1) * 100,\n 100\n )\n)", + "expr": "round(\n clamp_max(\n (rate(verifier_heartbeats_sent_total{host_name=\"$hostname\"}[$__range]) / 0.1) * 100,\n 100\n )\n)", 
"legendFormat": "{{verifier_id}}", "range": true, "refId": "A" @@ -535,7 +535,8 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "round(\n clamp_max(\n (rate(verifier_heartbeat_duration_seconds_count[$__range]) / 0.1) * 100,\n 100\n )\n)", + "expr": "round(\n clamp_max(\n (rate(verifier_heartbeats_sent_total[$__range]) / 0.1) * 100,\n 100\n )\n)", + "hide": false, "legendFormat": "{{verifier_id}}", "range": true, "refId": "A" @@ -554,21 +555,21 @@ { "current": { "selected": false, - "text": "06aab07a7a9f", - "value": "06aab07a7a9f" + "text": "124d04d2d422", + "value": "124d04d2d422" }, "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "definition": "label_values(verifier_heartbeats_sent_total,host_name)", + "definition": "label_values(verifier_heartbeat_score,host_name)", "hide": 0, "includeAll": false, "multi": false, "name": "hostname", "options": [], "query": { - "query": "label_values(verifier_heartbeats_sent_total,host_name)", + "query": "label_values(verifier_heartbeat_score,host_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, From 27996d455538ade8e59292b1a84151819b3dd015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Thu, 22 Jan 2026 12:54:45 +0100 Subject: [PATCH 11/17] fix --- cmd/verifier/committee/main.go | 1 - deployments/environment_topology.go | 3 +-- verifier/verification_coordinator_lbtc_test.go | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/cmd/verifier/committee/main.go b/cmd/verifier/committee/main.go index fab840ce8..ba7cc6986 100644 --- a/cmd/verifier/committee/main.go +++ b/cmd/verifier/committee/main.go @@ -16,7 +16,6 @@ import ( "go.uber.org/zap/zapcore" cmd "github.com/smartcontractkit/chainlink-ccv/cmd/verifier" - ccvcommon "github.com/smartcontractkit/chainlink-ccv/common" "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/integration/storageaccess" 
"github.com/smartcontractkit/chainlink-ccv/protocol" diff --git a/deployments/environment_topology.go b/deployments/environment_topology.go index 6093fcffb..08706ce62 100644 --- a/deployments/environment_topology.go +++ b/deployments/environment_topology.go @@ -4,9 +4,8 @@ import ( "fmt" "os" "slices" - "time" - "strings" + "time" "github.com/BurntSushi/toml" ) diff --git a/verifier/verification_coordinator_lbtc_test.go b/verifier/verification_coordinator_lbtc_test.go index e7f5288f3..77242737e 100644 --- a/verifier/verification_coordinator_lbtc_test.go +++ b/verifier/verification_coordinator_lbtc_test.go @@ -7,7 +7,6 @@ import ( "testing" "time" - "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/stretchr/testify/require" From b4c334a1802fbb3b07927fdd6b43976433117ad4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Thu, 22 Jan 2026 13:31:14 +0100 Subject: [PATCH 12/17] Remove unnecessary dependencies for heartbeat client --- .../observed_heartbeat_client.go | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/integration/pkg/heartbeatclient/observed_heartbeat_client.go b/integration/pkg/heartbeatclient/observed_heartbeat_client.go index 124dc477c..4a561b324 100644 --- a/integration/pkg/heartbeatclient/observed_heartbeat_client.go +++ b/integration/pkg/heartbeatclient/observed_heartbeat_client.go @@ -7,17 +7,49 @@ import ( "google.golang.org/grpc" - "github.com/smartcontractkit/chainlink-ccv/verifier" "github.com/smartcontractkit/chainlink-common/pkg/logger" heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) +// Monitoring provides monitoring functionality for heartbeat clients. +type Monitoring interface { + // Metrics returns the metrics labeler. + Metrics() MetricLabeler +} + +// MetricLabeler provides metric recording functionality. 
+type MetricLabeler interface { + // With returns a new metrics labeler with the given key-value pairs. + With(keyValues ...string) MetricLabeler + + // RecordHeartbeatDuration records the duration of a heartbeat operation. + RecordHeartbeatDuration(ctx context.Context, duration time.Duration) + + // IncrementHeartbeatsSent increments the counter for successfully sent heartbeats. + IncrementHeartbeatsSent(ctx context.Context) + + // IncrementHeartbeatsFailed increments the counter for failed heartbeat attempts. + IncrementHeartbeatsFailed(ctx context.Context) + + // SetVerifierHeartbeatTimestamp sets the timestamp from the heartbeat response. + SetVerifierHeartbeatTimestamp(ctx context.Context, timestamp int64) + + // SetVerifierHeartbeatSentChainHeads sets the block height sent in the heartbeat request for a chain. + SetVerifierHeartbeatSentChainHeads(ctx context.Context, blockHeight uint64) + + // SetVerifierHeartbeatChainHeads sets the block height for a chain from the heartbeat response. + SetVerifierHeartbeatChainHeads(ctx context.Context, blockHeight uint64) + + // SetVerifierHeartbeatScore sets the score for a chain from the heartbeat response. + SetVerifierHeartbeatScore(ctx context.Context, score float64) +} + // ObservedHeartbeatClient wraps a HeartbeatClient with observability. type ObservedHeartbeatClient struct { delegate *HeartbeatClient verifierID string lggr logger.Logger - monitoring verifier.Monitoring + monitoring Monitoring } // NewObservedHeartbeatClient creates a new observed heartbeat client. 
@@ -25,7 +57,7 @@ func NewObservedHeartbeatClient( delegate *HeartbeatClient, verifierID string, lggr logger.Logger, - monitoring verifier.Monitoring, + monitoring Monitoring, ) *ObservedHeartbeatClient { return &ObservedHeartbeatClient{ delegate: delegate, From 307cef0c4b8897b6140f74b1bac1cc231307e0a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Mon, 26 Jan 2026 13:25:09 +0100 Subject: [PATCH 13/17] Fix circular dependency --- cmd/verifier/committee/main.go | 2 +- .../observed_heartbeat_client.go | 3 +- .../observed_heartbeat_client_test.go | 39 +----------- verifier/heartbeat_monitoring_adapter.go | 61 +++++++++++++++++++ 4 files changed, 67 insertions(+), 38 deletions(-) create mode 100644 verifier/heartbeat_monitoring_adapter.go diff --git a/cmd/verifier/committee/main.go b/cmd/verifier/committee/main.go index d1cc43c22..f1b88f061 100644 --- a/cmd/verifier/committee/main.go +++ b/cmd/verifier/committee/main.go @@ -230,7 +230,7 @@ func main() { heartbeatClient, config.VerifierID, lggr, - verifierMonitoring, + verifier.NewHeartbeatMonitoringAdapter(verifierMonitoring), ) messageTracker := monitoring.NewMessageLatencyTracker( diff --git a/integration/pkg/heartbeatclient/observed_heartbeat_client.go b/integration/pkg/heartbeatclient/observed_heartbeat_client.go index 4a561b324..c63349398 100644 --- a/integration/pkg/heartbeatclient/observed_heartbeat_client.go +++ b/integration/pkg/heartbeatclient/observed_heartbeat_client.go @@ -12,12 +12,13 @@ import ( ) // Monitoring provides monitoring functionality for heartbeat clients. +// Services using the heartbeat client should provide an adapter implementing this interface. type Monitoring interface { // Metrics returns the metrics labeler. Metrics() MetricLabeler } -// MetricLabeler provides metric recording functionality. +// MetricLabeler provides metric recording functionality for heartbeat operations. 
type MetricLabeler interface { // With returns a new metrics labeler with the given key-value pairs. With(keyValues ...string) MetricLabeler diff --git a/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go b/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go index 57cb650d7..5aff55bcf 100644 --- a/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go +++ b/integration/pkg/heartbeatclient/observed_heartbeat_client_test.go @@ -1,14 +1,13 @@ package heartbeatclient_test import ( - "context" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" + "github.com/smartcontractkit/chainlink-ccv/verifier" "github.com/smartcontractkit/chainlink-ccv/verifier/pkg/monitoring" "github.com/smartcontractkit/chainlink-common/pkg/logger" ) @@ -23,7 +22,7 @@ func TestObservedHeartbeatClient_Close(t *testing.T) { delegateClient, "test-verifier", lggr, - fakeMonitoring, + verifier.NewHeartbeatMonitoringAdapter(fakeMonitoring), ) // Close should not error @@ -31,40 +30,8 @@ func TestObservedHeartbeatClient_Close(t *testing.T) { assert.NoError(t, err) } -func TestObservedHeartbeatClient_FakeMonitoring(t *testing.T) { - lggr := logger.Test(t) - ctx := context.Background() - - // Use real FakeVerifierMonitoring to test integration - fakeMonitoring := monitoring.NewFakeVerifierMonitoring() - - delegateClient := &heartbeatclient.HeartbeatClient{} - observedClient := heartbeatclient.NewObservedHeartbeatClient( - delegateClient, - "test-verifier", - lggr, - fakeMonitoring, - ) - require.NotNil(t, observedClient) - - // These should not panic with real monitoring - metrics := fakeMonitoring.Metrics() - assert.NotNil(t, metrics) - - // Verify we can call metric methods without error - metrics.RecordHeartbeatDuration(ctx, 100*time.Millisecond) - metrics.IncrementHeartbeatsSent(ctx) - metrics.IncrementHeartbeatsFailed(ctx) - 
metrics.SetVerifierHeartbeatTimestamp(ctx, time.Now().Unix()) - metrics.SetVerifierHeartbeatSentChainHeads(ctx, 100) - metrics.SetVerifierHeartbeatChainHeads(ctx, 200) - metrics.SetVerifierHeartbeatScore(ctx, 0.95) -} - func TestObservedHeartbeatClient_WithChainSelector(t *testing.T) { lggr := logger.Test(t) - - // Use real FakeVerifierMonitoring, following the codebase pattern fakeMonitoring := monitoring.NewFakeVerifierMonitoring() delegateClient := &heartbeatclient.HeartbeatClient{} @@ -73,7 +40,7 @@ func TestObservedHeartbeatClient_WithChainSelector(t *testing.T) { delegateClient, "test-verifier", lggr, - fakeMonitoring, + verifier.NewHeartbeatMonitoringAdapter(fakeMonitoring), ) require.NotNil(t, observedClient) diff --git a/verifier/heartbeat_monitoring_adapter.go b/verifier/heartbeat_monitoring_adapter.go new file mode 100644 index 000000000..6a801b6a9 --- /dev/null +++ b/verifier/heartbeat_monitoring_adapter.go @@ -0,0 +1,61 @@ +package verifier + +import ( + "context" + "time" + + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" +) + +// heartbeatMonitoringAdapter adapts verifier.Monitoring to heartbeatclient.Monitoring. +// This allows the reusable heartbeat client to work with verifier-specific monitoring. +type heartbeatMonitoringAdapter struct { + monitoring Monitoring +} + +// NewHeartbeatMonitoringAdapter creates an adapter that allows verifier.Monitoring +// to be used with the heartbeat client's observability layer. +func NewHeartbeatMonitoringAdapter(monitoring Monitoring) heartbeatclient.Monitoring { + return &heartbeatMonitoringAdapter{monitoring: monitoring} +} + +func (a *heartbeatMonitoringAdapter) Metrics() heartbeatclient.MetricLabeler { + return &heartbeatMetricLabelerAdapter{labeler: a.monitoring.Metrics()} +} + +// heartbeatMetricLabelerAdapter adapts verifier.MetricLabeler to heartbeatclient.MetricLabeler. 
+type heartbeatMetricLabelerAdapter struct { + labeler MetricLabeler +} + +func (a *heartbeatMetricLabelerAdapter) With(keyValues ...string) heartbeatclient.MetricLabeler { + return &heartbeatMetricLabelerAdapter{labeler: a.labeler.With(keyValues...)} +} + +func (a *heartbeatMetricLabelerAdapter) RecordHeartbeatDuration(ctx context.Context, duration time.Duration) { + a.labeler.RecordHeartbeatDuration(ctx, duration) +} + +func (a *heartbeatMetricLabelerAdapter) IncrementHeartbeatsSent(ctx context.Context) { + a.labeler.IncrementHeartbeatsSent(ctx) +} + +func (a *heartbeatMetricLabelerAdapter) IncrementHeartbeatsFailed(ctx context.Context) { + a.labeler.IncrementHeartbeatsFailed(ctx) +} + +func (a *heartbeatMetricLabelerAdapter) SetVerifierHeartbeatTimestamp(ctx context.Context, timestamp int64) { + a.labeler.SetVerifierHeartbeatTimestamp(ctx, timestamp) +} + +func (a *heartbeatMetricLabelerAdapter) SetVerifierHeartbeatSentChainHeads(ctx context.Context, blockHeight uint64) { + a.labeler.SetVerifierHeartbeatSentChainHeads(ctx, blockHeight) +} + +func (a *heartbeatMetricLabelerAdapter) SetVerifierHeartbeatChainHeads(ctx context.Context, blockHeight uint64) { + a.labeler.SetVerifierHeartbeatChainHeads(ctx, blockHeight) +} + +func (a *heartbeatMetricLabelerAdapter) SetVerifierHeartbeatScore(ctx context.Context, score float64) { + a.labeler.SetVerifierHeartbeatScore(ctx, score) +} From d901b6b5a397cbbd54ee4fe18a451a358a5401e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Mon, 26 Jan 2026 13:46:20 +0100 Subject: [PATCH 14/17] Fix implementation --- integration/pkg/heartbeatclient/noop.go | 24 +++--- .../observed_heartbeat_client.go | 59 ++++++++++++--- verifier/heartbeat_reporter.go | 21 ++---- verifier/heartbeat_reporter_test.go | 75 +++++++++---------- verifier/verification_coordinator.go | 6 +- 5 files changed, 107 insertions(+), 78 deletions(-) diff --git a/integration/pkg/heartbeatclient/noop.go b/integration/pkg/heartbeatclient/noop.go 
index 915c34715..c3ee7b075 100644 --- a/integration/pkg/heartbeatclient/noop.go +++ b/integration/pkg/heartbeatclient/noop.go @@ -2,14 +2,9 @@ package heartbeatclient import ( "context" - "time" - - "google.golang.org/grpc" - - heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) -// NoopHeartbeatClient is a no-op implementation of HeartbeatServiceClient. +// NoopHeartbeatClient is a no-op implementation of HeartbeatSender. type NoopHeartbeatClient struct{} // NewNoopHeartbeatClient creates a new no-op heartbeat client. @@ -18,12 +13,17 @@ func NewNoopHeartbeatClient() *NoopHeartbeatClient { } // SendHeartbeat is a no-op implementation that returns a dummy response. -func (n *NoopHeartbeatClient) SendHeartbeat(ctx context.Context, in *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { - return &heartbeatpb.HeartbeatResponse{ - Timestamp: time.Now().Unix(), - AggregatorId: "noop", - ChainBenchmarks: make(map[uint64]*heartbeatpb.ChainBenchmark), +func (n *NoopHeartbeatClient) SendHeartbeat(ctx context.Context, blockHeightsByChain map[uint64]uint64) (HeartbeatResponse, error) { + return HeartbeatResponse{ + AggregatorID: "noop", + Timestamp: 0, + ChainBenchmarks: make(map[uint64]ChainBenchmark), }, nil } -var _ heartbeatpb.HeartbeatServiceClient = (*NoopHeartbeatClient)(nil) +// Close is a no-op implementation. 
+func (n *NoopHeartbeatClient) Close() error { + return nil +} + +var _ HeartbeatSender = (*NoopHeartbeatClient)(nil) diff --git a/integration/pkg/heartbeatclient/observed_heartbeat_client.go b/integration/pkg/heartbeatclient/observed_heartbeat_client.go index c63349398..e1359d97f 100644 --- a/integration/pkg/heartbeatclient/observed_heartbeat_client.go +++ b/integration/pkg/heartbeatclient/observed_heartbeat_client.go @@ -5,12 +5,32 @@ import ( "fmt" "time" - "google.golang.org/grpc" - "github.com/smartcontractkit/chainlink-common/pkg/logger" heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) +// HeartbeatSender defines the interface for sending heartbeats to the aggregator. +type HeartbeatSender interface { + // SendHeartbeat sends chain status information to the aggregator. + // Returns the aggregator's response containing benchmarks and timestamp. + SendHeartbeat(ctx context.Context, blockHeightsByChain map[uint64]uint64) (HeartbeatResponse, error) + // Close closes the heartbeat client connection. + Close() error +} + +// HeartbeatResponse contains the aggregator's response to a heartbeat. +type HeartbeatResponse struct { + AggregatorID string + Timestamp int64 + ChainBenchmarks map[uint64]ChainBenchmark +} + +// ChainBenchmark contains benchmark information for a specific chain. +type ChainBenchmark struct { + BlockHeight uint64 + Score float32 +} + // Monitoring provides monitoring functionality for heartbeat clients. // Services using the heartbeat client should provide an adapter implementing this interface. type Monitoring interface { @@ -69,10 +89,18 @@ func NewObservedHeartbeatClient( } // SendHeartbeat sends a heartbeat request with observability. 
-func (o *ObservedHeartbeatClient) SendHeartbeat(ctx context.Context, req *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { +func (o *ObservedHeartbeatClient) SendHeartbeat(ctx context.Context, blockHeightsByChain map[uint64]uint64) (HeartbeatResponse, error) { start := time.Now() - resp, err := o.delegate.SendHeartbeat(ctx, req, opts...) + // Build proto request + req := &heartbeatpb.HeartbeatRequest{ + SendTimestamp: time.Now().Unix(), + ChainDetails: &heartbeatpb.ChainHealthDetails{ + BlockHeightsByChain: blockHeightsByChain, + }, + } + + resp, err := o.delegate.SendHeartbeat(ctx, req) duration := time.Since(start) @@ -80,7 +108,7 @@ func (o *ObservedHeartbeatClient) SendHeartbeat(ctx context.Context, req *heartb metrics.RecordHeartbeatDuration(ctx, duration) // Record what we're sending in the request. It will be used for monitoring of the lag. - for chainSelector, blockHeight := range req.ChainDetails.BlockHeightsByChain { + for chainSelector, blockHeight := range blockHeightsByChain { chainMetrics := metrics.With("chain_selector", fmt.Sprintf("%d", chainSelector)) chainMetrics.SetVerifierHeartbeatSentChainHeads(ctx, blockHeight) } @@ -91,15 +119,22 @@ func (o *ObservedHeartbeatClient) SendHeartbeat(ctx context.Context, req *heartb "error", err, "duration", duration, ) - return nil, err + return HeartbeatResponse{}, err } metrics.IncrementHeartbeatsSent(ctx) metrics.SetVerifierHeartbeatTimestamp(ctx, resp.Timestamp) - // Record per-chain benchmarks from the response. 
+ // Convert proto response to domain response + chainBenchmarks := make(map[uint64]ChainBenchmark, len(resp.ChainBenchmarks)) for chainSelector, benchmark := range resp.ChainBenchmarks { + chainBenchmarks[chainSelector] = ChainBenchmark{ + BlockHeight: benchmark.BlockHeight, + Score: benchmark.Score, + } + + // Record metrics chainMetrics := metrics.With("chain_selector", fmt.Sprintf("%d", chainSelector)) chainMetrics.SetVerifierHeartbeatChainHeads(ctx, benchmark.BlockHeight) chainMetrics.SetVerifierHeartbeatScore(ctx, float64(benchmark.Score)) @@ -107,11 +142,15 @@ func (o *ObservedHeartbeatClient) SendHeartbeat(ctx context.Context, req *heartb o.lggr.Debugw("Heartbeat succeeded", "duration", duration, - "chainCount", len(req.ChainDetails.BlockHeightsByChain), - "chainBenchmarkCount", len(resp.ChainBenchmarks), + "chainCount", len(blockHeightsByChain), + "chainBenchmarkCount", len(chainBenchmarks), ) - return resp, nil + return HeartbeatResponse{ + AggregatorID: resp.AggregatorId, + Timestamp: resp.Timestamp, + ChainBenchmarks: chainBenchmarks, + }, nil } // Close closes the underlying heartbeat client. 
diff --git a/verifier/heartbeat_reporter.go b/verifier/heartbeat_reporter.go index c39f744da..6dc52a486 100644 --- a/verifier/heartbeat_reporter.go +++ b/verifier/heartbeat_reporter.go @@ -6,10 +6,10 @@ import ( "sync" "time" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" - heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) const ( @@ -25,7 +25,7 @@ type HeartbeatReporter struct { logger logger.Logger chainStatusManager protocol.ChainStatusManager - heartbeatClient heartbeatpb.HeartbeatServiceClient + heartbeatClient heartbeatclient.HeartbeatSender allSelectors []protocol.ChainSelector verifierID string interval time.Duration @@ -35,7 +35,7 @@ type HeartbeatReporter struct { func NewHeartbeatReporter( lggr logger.Logger, chainStatusManager protocol.ChainStatusManager, - heartbeatClient heartbeatpb.HeartbeatServiceClient, + heartbeatClient heartbeatclient.HeartbeatSender, allSelectors []protocol.ChainSelector, verifierID string, interval time.Duration, @@ -153,15 +153,8 @@ func (hr *HeartbeatReporter) sendHeartbeat(ctx context.Context) { } } - // Create and send heartbeat request. - req := &heartbeatpb.HeartbeatRequest{ - SendTimestamp: time.Now().Unix(), - ChainDetails: &heartbeatpb.ChainHealthDetails{ - BlockHeightsByChain: blockHeightsByChain, - }, - } - - resp, err := hr.heartbeatClient.SendHeartbeat(ctx, req) + // Send heartbeat request. 
+ resp, err := hr.heartbeatClient.SendHeartbeat(ctx, blockHeightsByChain) if err != nil { hr.logger.Errorw("Failed to send heartbeat", "error", err) return @@ -169,14 +162,14 @@ func (hr *HeartbeatReporter) sendHeartbeat(ctx context.Context) { hr.logger.Infow("Heartbeat sent successfully", "verifierId", hr.verifierID, - "aggregatorId", resp.AggregatorId, + "aggregatorId", resp.AggregatorID, "chainCount", len(blockHeightsByChain), ) hr.logger.Debugw("Heartbeat details", "verifierId", hr.verifierID, "blockHeightsByChain", blockHeightsByChain, "chainBenchmarks", resp.ChainBenchmarks, - "aggregatorId", resp.AggregatorId, + "aggregatorId", resp.AggregatorID, "respTimestamp", resp.Timestamp, ) } diff --git a/verifier/heartbeat_reporter_test.go b/verifier/heartbeat_reporter_test.go index 925de3c14..8a11c44ee 100644 --- a/verifier/heartbeat_reporter_test.go +++ b/verifier/heartbeat_reporter_test.go @@ -10,27 +10,27 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "google.golang.org/grpc" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/internal/mocks" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-ccv/verifier" "github.com/smartcontractkit/chainlink-common/pkg/logger" - heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) -// mockHeartbeatClient is a mock implementation of HeartbeatServiceClient for testing. -// Note: This is a gRPC client interface from protobuf, so we mock it manually rather than using mockery. +// mockHeartbeatClient is a mock implementation of heartbeatclient.HeartbeatSender for testing. 
type mockHeartbeatClient struct { mock.Mock } -func (m *mockHeartbeatClient) SendHeartbeat(ctx context.Context, in *heartbeatpb.HeartbeatRequest, opts ...grpc.CallOption) (*heartbeatpb.HeartbeatResponse, error) { - args := m.Called(ctx, in, opts) - if args.Get(0) == nil { - return nil, args.Error(1) - } - return args.Get(0).(*heartbeatpb.HeartbeatResponse), args.Error(1) +func (m *mockHeartbeatClient) SendHeartbeat(ctx context.Context, blockHeightsByChain map[uint64]uint64) (heartbeatclient.HeartbeatResponse, error) { + args := m.Called(ctx, blockHeightsByChain) + return args.Get(0).(heartbeatclient.HeartbeatResponse), args.Error(1) +} + +func (m *mockHeartbeatClient) Close() error { + args := m.Called() + return args.Error(0) } func TestNewHeartbeatReporter_Success(t *testing.T) { @@ -182,12 +182,12 @@ func TestHeartbeatReporter_StartAndStop(t *testing.T) { mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { return c != nil - }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { - return req.SendTimestamp > 0 && len(req.ChainDetails.BlockHeightsByChain) > 0 - }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + }), mock.MatchedBy(func(blockHeights map[uint64]uint64) bool { + return len(blockHeights) > 0 + })).Return(heartbeatclient.HeartbeatResponse{ Timestamp: time.Now().Unix(), - AggregatorId: "test-aggregator", - ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + AggregatorID: "test-aggregator", + ChainBenchmarks: map[uint64]heartbeatclient.ChainBenchmark{}, }, nil) reporter, err := verifier.NewHeartbeatReporter( @@ -235,9 +235,9 @@ func TestHeartbeatReporter_SendHeartbeatFailure(t *testing.T) { // Mock client returns error mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { return c != nil - }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { - return req.SendTimestamp > 0 - }), mock.Anything).Return(nil, errors.New("connection refused")) + }), 
mock.MatchedBy(func(blockHeights map[uint64]uint64) bool { + return true + })).Return(heartbeatclient.HeartbeatResponse{}, errors.New("connection refused")) reporter, err := verifier.NewHeartbeatReporter( lggr, @@ -322,15 +322,12 @@ func TestHeartbeatReporter_MultipleChains(t *testing.T) { // Verify the request has all chain heights mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { return c != nil - }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { - if req == nil || req.ChainDetails == nil { - return false - } - return len(req.ChainDetails.BlockHeightsByChain) == len(selectors) - }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + }), mock.MatchedBy(func(blockHeights map[uint64]uint64) bool { + return len(blockHeights) == len(selectors) + })).Return(heartbeatclient.HeartbeatResponse{ Timestamp: time.Now().Unix(), - AggregatorId: "test-aggregator", - ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + AggregatorID: "test-aggregator", + ChainBenchmarks: map[uint64]heartbeatclient.ChainBenchmark{}, }, nil) reporter, err := verifier.NewHeartbeatReporter( @@ -354,9 +351,9 @@ func TestHeartbeatReporter_MultipleChains(t *testing.T) { // Verify SendHeartbeat was called with all chains mockClient.AssertCalled(t, "SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { return c != nil - }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { - return len(req.ChainDetails.BlockHeightsByChain) == len(selectors) - }), mock.Anything) + }), mock.MatchedBy(func(blockHeights map[uint64]uint64) bool { + return len(blockHeights) == len(selectors) + })) } func TestHeartbeatReporter_Name(t *testing.T) { @@ -423,12 +420,12 @@ func TestHeartbeatReporter_ContextCancellation(t *testing.T) { mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { return c != nil - }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { - return req.SendTimestamp > 0 - }), 
mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + }), mock.MatchedBy(func(blockHeights map[uint64]uint64) bool { + return true + })).Return(heartbeatclient.HeartbeatResponse{ Timestamp: time.Now().Unix(), - AggregatorId: "test-aggregator", - ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + AggregatorID: "test-aggregator", + ChainBenchmarks: map[uint64]heartbeatclient.ChainBenchmark{}, }, nil) reporter, err := verifier.NewHeartbeatReporter( @@ -480,13 +477,13 @@ func TestHeartbeatReporter_MissingChainStatus(t *testing.T) { // Should send heartbeat with only the available chain mockClient.On("SendHeartbeat", mock.MatchedBy(func(c context.Context) bool { return c != nil - }), mock.MatchedBy(func(req *heartbeatpb.HeartbeatRequest) bool { + }), mock.MatchedBy(func(blockHeights map[uint64]uint64) bool { // Should only have 1 chain since the other one is missing - return len(req.ChainDetails.BlockHeightsByChain) == 1 - }), mock.Anything).Return(&heartbeatpb.HeartbeatResponse{ + return len(blockHeights) == 1 + })).Return(heartbeatclient.HeartbeatResponse{ Timestamp: time.Now().Unix(), - AggregatorId: "test-aggregator", - ChainBenchmarks: map[uint64]*heartbeatpb.ChainBenchmark{}, + AggregatorID: "test-aggregator", + ChainBenchmarks: map[uint64]heartbeatclient.ChainBenchmark{}, }, nil) reporter, err := verifier.NewHeartbeatReporter( diff --git a/verifier/verification_coordinator.go b/verifier/verification_coordinator.go index ed4709b30..fd6e0121c 100644 --- a/verifier/verification_coordinator.go +++ b/verifier/verification_coordinator.go @@ -8,11 +8,11 @@ import ( "github.com/smartcontractkit/chainlink-ccv/common" cursecheckerimpl "github.com/smartcontractkit/chainlink-ccv/integration/pkg/cursechecker" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-common/pkg/logger" 
"github.com/smartcontractkit/chainlink-common/pkg/services" - heartbeatpb "github.com/smartcontractkit/chainlink-protos/chainlink-ccv/heartbeat/v1" ) type Coordinator struct { @@ -45,7 +45,7 @@ func NewCoordinator( messageTracker MessageLatencyTracker, monitoring Monitoring, chainStatusManager protocol.ChainStatusManager, - heartbeatClient heartbeatpb.HeartbeatServiceClient, + heartbeatClient heartbeatclient.HeartbeatSender, ) (*Coordinator, error) { return NewCoordinatorWithDetector( ctx, @@ -73,7 +73,7 @@ func NewCoordinatorWithDetector( monitoring Monitoring, chainStatusManager protocol.ChainStatusManager, detector common.CurseCheckerService, - heartbeatClient heartbeatpb.HeartbeatServiceClient, + heartbeatClient heartbeatclient.HeartbeatSender, ) (*Coordinator, error) { enabledSourceReaders, err := filterOnlyEnabledSourceReaders(ctx, lggr, config, sourceReaders, chainStatusManager) if err != nil { From 13f3b9ed00091d36d42c6cdfa8446e48614317ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Mon, 26 Jan 2026 14:07:23 +0100 Subject: [PATCH 15/17] Fix --- cmd/verifier/committee/main.go | 5 ++--- cmd/verifier/token/main.go | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cmd/verifier/committee/main.go b/cmd/verifier/committee/main.go index fa731ebc1..eb698e79c 100644 --- a/cmd/verifier/committee/main.go +++ b/cmd/verifier/committee/main.go @@ -13,13 +13,12 @@ import ( "github.com/BurntSushi/toml" "github.com/jmoiron/sqlx" - _ "github.com/lib/pq" "go.uber.org/zap/zapcore" cmd "github.com/smartcontractkit/chainlink-ccv/cmd/verifier" - "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient - "github.com/smartcontractkit/chainlink-ccv/integration/pkg/accessors" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/accessors" "github.com/smartcontractkit/chainlink-ccv/integration/pkg/blockchain" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" 
"github.com/smartcontractkit/chainlink-ccv/integration/storageaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-ccv/protocol/common/hmac" diff --git a/cmd/verifier/token/main.go b/cmd/verifier/token/main.go index 2fad0d65b..2c73d624c 100644 --- a/cmd/verifier/token/main.go +++ b/cmd/verifier/token/main.go @@ -14,9 +14,9 @@ import ( "go.uber.org/zap/zapcore" cmd "github.com/smartcontractkit/chainlink-ccv/cmd/verifier" - "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" - "github.com/smartcontractkit/chainlink-ccv/integration/pkg/accessors" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/accessors" "github.com/smartcontractkit/chainlink-ccv/integration/pkg/blockchain" + "github.com/smartcontractkit/chainlink-ccv/integration/pkg/heartbeatclient" "github.com/smartcontractkit/chainlink-ccv/pkg/chainaccess" "github.com/smartcontractkit/chainlink-ccv/protocol" "github.com/smartcontractkit/chainlink-ccv/protocol/common/logging" From 2d4144b80fc1e1e92b5b293bac1218c7442f41a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Mon, 26 Jan 2026 15:10:51 +0100 Subject: [PATCH 16/17] Fix --- cmd/verifier/committee/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/verifier/committee/main.go b/cmd/verifier/committee/main.go index eb698e79c..9d55370e3 100644 --- a/cmd/verifier/committee/main.go +++ b/cmd/verifier/committee/main.go @@ -13,6 +13,7 @@ import ( "github.com/BurntSushi/toml" "github.com/jmoiron/sqlx" + _ "github.com/lib/pq" "go.uber.org/zap/zapcore" cmd "github.com/smartcontractkit/chainlink-ccv/cmd/verifier" From d1951d1a0f92e932f78114cf944136de65b4539e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Mat=C5=82aszek?= Date: Tue, 27 Jan 2026 09:38:19 +0100 Subject: [PATCH 17/17] trigger ci