diff --git a/core/scripts/go.mod b/core/scripts/go.mod index c50c34f5a8a..d834a3f10b9 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -507,6 +507,7 @@ require ( github.com/smartcontractkit/chainlink-protos/chainlink-ccv/verifier v0.0.0-20251211142334-5c3421fe2c8d // indirect github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b // indirect github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 // indirect + github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 // indirect github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 // indirect github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 // indirect github.com/smartcontractkit/chainlink-protos/svr v1.1.0 // indirect diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 201abbf3abc..f5656b14b45 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1673,6 +1673,8 @@ github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4 h1:AEnxv4HM3WD1Rb github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4/go.mod h1:PjZD54vr6rIKEKQj6HNA4hllvYI/QpT+Zefj3tqkFAs= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod 
h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/core/services/arbiter/arbiter.go b/core/services/arbiter/arbiter.go index f93969bb613..1309019e090 100644 --- a/core/services/arbiter/arbiter.go +++ b/core/services/arbiter/arbiter.go @@ -10,7 +10,7 @@ import ( "google.golang.org/grpc" "github.com/smartcontractkit/chainlink-common/pkg/services" - ringpb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" "github.com/smartcontractkit/chainlink/v2/core/logger" ) diff --git a/core/services/arbiter/arbiter_scaler.go b/core/services/arbiter/arbiter_scaler.go index ca341ef2ac1..9210b1e0e9c 100644 --- a/core/services/arbiter/arbiter_scaler.go +++ b/core/services/arbiter/arbiter_scaler.go @@ -6,7 +6,7 @@ import ( "google.golang.org/protobuf/types/known/emptypb" "github.com/smartcontractkit/chainlink-common/pkg/logger" - pb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" ) // RingArbiterHandler implements the ArbiterScalerServer interface from chainlink-common. @@ -15,7 +15,7 @@ import ( // - Status(): Returns routable shard count and per-shard health for Ring OCR routing decisions // - ConsensusWantShards(): Receives the Ring consensus decision about desired shard count type RingArbiterHandler struct { - pb.UnimplementedArbiterScalerServer + ringpb.UnimplementedArbiterScalerServer state *State lggr logger.Logger } @@ -31,7 +31,7 @@ func NewRingArbiterHandler(state *State, lggr logger.Logger) *RingArbiterHandler // Status returns the current replica status for Ring OCR routing. // Returns only READY shards count and per-shard health status. // This is called by the Ring plugin to determine which shards can receive traffic. 
-func (h *RingArbiterHandler) Status(ctx context.Context, _ *emptypb.Empty) (*pb.ReplicaStatus, error) { +func (h *RingArbiterHandler) Status(ctx context.Context, _ *emptypb.Empty) (*ringpb.ReplicaStatus, error) { routable := h.state.GetRoutableShards() h.lggr.Debugw("Status requested", @@ -40,9 +40,9 @@ func (h *RingArbiterHandler) Status(ctx context.Context, _ *emptypb.Empty) (*pb. ) // Convert internal shard health to protobuf ShardStatus - shardStatus := make(map[uint32]*pb.ShardStatus, len(routable.ShardInfo)) + shardStatus := make(map[uint32]*ringpb.ShardStatus, len(routable.ShardInfo)) for shardID, health := range routable.ShardInfo { - shardStatus[shardID] = &pb.ShardStatus{ + shardStatus[shardID] = &ringpb.ShardStatus{ IsHealthy: health.IsHealthy, } } @@ -50,7 +50,7 @@ func (h *RingArbiterHandler) Status(ctx context.Context, _ *emptypb.Empty) (*pb. // TODO: Rename WantShards to ReadyShards in protobuf (breaking change) // The field name "WantShards" is misleading - it actually represents // the number of shards ready for routing, not what Ring "wants". - return &pb.ReplicaStatus{ + return &ringpb.ReplicaStatus{ WantShards: uint32(routable.ReadyCount), //nolint:gosec // G115: replica count bounded Status: shardStatus, }, nil @@ -58,7 +58,7 @@ func (h *RingArbiterHandler) Status(ctx context.Context, _ *emptypb.Empty) (*pb. // ConsensusWantShards is called by the Ring consensus to report the desired number of shards. // The consensus has agreed on how many shards the system should have. 
-func (h *RingArbiterHandler) ConsensusWantShards(ctx context.Context, req *pb.ConsensusWantShardsRequest) (*emptypb.Empty, error) { +func (h *RingArbiterHandler) ConsensusWantShards(ctx context.Context, req *ringpb.ConsensusWantShardsRequest) (*emptypb.Empty, error) { nShards := req.GetNShards() if nShards == 0 { diff --git a/core/services/arbiter/client.go b/core/services/arbiter/client.go index 439566978ce..1cfe4aefa08 100644 --- a/core/services/arbiter/client.go +++ b/core/services/arbiter/client.go @@ -7,21 +7,21 @@ import ( "google.golang.org/protobuf/types/known/emptypb" "github.com/smartcontractkit/chainlink-common/pkg/logger" - pb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" ) -// RingArbiterClient implements pb.ArbiterScalerClient by calling +// RingArbiterClient implements ringpb.ArbiterScalerClient by calling // the ArbiterScalerServer directly without going over gRPC. // This is used by Ring OCR to communicate with the Arbiter in-process. type RingArbiterClient struct { - server pb.ArbiterScalerServer + server ringpb.ArbiterScalerServer lggr logger.Logger } -var _ pb.ArbiterScalerClient = (*RingArbiterClient)(nil) +var _ ringpb.ArbiterScalerClient = (*RingArbiterClient)(nil) // NewRingArbiterClient creates a new RingArbiterClient. -func NewRingArbiterClient(server pb.ArbiterScalerServer, lggr logger.Logger) *RingArbiterClient { +func NewRingArbiterClient(server ringpb.ArbiterScalerServer, lggr logger.Logger) *RingArbiterClient { return &RingArbiterClient{ server: server, lggr: logger.Named(lggr, "RingArbiterClient"), @@ -29,11 +29,11 @@ func NewRingArbiterClient(server pb.ArbiterScalerServer, lggr logger.Logger) *Ri } // Status returns the current replica status by calling the server directly. 
-func (c *RingArbiterClient) Status(ctx context.Context, in *emptypb.Empty, _ ...grpc.CallOption) (*pb.ReplicaStatus, error) { +func (c *RingArbiterClient) Status(ctx context.Context, in *emptypb.Empty, _ ...grpc.CallOption) (*ringpb.ReplicaStatus, error) { return c.server.Status(ctx, in) } // ConsensusWantShards notifies the Arbiter about the desired shard count by calling the server directly. -func (c *RingArbiterClient) ConsensusWantShards(ctx context.Context, in *pb.ConsensusWantShardsRequest, _ ...grpc.CallOption) (*emptypb.Empty, error) { +func (c *RingArbiterClient) ConsensusWantShards(ctx context.Context, in *ringpb.ConsensusWantShardsRequest, _ ...grpc.CallOption) (*emptypb.Empty, error) { return c.server.ConsensusWantShards(ctx, in) } diff --git a/core/services/arbiter/client_test.go b/core/services/arbiter/client_test.go index f62b070a996..a1466a64656 100644 --- a/core/services/arbiter/client_test.go +++ b/core/services/arbiter/client_test.go @@ -9,29 +9,28 @@ import ( "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/emptypb" - pb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" - + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" "github.com/smartcontractkit/chainlink/v2/core/logger" ) -// mockArbiterScalerServer implements pb.ArbiterScalerServer for testing. +// mockArbiterScalerServer implements ringpb.ArbiterScalerServer for testing. 
type mockArbiterScalerServer struct { - pb.UnimplementedArbiterScalerServer - statusResp *pb.ReplicaStatus + ringpb.UnimplementedArbiterScalerServer + statusResp *ringpb.ReplicaStatus statusErr error consensusErr error consensusCalled bool lastNShards uint32 } -func (m *mockArbiterScalerServer) Status(ctx context.Context, _ *emptypb.Empty) (*pb.ReplicaStatus, error) { +func (m *mockArbiterScalerServer) Status(ctx context.Context, _ *emptypb.Empty) (*ringpb.ReplicaStatus, error) { if m.statusErr != nil { return nil, m.statusErr } return m.statusResp, nil } -func (m *mockArbiterScalerServer) ConsensusWantShards(ctx context.Context, req *pb.ConsensusWantShardsRequest) (*emptypb.Empty, error) { +func (m *mockArbiterScalerServer) ConsensusWantShards(ctx context.Context, req *ringpb.ConsensusWantShardsRequest) (*emptypb.Empty, error) { m.consensusCalled = true m.lastNShards = req.GetNShards() if m.consensusErr != nil { @@ -45,9 +44,9 @@ func TestRingArbiterClient_Status(t *testing.T) { t.Run("returns status from server", func(t *testing.T) { mockServer := &mockArbiterScalerServer{ - statusResp: &pb.ReplicaStatus{ + statusResp: &ringpb.ReplicaStatus{ WantShards: 5, - Status: map[uint32]*pb.ShardStatus{ + Status: map[uint32]*ringpb.ShardStatus{ 0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: false}, @@ -89,7 +88,7 @@ func TestRingArbiterClient_ConsensusWantShards(t *testing.T) { mockServer := &mockArbiterScalerServer{} client := NewRingArbiterClient(mockServer, lggr) - req := &pb.ConsensusWantShardsRequest{NShards: 10} + req := &ringpb.ConsensusWantShardsRequest{NShards: 10} resp, err := client.ConsensusWantShards(context.Background(), req) require.NoError(t, err) @@ -105,7 +104,7 @@ func TestRingArbiterClient_ConsensusWantShards(t *testing.T) { } client := NewRingArbiterClient(mockServer, lggr) - req := &pb.ConsensusWantShardsRequest{NShards: 5} + req := &ringpb.ConsensusWantShardsRequest{NShards: 5} resp, err := client.ConsensusWantShards(context.Background(), 
req) require.Error(t, err) diff --git a/core/services/arbiter/grpc_server.go b/core/services/arbiter/grpc_server.go index e776fa6f377..b980f72076d 100644 --- a/core/services/arbiter/grpc_server.go +++ b/core/services/arbiter/grpc_server.go @@ -7,7 +7,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - ringpb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" "github.com/smartcontractkit/chainlink/v2/core/logger" ) diff --git a/core/services/arbiter/grpc_server_test.go b/core/services/arbiter/grpc_server_test.go index 8be6db7db53..cb8d3a12c62 100644 --- a/core/services/arbiter/grpc_server_test.go +++ b/core/services/arbiter/grpc_server_test.go @@ -11,7 +11,7 @@ import ( "google.golang.org/grpc/status" "github.com/smartcontractkit/chainlink-common/pkg/services" - ringpb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" "github.com/smartcontractkit/chainlink/v2/core/logger" ) diff --git a/core/services/arbiter/shardconfig.go b/core/services/arbiter/shardconfig.go index 77af5193ee8..f65cdde315f 100644 --- a/core/services/arbiter/shardconfig.go +++ b/core/services/arbiter/shardconfig.go @@ -273,7 +273,7 @@ func (s *shardConfigSyncer) fetchAndCache(ctx context.Context) { ctx, bc.ReadIdentifier(GetDesiredShardCountMethod), primitives.Unconfirmed, - nil, // No input params + nil, // No input params &result, ) if err != nil { diff --git a/core/services/chainlink/application.go b/core/services/chainlink/application.go index 5c432fe6863..769bf024a4f 100644 --- a/core/services/chainlink/application.go +++ b/core/services/chainlink/application.go @@ -46,12 +46,12 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/utils/jsonserializable" "github.com/smartcontractkit/chainlink-common/pkg/utils/mailbox" "github.com/smartcontractkit/chainlink-common/pkg/workflows/dontime" - 
"github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator" "github.com/smartcontractkit/chainlink-evm/pkg/chains/legacyevm" "github.com/smartcontractkit/chainlink-evm/pkg/keys" "github.com/smartcontractkit/chainlink-evm/pkg/logpoller" "github.com/smartcontractkit/chainlink-evm/pkg/txmgr" evmutils "github.com/smartcontractkit/chainlink-evm/pkg/utils" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" "github.com/smartcontractkit/chainlink/v2/core/services/ccv/ccvcommitteeverifier" "github.com/smartcontractkit/chainlink/v2/core/services/ccv/ccvexecutor" diff --git a/core/services/ocr2/delegate.go b/core/services/ocr2/delegate.go index dadc11bdce7..2efb3bbc49e 100644 --- a/core/services/ocr2/delegate.go +++ b/core/services/ocr2/delegate.go @@ -53,11 +53,11 @@ import ( llotypes "github.com/smartcontractkit/chainlink-common/pkg/types/llo" "github.com/smartcontractkit/chainlink-common/pkg/utils/mailbox" "github.com/smartcontractkit/chainlink-common/pkg/workflows/dontime" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator" datastreamsllo "github.com/smartcontractkit/chainlink-data-streams/llo" "github.com/smartcontractkit/chainlink-evm/pkg/chains/legacyevm" "github.com/smartcontractkit/chainlink-evm/pkg/keys" + "github.com/smartcontractkit/chainlink/v2/core/services/ring" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" "github.com/smartcontractkit/chainlink/v2/core/bridges" gatewayconnector "github.com/smartcontractkit/chainlink/v2/core/capabilities/gateway_connector" @@ -94,7 +94,6 @@ import ( functionsRelay "github.com/smartcontractkit/chainlink/v2/core/services/relay/evm/functions" evmmercury "github.com/smartcontractkit/chainlink/v2/core/services/relay/evm/mercury" mercuryutils "github.com/smartcontractkit/chainlink/v2/core/services/relay/evm/mercury/utils" - localshardorch 
"github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" "github.com/smartcontractkit/chainlink/v2/core/services/streams" "github.com/smartcontractkit/chainlink/v2/core/services/synchronization" "github.com/smartcontractkit/chainlink/v2/core/services/telemetry" @@ -1075,7 +1074,7 @@ func (d *Delegate) newServicesRing( ringStore := ring.NewStore() shardOrchestratorStore := shardorchestrator.NewStore(lggr) // Start ShardOrchestrator - orchestratorSvc := localshardorch.New( + orchestratorSvc := shardorchestrator.New( int(shardingCfg.ShardOrchestratorPort()), shardOrchestratorStore, lggr, diff --git a/core/services/ocr2/plugins/ring/ring_integration_test.go b/core/services/ocr2/plugins/ring/ring_integration_test.go index 4819ba902f2..e6353e9434b 100644 --- a/core/services/ocr2/plugins/ring/ring_integration_test.go +++ b/core/services/ocr2/plugins/ring/ring_integration_test.go @@ -10,16 +10,16 @@ import ( "google.golang.org/protobuf/types/known/emptypb" "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + "github.com/smartcontractkit/chainlink/v2/core/services/ring" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" ) -// mockArbiterScalerClient implements pb.ArbiterScalerClient for testing. +// mockArbiterScalerClient implements ringpb.ArbiterScalerClient for testing. // It allows configuring the return values for Status and ConsensusWantShards. 
type mockArbiterScalerClient struct { wantShards uint32 - shardStatus map[uint32]*pb.ShardStatus + shardStatus map[uint32]*ringpb.ShardStatus statusErr error consensusErr error } @@ -27,21 +27,21 @@ type mockArbiterScalerClient struct { func newMockArbiterScalerClient() *mockArbiterScalerClient { return &mockArbiterScalerClient{ wantShards: 1, - shardStatus: map[uint32]*pb.ShardStatus{0: {IsHealthy: true}}, + shardStatus: map[uint32]*ringpb.ShardStatus{0: {IsHealthy: true}}, } } -func (m *mockArbiterScalerClient) Status(_ context.Context, _ *emptypb.Empty, _ ...grpc.CallOption) (*pb.ReplicaStatus, error) { +func (m *mockArbiterScalerClient) Status(_ context.Context, _ *emptypb.Empty, _ ...grpc.CallOption) (*ringpb.ReplicaStatus, error) { if m.statusErr != nil { return nil, m.statusErr } - return &pb.ReplicaStatus{ + return &ringpb.ReplicaStatus{ WantShards: m.wantShards, Status: m.shardStatus, }, nil } -func (m *mockArbiterScalerClient) ConsensusWantShards(_ context.Context, _ *pb.ConsensusWantShardsRequest, _ ...grpc.CallOption) (*emptypb.Empty, error) { +func (m *mockArbiterScalerClient) ConsensusWantShards(_ context.Context, _ *ringpb.ConsensusWantShardsRequest, _ ...grpc.CallOption) (*emptypb.Empty, error) { if m.consensusErr != nil { return nil, m.consensusErr } @@ -59,9 +59,9 @@ func TestRingStoreIntegration(t *testing.T) { require.True(t, health[1]) // Set steady state routing (required for GetShardForWorkflow without OCR) - store.SetRoutingState(&pb.RoutingState{ + store.SetRoutingState(&ringpb.RoutingState{ Id: 1, - State: &pb.RoutingState_RoutableShards{RoutableShards: 1}, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 1}, }) // Test workflow routing via consistent hashing @@ -82,9 +82,9 @@ func TestRingStoreIntegration(t *testing.T) { store.SetShardHealth(2, true) // Set steady state with 3 shards - store.SetRoutingState(&pb.RoutingState{ + store.SetRoutingState(&ringpb.RoutingState{ Id: 1, - State: 
&pb.RoutingState_RoutableShards{RoutableShards: 3}, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, }) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) @@ -106,9 +106,9 @@ func TestRingStoreIntegration(t *testing.T) { store := ring.NewStore() store.SetShardHealth(0, true) - store.SetRoutingState(&pb.RoutingState{ + store.SetRoutingState(&ringpb.RoutingState{ Id: 1, - State: &pb.RoutingState_RoutableShards{RoutableShards: 1}, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 1}, }) // Manually set a workflow allocation diff --git a/core/services/ring/factory.go b/core/services/ring/factory.go new file mode 100644 index 00000000000..4d4b6d5d30a --- /dev/null +++ b/core/services/ring/factory.go @@ -0,0 +1,83 @@ +package ring + +import ( + "context" + "errors" + + "github.com/smartcontractkit/libocr/offchainreporting2plus/ocr3types" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/services" + "github.com/smartcontractkit/chainlink-common/pkg/types/core" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" +) + +const ( + defaultMaxPhaseOutputBytes = 1000000 // 1 MB + defaultMaxReportCount = 1 + defaultBatchSize = 100 +) + +var _ core.OCR3ReportingPluginFactory = &Factory{} + +type Factory struct { + ringStore *Store + shardOrchestratorStore *shardorchestrator.Store + arbiterScaler ringpb.ArbiterScalerClient + config *ConsensusConfig + lggr logger.Logger + + services.StateMachine +} + +func NewFactory(s *Store, shardOrchestratorStore *shardorchestrator.Store, arbiterScaler ringpb.ArbiterScalerClient, lggr logger.Logger, cfg *ConsensusConfig) (*Factory, error) { + if arbiterScaler == nil { + return nil, errors.New("arbiterScaler is required") + } + if cfg == nil { + cfg = &ConsensusConfig{ + BatchSize: defaultBatchSize, + } + } + return &Factory{ + ringStore: s, + 
shardOrchestratorStore: shardOrchestratorStore, + arbiterScaler: arbiterScaler, + config: cfg, + lggr: logger.Named(lggr, "RingPluginFactory"), + }, nil +} + +func (o *Factory) NewReportingPlugin(_ context.Context, config ocr3types.ReportingPluginConfig) (ocr3types.ReportingPlugin[[]byte], ocr3types.ReportingPluginInfo, error) { + plugin, err := NewPlugin(o.ringStore, o.arbiterScaler, config, o.lggr, o.config) + pluginInfo := ocr3types.ReportingPluginInfo{ + Name: "RingPlugin", + Limits: ocr3types.ReportingPluginLimits{ + MaxQueryLength: defaultMaxPhaseOutputBytes, + MaxObservationLength: defaultMaxPhaseOutputBytes, + MaxOutcomeLength: defaultMaxPhaseOutputBytes, + MaxReportLength: defaultMaxPhaseOutputBytes, + MaxReportCount: defaultMaxReportCount, + }, + } + return plugin, pluginInfo, err +} + +func (o *Factory) Start(ctx context.Context) error { + return o.StartOnce("RingPlugin", func() error { + return nil + }) +} + +func (o *Factory) Close() error { + return o.StopOnce("RingPlugin", func() error { + return nil + }) +} + +func (o *Factory) Name() string { return o.lggr.Name() } + +func (o *Factory) HealthReport() map[string]error { + return map[string]error{o.Name(): o.Healthy()} +} diff --git a/core/services/ring/factory_test.go b/core/services/ring/factory_test.go new file mode 100644 index 00000000000..189e219d34b --- /dev/null +++ b/core/services/ring/factory_test.go @@ -0,0 +1,96 @@ +package ring + +import ( + "context" + "testing" + + "github.com/smartcontractkit/libocr/offchainreporting2plus/ocr3types" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" +) + +func TestFactory_NewFactory(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + shardOrchestratorStore := shardorchestrator.NewStore(lggr) + arbiter := &mockArbiter{} + + tests := []struct { + name 
string + arbiter ringpb.ArbiterScalerClient + config *ConsensusConfig + wantErr bool + errSubstr string + }{ + { + name: "with_nil_config", + arbiter: arbiter, + config: nil, + wantErr: false, + }, + { + name: "with_custom_config", + arbiter: arbiter, + config: &ConsensusConfig{BatchSize: 50}, + wantErr: false, + }, + { + name: "nil_arbiter_returns_error", + arbiter: nil, + config: nil, + wantErr: true, + errSubstr: "arbiterScaler is required", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f, err := NewFactory(store, shardOrchestratorStore, tt.arbiter, lggr, tt.config) + if tt.wantErr { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errSubstr) + } else { + require.NoError(t, err) + require.NotNil(t, f) + } + }) + } +} + +func TestFactory_NewReportingPlugin(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + f, err := NewFactory(store, nil, &mockArbiter{}, lggr, nil) + require.NoError(t, err) + + config := ocr3types.ReportingPluginConfig{N: 4, F: 1} + plugin, info, err := f.NewReportingPlugin(context.Background(), config) + require.NoError(t, err) + require.NotNil(t, plugin) + require.NotEmpty(t, info.Name) + require.Equal(t, "RingPlugin", info.Name) + require.Equal(t, defaultMaxReportCount, info.Limits.MaxReportCount) +} + +func TestFactory_Lifecycle(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + f, err := NewFactory(store, nil, &mockArbiter{}, lggr, nil) + require.NoError(t, err) + + err = f.Start(context.Background()) + require.NoError(t, err) + + name := f.Name() + require.NotEmpty(t, name) + + report := f.HealthReport() + require.NotNil(t, report) + require.Contains(t, report, name) + + err = f.Close() + require.NoError(t, err) +} diff --git a/core/services/ring/plugin.go b/core/services/ring/plugin.go new file mode 100644 index 00000000000..cead14a8bb7 --- /dev/null +++ b/core/services/ring/plugin.go @@ -0,0 +1,273 @@ +package ring + +import ( + "context" + "errors" + "slices" + 
"time" + + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/emptypb" + "google.golang.org/protobuf/types/known/structpb" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/smartcontractkit/libocr/commontypes" + "github.com/smartcontractkit/libocr/offchainreporting2plus/ocr3types" + "github.com/smartcontractkit/libocr/offchainreporting2plus/types" + "github.com/smartcontractkit/libocr/quorumhelper" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +type Plugin struct { + store *Store + arbiterScaler ringpb.ArbiterScalerClient + config ocr3types.ReportingPluginConfig + lggr logger.Logger + + batchSize int + timeToSync time.Duration +} + +var _ ocr3types.ReportingPlugin[[]byte] = (*Plugin)(nil) + +type ConsensusConfig struct { + BatchSize int + TimeToSync time.Duration +} + +const ( + DefaultBatchSize = 100 + DefaultTimeToSync = 5 * time.Minute +) + +func NewPlugin(store *Store, arbiterScaler ringpb.ArbiterScalerClient, config ocr3types.ReportingPluginConfig, lggr logger.Logger, cfg *ConsensusConfig) (*Plugin, error) { + if arbiterScaler == nil { + return nil, errors.New("RingOCR arbiterScaler is required") + } + if cfg == nil { + cfg = &ConsensusConfig{ + BatchSize: DefaultBatchSize, + TimeToSync: DefaultTimeToSync, + } + } + + if cfg.BatchSize <= 0 { + lggr.Infow("using default batchSize", "default", DefaultBatchSize) + cfg.BatchSize = DefaultBatchSize + } + if cfg.TimeToSync <= 0 { + lggr.Infow("using default timeToSync", "default", DefaultTimeToSync) + cfg.TimeToSync = DefaultTimeToSync + } + + lggr.Infow("RingPlugin config", + "batchSize", cfg.BatchSize, + "timeToSync", cfg.TimeToSync, + ) + + return &Plugin{ + store: store, + arbiterScaler: arbiterScaler, + config: config, + lggr: logger.Named(lggr, "RingPlugin"), + batchSize: cfg.BatchSize, + timeToSync: cfg.TimeToSync, + }, nil +} + +//coverage:ignore +func (p *Plugin) Query(_ 
context.Context, _ ocr3types.OutcomeContext) (types.Query, error) { + return nil, nil +} + +func (p *Plugin) Observation(ctx context.Context, _ ocr3types.OutcomeContext, _ types.Query) (types.Observation, error) { + var wantShards uint32 + var shardStatus map[uint32]*ringpb.ShardStatus + + status, err := p.arbiterScaler.Status(ctx, &emptypb.Empty{}) + if err != nil { + // NOTE: consider a fallback data source if Arbiter is not available + p.lggr.Errorw("RingOCR failed to get arbiter scaler status", "error", err) + return nil, err + } + wantShards = status.WantShards + shardStatus = status.Status + + allWorkflowIDs := make([]string, 0) + for wfID := range p.store.GetAllRoutingState() { + allWorkflowIDs = append(allWorkflowIDs, wfID) + } + + pendingAllocs := p.store.GetPendingAllocations() + p.lggr.Infow("RingOCR Observation pending allocations", "pendingAllocs", pendingAllocs) + + allWorkflowIDs = append(allWorkflowIDs, pendingAllocs...) + allWorkflowIDs = uniqueSorted(allWorkflowIDs) + p.lggr.Infow("RingOCR Observation all workflow IDs unique", "allWorkflowIDs", allWorkflowIDs, "wantShards", wantShards) + + observation := &ringpb.Observation{ + ShardStatus: shardStatus, + WorkflowIds: allWorkflowIDs, + Now: timestamppb.Now(), + WantShards: wantShards, + } + + return proto.MarshalOptions{Deterministic: true}.Marshal(observation) +} + +func (p *Plugin) ValidateObservation(_ context.Context, _ ocr3types.OutcomeContext, _ types.Query, ao types.AttributedObservation) error { + observation := &ringpb.Observation{} + if err := proto.Unmarshal(ao.Observation, observation); err != nil { + return err + } + if observation.Now == nil { + return errors.New("observation missing timestamp") + } + if observation.WantShards == 0 { + return errors.New("observation missing WantShards") + } + return nil +} + +func (p *Plugin) ObservationQuorum(_ context.Context, _ ocr3types.OutcomeContext, _ types.Query, aos []types.AttributedObservation) (quorumReached bool, err error) { + return 
quorumhelper.ObservationCountReachesObservationQuorum(quorumhelper.QuorumTwoFPlusOne, p.config.N, p.config.F, aos), nil +} + +func (p *Plugin) collectShardInfo(aos []types.AttributedObservation) (shardHealth map[uint32]int, workflows []string, timestamps []time.Time, wantShardVotes map[commontypes.OracleID]uint32) { + shardHealth = make(map[uint32]int) + wantShardVotes = make(map[commontypes.OracleID]uint32) + for _, ao := range aos { + observation := &ringpb.Observation{} + _ = proto.Unmarshal(ao.Observation, observation) // validated in ValidateObservation + + for shardID, status := range observation.ShardStatus { + if status != nil && status.IsHealthy { + shardHealth[shardID]++ + } + } + + workflows = append(workflows, observation.WorkflowIds...) + timestamps = append(timestamps, observation.Now.AsTime()) + + wantShardVotes[ao.Observer] = observation.WantShards + } + return shardHealth, workflows, timestamps, wantShardVotes +} + +func (p *Plugin) getHealthyShards(shardHealth map[uint32]int) []uint32 { + var healthyShards []uint32 + for shardID, votes := range shardHealth { + if votes > p.config.F { + healthyShards = append(healthyShards, shardID) + p.store.SetShardHealth(shardID, true) + } + } + slices.Sort(healthyShards) + + return healthyShards +} + +func (p *Plugin) Outcome(_ context.Context, outctx ocr3types.OutcomeContext, _ types.Query, aos []types.AttributedObservation) (ocr3types.Outcome, error) { + currentShardHealth, allWorkflows, nows, wantShardVotes := p.collectShardInfo(aos) + p.lggr.Infow("RingOCR Outcome collect shard info", "currentShardHealth", currentShardHealth, "wantShardVotes", wantShardVotes) + + // Use the median timestamp to determine the current time + slices.SortFunc(nows, time.Time.Compare) + now := nows[len(nows)/2] + + // Use median for wantShards consensus (all validated observations have WantShards > 0) + votes := make([]uint32, 0, len(wantShardVotes)) + for _, v := range wantShardVotes { + votes = append(votes, v) + } + 
slices.Sort(votes) + wantShards := votes[len(votes)/2] + + // Bootstrap from Arbiter's current shard count on 1st round; subsequent rounds build on prior outcome + prior := &ringpb.Outcome{} + if outctx.PreviousOutcome == nil { + prior.Routes = map[string]*ringpb.WorkflowRoute{} + prior.State = &ringpb.RoutingState{Id: outctx.SeqNr, State: &ringpb.RoutingState_RoutableShards{RoutableShards: wantShards}} + } else if err := proto.Unmarshal(outctx.PreviousOutcome, prior); err != nil { + return nil, err + } + + allWorkflows = uniqueSorted(allWorkflows) + + healthyShards := p.getHealthyShards(currentShardHealth) + + nextState, err := NextState(prior.State, wantShards, now, p.timeToSync) + if err != nil { + return nil, err + } + + // Deterministic hashing ensures all nodes agree on workflow-to-shard assignments + // without coordination, preventing protocol failures from inconsistent routing + ring := newShardRing(healthyShards) + routes := make(map[string]*ringpb.WorkflowRoute) + for _, wfID := range allWorkflows { + shard, err := locateShard(ring, wfID) + if err != nil { + p.lggr.Warnw("RingOCR failed to locate shard for workflow", "workflowID", wfID, "error", err) + shard = 0 // fallback to shard 0 when no healthy shards + } + routes[wfID] = &ringpb.WorkflowRoute{Shard: shard} + } + + outcome := &ringpb.Outcome{ + State: nextState, + Routes: routes, + } + + p.lggr.Infow("RingOCR Outcome", "healthyShards", len(healthyShards), "totalObservations", len(aos), "workflowCount", len(routes)) + + return proto.MarshalOptions{Deterministic: true}.Marshal(outcome) +} + +func (p *Plugin) Reports(_ context.Context, _ uint64, outcome ocr3types.Outcome) ([]ocr3types.ReportPlus[[]byte], error) { + allOraclesTransmitNow := &ocr3types.TransmissionSchedule{ + Transmitters: make([]commontypes.OracleID, p.config.N), + TransmissionDelays: make([]time.Duration, p.config.N), + } + + for i := 0; i < p.config.N; i++ { + allOraclesTransmitNow.Transmitters[i] = commontypes.OracleID(i) 
// ShouldAcceptAttestedReport accepts every attested report; it always returns
// true and a nil error.
//
//coverage:ignore
func (p *Plugin) ShouldAcceptAttestedReport(_ context.Context, _ uint64, _ ocr3types.ReportWithInfo[[]byte]) (bool, error) {
	return true, nil
}

// ShouldTransmitAcceptedReport allows every accepted report to be transmitted;
// it always returns true and a nil error.
//
//coverage:ignore
func (p *Plugin) ShouldTransmitAcceptedReport(_ context.Context, _ uint64, _ ocr3types.ReportWithInfo[[]byte]) (bool, error) {
	return true, nil
}

// Close is a no-op and always returns nil.
//
//coverage:ignore
func (p *Plugin) Close() error {
	return nil
}
&ringpb.ReplicaStatus{}, nil +} + +func (m *mockArbiter) ConsensusWantShards(ctx context.Context, req *ringpb.ConsensusWantShardsRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) { + return &emptypb.Empty{}, nil +} + +var twoHealthyShards = []map[uint32]*ringpb.ShardStatus{ + {0: {IsHealthy: true}, 1: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}}, +} + +func toShardStatus(m map[uint32]bool) map[uint32]*ringpb.ShardStatus { + result := make(map[uint32]*ringpb.ShardStatus, len(m)) + for k, v := range m { + result[k] = &ringpb.ShardStatus{IsHealthy: v} + } + return result +} + +func TestPlugin_Outcome(t *testing.T) { + t.Run("WithMultiNodeObservations", func(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + store.SetAllShardHealth(map[uint32]bool{0: true, 1: true, 2: true}) + + config := ocr3types.ReportingPluginConfig{ + N: 4, F: 1, + OffchainConfig: []byte{}, + MaxDurationObservation: 0, + MaxDurationShouldAcceptAttestedReport: 0, + MaxDurationShouldTransmitAcceptedReport: 0, + } + + plugin, err := NewPlugin(store, &mockArbiter{}, config, lggr, nil) + require.NoError(t, err) + + ctx := t.Context() + intialSeqNr := uint64(42) + outcomeCtx := ocr3types.OutcomeContext{SeqNr: intialSeqNr} + + // Observations from 4 NOPs reporting health, workflows, and wantShards=3 + observations := []struct { + name string + shardStatus map[uint32]*ringpb.ShardStatus + workflows []string + wantShards uint32 + }{ + { + name: "NOP 0", + shardStatus: toShardStatus(map[uint32]bool{0: true, 1: true, 2: true}), + workflows: []string{"wf-A", "wf-B", "wf-C"}, + wantShards: 3, + }, + { + name: "NOP 1", + shardStatus: toShardStatus(map[uint32]bool{0: true, 1: true, 2: true}), + workflows: []string{"wf-B", "wf-C", "wf-D"}, + wantShards: 3, + }, + { + name: "NOP 2", + shardStatus: toShardStatus(map[uint32]bool{0: true, 1: true, 2: false}), // shard 2 unhealthy + workflows: []string{"wf-A", "wf-C"}, + 
wantShards: 3, + }, + { + name: "NOP 3", + shardStatus: toShardStatus(map[uint32]bool{0: true, 1: true, 2: true}), + workflows: []string{"wf-A", "wf-B", "wf-D"}, + wantShards: 3, + }, + } + + // Build attributed observations + aos := make([]types.AttributedObservation, 0) + for idx, obs := range observations { + pbObs := &ringpb.Observation{ + ShardStatus: obs.shardStatus, + WorkflowIds: obs.workflows, + Now: timestamppb.Now(), + WantShards: obs.wantShards, + } + rawObs, marshalErr := proto.Marshal(pbObs) + require.NoError(t, marshalErr) + + aos = append(aos, types.AttributedObservation{ + Observation: rawObs, + Observer: commontypes.OracleID(idx), //nolint:gosec // G115: idx bounded by observations slice + }) + } + + // Execute Outcome phase + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + require.NotNil(t, outcome) + + // Verify outcome + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Check consensus results + require.NotNil(t, outcomeProto.State) + // When bootstrapping without PreviousOutcome, we use wantShards from observations (3) + // Since consensus wantShards (3) equals bootstrap shards, no transition needed - ID stays the same + require.Equal(t, intialSeqNr, outcomeProto.State.Id, "ID should match SeqNr (no transition needed)") + t.Logf("Outcome - ID: %d, HealthyShards: %v", outcomeProto.State.Id, outcomeProto.State.GetRoutableShards()) + t.Logf("Workflows assigned: %d", len(outcomeProto.Routes)) + + // Verify all workflows are assigned + expectedWorkflows := map[string]bool{"wf-A": true, "wf-B": true, "wf-C": true, "wf-D": true} + require.Len(t, outcomeProto.Routes, len(expectedWorkflows)) + for wf := range expectedWorkflows { + route, exists := outcomeProto.Routes[wf] + require.True(t, exists, "workflow %s should be assigned", wf) + require.LessOrEqual(t, route.Shard, uint32(2), "shard should be healthy (0-2)") + t.Logf(" %s → shard %d", wf, 
route.Shard) + } + + // Verify determinism: run again, should get same assignments + outcome2, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + outcomeProto2 := &ringpb.Outcome{} + err = proto.Unmarshal(outcome2, outcomeProto2) + require.NoError(t, err) + + // Same workflows → same shards + for wf, route1 := range outcomeProto.Routes { + route2, exists := outcomeProto2.Routes[wf] + require.True(t, exists) + require.Equal(t, route1.Shard, route2.Shard, "workflow %s should assign to same shard", wf) + } + }) +} + +func TestPlugin_StateTransitions(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + + config := ocr3types.ReportingPluginConfig{ + N: 4, F: 1, + } + + // Use short time to sync for testing + plugin, err := NewPlugin(store, &mockArbiter{}, config, lggr, &ConsensusConfig{ + BatchSize: 100, + TimeToSync: 1 * time.Second, + }) + require.NoError(t, err) + + ctx := t.Context() + now := time.Now() + + // Test 1: Initial state with no previous outcome + t.Run("initial_state", func(t *testing.T) { + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 1, + PreviousOutcome: nil, + } + + // Only 1 healthy shard in observations with wantShards=1 + aos := makeObservationsWithWantShards(t, []map[uint32]*ringpb.ShardStatus{ + {0: {IsHealthy: true}}, + {0: {IsHealthy: true}}, + {0: {IsHealthy: true}}, + }, []string{"wf-1"}, now, 1) + + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Should be in stable state with min shard count + require.NotNil(t, outcomeProto.State.GetRoutableShards()) + require.Equal(t, uint32(1), outcomeProto.State.GetRoutableShards()) + t.Logf("Initial state: %d routable shards", outcomeProto.State.GetRoutableShards()) + }) + + // Test 2: Transition triggered when wantShards changes + t.Run("transition_triggered", func(t *testing.T) { + // Start with 1 shard in 
stable state + priorOutcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_RoutableShards{ + RoutableShards: 1, + }, + }, + Routes: map[string]*ringpb.WorkflowRoute{}, + } + priorBytes, err := proto.Marshal(priorOutcome) + require.NoError(t, err) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 2, + PreviousOutcome: priorBytes, + } + + // Observations show 2 healthy shards and wantShards=2 + aos := makeObservationsWithWantShards(t, twoHealthyShards, []string{"wf-1"}, now, 2) + + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Should transition to Transition state + transition := outcomeProto.State.GetTransition() + require.NotNil(t, transition, "should be in transition state") + require.Equal(t, uint32(2), transition.WantShards, "want 2 shards") + require.Equal(t, uint32(1), transition.LastStableCount, "was at 1 shard") + require.True(t, transition.ChangesSafeAfter.AsTime().After(now), "safety period should be in future") + t.Logf("Transition: %d → %d, safe after %v", transition.LastStableCount, transition.WantShards, transition.ChangesSafeAfter.AsTime()) + }) + + // Test 3: Stay in transition during safety period + t.Run("stay_in_transition", func(t *testing.T) { + safeAfter := now.Add(1 * time.Hour) + priorOutcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 2, + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{ + WantShards: 2, + LastStableCount: 1, + ChangesSafeAfter: timestamppb.New(safeAfter), + }, + }, + }, + Routes: map[string]*ringpb.WorkflowRoute{}, + } + priorBytes, err := proto.Marshal(priorOutcome) + require.NoError(t, err) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 3, + PreviousOutcome: priorBytes, + } + + // Still showing 2 healthy shards with wantShards=2, but safety period not elapsed + aos := 
makeObservationsWithWantShards(t, twoHealthyShards, []string{"wf-1"}, now, 2) + + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Should still be in transition state + transition := outcomeProto.State.GetTransition() + require.NotNil(t, transition, "should still be in transition") + require.Equal(t, uint32(2), transition.WantShards) + t.Logf("Still in transition, waiting for safety period") + }) + + // Test 4: Complete transition after safety period + t.Run("complete_transition", func(t *testing.T) { + safeAfter := now.Add(-1 * time.Second) // Safety period already passed + priorOutcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 2, + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{ + WantShards: 2, + LastStableCount: 1, + ChangesSafeAfter: timestamppb.New(safeAfter), + }, + }, + }, + Routes: map[string]*ringpb.WorkflowRoute{}, + } + priorBytes, err := proto.Marshal(priorOutcome) + require.NoError(t, err) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 3, + PreviousOutcome: priorBytes, + } + + aos := makeObservationsWithWantShards(t, twoHealthyShards, []string{"wf-1"}, now, 2) + + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Should now be in stable state with 2 shards + require.NotNil(t, outcomeProto.State.GetRoutableShards(), "should be in stable state") + require.Equal(t, uint32(2), outcomeProto.State.GetRoutableShards()) + require.Equal(t, uint64(3), outcomeProto.State.Id, "state ID should increment") + t.Logf("Transition complete: now at %d routable shards", outcomeProto.State.GetRoutableShards()) + }) + + // Test 5: Stay stable when wantShards matches current + t.Run("stay_stable", func(t *testing.T) { + 
priorOutcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 3, + State: &ringpb.RoutingState_RoutableShards{ + RoutableShards: 2, + }, + }, + Routes: map[string]*ringpb.WorkflowRoute{}, + } + priorBytes, err := proto.Marshal(priorOutcome) + require.NoError(t, err) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 4, + PreviousOutcome: priorBytes, + } + + // Same 2 healthy shards with wantShards=2 + aos := makeObservationsWithWantShards(t, twoHealthyShards, []string{"wf-1"}, now, 2) + + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Should stay in stable state, ID unchanged + require.NotNil(t, outcomeProto.State.GetRoutableShards()) + require.Equal(t, uint32(2), outcomeProto.State.GetRoutableShards()) + require.Equal(t, uint64(3), outcomeProto.State.Id, "state ID should not change when stable") + t.Logf("Staying stable at %d routable shards", outcomeProto.State.GetRoutableShards()) + }) +} + +func makeObservationsWithWantShards(t *testing.T, shardStatuses []map[uint32]*ringpb.ShardStatus, workflows []string, now time.Time, wantShards uint32) []types.AttributedObservation { + aos := make([]types.AttributedObservation, 0, len(shardStatuses)) + for i, status := range shardStatuses { + pbObs := &ringpb.Observation{ + ShardStatus: status, + WorkflowIds: workflows, + Now: timestamppb.New(now), + WantShards: wantShards, + } + rawObs, marshalErr := proto.Marshal(pbObs) + require.NoError(t, marshalErr) + + aos = append(aos, types.AttributedObservation{ + Observation: rawObs, + Observer: commontypes.OracleID(i), //nolint:gosec // G115: i bounded by shardStatuses slice + }) + } + return aos +} + +func TestPlugin_NewPlugin_NilArbiter(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + config := ocr3types.ReportingPluginConfig{N: 4, F: 1} + + _, err := NewPlugin(store, nil, config, lggr, nil) + 
require.Error(t, err) + require.Contains(t, err.Error(), "RingOCR arbiterScaler is required") +} + +func TestPlugin_getHealthyShards(t *testing.T) { + tests := []struct { + name string + votes map[uint32]int // shardID -> vote count + f int + want int + }{ + {"all healthy", map[uint32]int{0: 2, 1: 2, 2: 2}, 1, 3}, + {"some unhealthy", map[uint32]int{0: 2, 1: 1, 2: 2}, 1, 2}, + {"none healthy", map[uint32]int{0: 1, 1: 1}, 1, 0}, + {"higher F threshold", map[uint32]int{0: 3, 1: 2, 2: 3}, 2, 2}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + plugin := &Plugin{ + store: NewStore(), + config: ocr3types.ReportingPluginConfig{F: tc.f}, + } + got := plugin.getHealthyShards(tc.votes) + require.Len(t, got, tc.want) + }) + } +} + +func TestPlugin_NoHealthyShardsFallbackToShardZero(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + + // Set all shards unhealthy - store starts in transition state + store.SetAllShardHealth(map[uint32]bool{0: false, 1: false, 2: false}) + + config := ocr3types.ReportingPluginConfig{ + N: 4, F: 1, + } + + arbiter := &mockArbiter{} + plugin, err := NewPlugin(store, arbiter, config, lggr, &ConsensusConfig{ + BatchSize: 100, + TimeToSync: 1 * time.Second, + }) + require.NoError(t, err) + + transmitter := NewTransmitter(lggr, store, nil, arbiter, "test-account") + + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + + // Start a goroutine that requests allocation (will block waiting for OCR) + resultCh := make(chan uint32) + errCh := make(chan error, 1) + go func() { + shard, shardErr := store.GetShardForWorkflow(ctx, "workflow-123") + if shardErr != nil { + errCh <- shardErr + return + } + resultCh <- shard + }() + + // Give goroutine time to enqueue request + time.Sleep(10 * time.Millisecond) + + // Verify request is pending for OCR consensus + pending := store.GetPendingAllocations() + require.Contains(t, pending, "workflow-123") + + // Simulate OCR round with 
observations showing no healthy shards + // The pending allocation "workflow-123" should be included in observation + now := time.Now() + aos := make([]types.AttributedObservation, 3) + for i := 0; i < 3; i++ { + pbObs := &ringpb.Observation{ + ShardStatus: toShardStatus(map[uint32]bool{0: false, 1: false, 2: false}), + WorkflowIds: []string{"workflow-123"}, + Now: timestamppb.New(now), + } + rawObs, marshalErr := proto.Marshal(pbObs) + require.NoError(t, marshalErr) + aos[i] = types.AttributedObservation{ + Observation: rawObs, + Observer: commontypes.OracleID(i), //nolint:gosec // G115: i bounded by loop + } + } + + // Use a previous outcome in steady state so we can test the fallback + priorOutcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }, + Routes: map[string]*ringpb.WorkflowRoute{}, + } + priorBytes, err := proto.Marshal(priorOutcome) + require.NoError(t, err) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 2, + PreviousOutcome: priorBytes, + } + + // Run plugin Outcome phase + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + // Transmit the outcome (applies routes to store) + reports, err := plugin.Reports(ctx, 2, outcome) + require.NoError(t, err) + require.Len(t, reports, 1) + + err = transmitter.Transmit(ctx, types.ConfigDigest{}, 2, reports[0].ReportWithInfo, nil) + require.NoError(t, err) + + // Blocked goroutine should now receive result from OCR - should be shard 0 (fallback) + select { + case shard := <-resultCh: + require.Equal(t, uint32(0), shard, "should fallback to shard 0 when no healthy shards") + case recvErr := <-errCh: + t.Fatalf("unexpected error: %v", recvErr) + case <-time.After(100 * time.Millisecond): + t.Fatal("allocation was not fulfilled by OCR") + } + + // Verify the outcome assigned workflow-123 to shard 0 + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + 
require.NoError(t, err) + + route, exists := outcomeProto.Routes["workflow-123"] + require.True(t, exists, "workflow-123 should be in routes") + require.Equal(t, uint32(0), route.Shard, "workflow-123 should be assigned to shard 0 (fallback)") +} + +func TestPlugin_ObservationQuorum(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + config := ocr3types.ReportingPluginConfig{N: 4, F: 1} + plugin, err := NewPlugin(store, &mockArbiter{}, config, lggr, nil) + require.NoError(t, err) + + ctx := context.Background() + outctx := ocr3types.OutcomeContext{} + + t.Run("quorum_reached", func(t *testing.T) { + // Need 2F+1 = 3 observations for quorum with N=4, F=1 + aos := make([]types.AttributedObservation, 3) + for i := range aos { + aos[i] = types.AttributedObservation{Observer: commontypes.OracleID(i)} //nolint:gosec // G115: i bounded by slice + } + + quorum, qErr := plugin.ObservationQuorum(ctx, outctx, nil, aos) + require.NoError(t, qErr) + require.True(t, quorum) + }) + + t.Run("quorum_not_reached", func(t *testing.T) { + // Only 2 observations - not enough for quorum + aos := make([]types.AttributedObservation, 2) + for i := range aos { + aos[i] = types.AttributedObservation{Observer: commontypes.OracleID(i)} //nolint:gosec // G115: i bounded by slice + } + + quorum, qErr := plugin.ObservationQuorum(ctx, outctx, nil, aos) + require.NoError(t, qErr) + require.False(t, quorum) + }) + + t.Run("exact_quorum", func(t *testing.T) { + // Exactly 2F+1 = 3 observations + aos := make([]types.AttributedObservation, 3) + for i := range aos { + aos[i] = types.AttributedObservation{Observer: commontypes.OracleID(i)} //nolint:gosec // G115: i bounded by slice + } + + quorum, qErr := plugin.ObservationQuorum(ctx, outctx, nil, aos) + require.NoError(t, qErr) + require.True(t, quorum) + }) + + t.Run("all_observations", func(t *testing.T) { + // All N=4 observations + aos := make([]types.AttributedObservation, 4) + for i := range aos { + aos[i] = 
types.AttributedObservation{Observer: commontypes.OracleID(i)} //nolint:gosec // G115: i bounded by slice + } + + quorum, qErr := plugin.ObservationQuorum(ctx, outctx, nil, aos) + require.NoError(t, qErr) + require.True(t, quorum) + }) +} + +func TestPlugin_ShardOrchestratorIntegration(t *testing.T) { + lggr := logger.Test(t) + + // Create both stores + ringStore := NewStore() + orchestratorStore := shardorchestrator.NewStore(lggr) + + // Initialize ring store with healthy shards + ringStore.SetAllShardHealth(map[uint32]bool{0: true, 1: true, 2: true}) + + config := ocr3types.ReportingPluginConfig{ + N: 4, F: 1, + } + + arbiter := &mockArbiter{} + plugin, err := NewPlugin(ringStore, arbiter, config, lggr, &ConsensusConfig{ + BatchSize: 100, + TimeToSync: 1 * time.Second, + }) + require.NoError(t, err) + + // Create transmitter with both stores + transmitter := NewTransmitter(lggr, ringStore, orchestratorStore, arbiter, "test-account") + + ctx := context.Background() + now := time.Now() + + t.Run("initial_workflow_assignments", func(t *testing.T) { + // Create observations with workflows + workflows := []string{"wf-A", "wf-B", "wf-C"} + aos := makeObservationsWithWantShards(t, []map[uint32]*ringpb.ShardStatus{ + {0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: true}}, + }, workflows, now, 3) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 1, + PreviousOutcome: nil, + } + + // Generate outcome + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, aos) + require.NoError(t, err) + + // Generate report and transmit + reports, err := plugin.Reports(ctx, 1, outcome) + require.NoError(t, err) + require.Len(t, reports, 1) + + err = transmitter.Transmit(ctx, types.ConfigDigest{}, 1, reports[0].ReportWithInfo, nil) + require.NoError(t, err) + + // Verify ring store was updated + for _, wf := range workflows { + shard, err 
:= ringStore.GetShardForWorkflow(ctx, wf) + require.NoError(t, err) + require.LessOrEqual(t, shard, uint32(2), "workflow should be assigned to valid shard") + t.Logf("Ring store: %s → shard %d", wf, shard) + } + + // Verify orchestrator store was updated with correct state + for _, wf := range workflows { + mapping, err := orchestratorStore.GetWorkflowMapping(ctx, wf) + require.NoError(t, err) + require.Equal(t, wf, mapping.WorkflowID) + require.LessOrEqual(t, mapping.NewShardID, uint32(2)) + require.Equal(t, uint32(0), mapping.OldShardID, "initial assignment should have oldShardID=0") + require.Equal(t, shardorchestrator.StateSteady, mapping.TransitionState, "initial assignment should be steady") + t.Logf("Orchestrator store: %s → shard %d (state: %s)", wf, mapping.NewShardID, mapping.TransitionState.String()) + } + + // Verify version tracking + version := orchestratorStore.GetMappingVersion() + require.Equal(t, uint64(1), version, "version should increment after first update") + }) + + t.Run("workflow_transition_detected", func(t *testing.T) { + // First, establish a baseline with workflows distributed across 3 shards + // Use wantShards=3 to ensure workflows actually get assigned to shard 2 + baselineAos := makeObservationsWithWantShards(t, []map[uint32]*ringpb.ShardStatus{ + {0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}, 2: {IsHealthy: true}}, + }, []string{"wf-A", "wf-B", "wf-C", "wf-D", "wf-E"}, now, 3) + + baselineOutcome, err := plugin.Outcome(ctx, ocr3types.OutcomeContext{SeqNr: 2}, nil, baselineAos) + require.NoError(t, err) + + baselineReports, err := plugin.Reports(ctx, 2, baselineOutcome) + require.NoError(t, err) + + err = transmitter.Transmit(ctx, types.ConfigDigest{}, 2, baselineReports[0].ReportWithInfo, nil) + require.NoError(t, err) + + // Parse baseline to see which workflows were on shard 2 + baselineProto 
:= &ringpb.Outcome{} + err = proto.Unmarshal(baselineOutcome, baselineProto) + require.NoError(t, err) + + workflowsOnShard2 := []string{} + for wfID, route := range baselineProto.Routes { + if route.Shard == 2 { + workflowsOnShard2 = append(workflowsOnShard2, wfID) + } + t.Logf("Baseline: %s on shard %d", wfID, route.Shard) + } + require.NotEmpty(t, workflowsOnShard2, "at least one workflow should be on shard 2 for this test") + + // Now scale down to 2 shards - workflows on shard 2 MUST move + transitionAos := makeObservationsWithWantShards(t, []map[uint32]*ringpb.ShardStatus{ + {0: {IsHealthy: true}, 1: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}}, + {0: {IsHealthy: true}, 1: {IsHealthy: true}}, + }, []string{"wf-A", "wf-B", "wf-C", "wf-D", "wf-E"}, now, 2) + + outcomeCtx := ocr3types.OutcomeContext{ + SeqNr: 3, + PreviousOutcome: baselineOutcome, + } + + outcome, err := plugin.Outcome(ctx, outcomeCtx, nil, transitionAos) + require.NoError(t, err) + + reports, err := plugin.Reports(ctx, 3, outcome) + require.NoError(t, err) + + err = transmitter.Transmit(ctx, types.ConfigDigest{}, 3, reports[0].ReportWithInfo, nil) + require.NoError(t, err) + + // Verify orchestrator store shows transition state for workflows that moved from shard 2 + outcomeProto := &ringpb.Outcome{} + err = proto.Unmarshal(outcome, outcomeProto) + require.NoError(t, err) + + // Workflows that were on shard 2 must have moved and should show TransitionState + for _, wfID := range workflowsOnShard2 { + mapping, err := orchestratorStore.GetWorkflowMapping(ctx, wfID) + require.NoError(t, err) + + newRoute := outcomeProto.Routes[wfID] + require.NotEqual(t, uint32(2), newRoute.Shard, "workflow should have moved from shard 2") + require.Equal(t, shardorchestrator.StateTransitioning, mapping.TransitionState, + "workflow %s moved from shard 2 to shard %d, should be transitioning", wfID, newRoute.Shard) + require.Equal(t, uint32(2), mapping.OldShardID, "should track old shard") + 
require.Equal(t, newRoute.Shard, mapping.NewShardID, "should track new shard") + t.Logf("Workflow %s transitioned: shard 2 → %d", wfID, newRoute.Shard) + } + + // Verify version incremented + version := orchestratorStore.GetMappingVersion() + require.Equal(t, uint64(3), version, "version should increment after update") + }) +} diff --git a/core/services/ring/state.go b/core/services/ring/state.go new file mode 100644 index 00000000000..efa31fe50e7 --- /dev/null +++ b/core/services/ring/state.go @@ -0,0 +1,93 @@ +package ring + +import ( + "errors" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" + + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" +) + +// TransitionStateFromBool converts a proto bool (in_transition) to TransitionState +func TransitionStateFromBool(inTransition bool) shardorchestrator.TransitionState { + if inTransition { + return shardorchestrator.StateTransitioning + } + return shardorchestrator.StateSteady +} + +// TransitionStateFromRoutingState returns the TransitionState based on RoutingState +func TransitionStateFromRoutingState(state *ringpb.RoutingState) shardorchestrator.TransitionState { + if IsInSteadyState(state) { + return shardorchestrator.StateSteady + } + return shardorchestrator.StateTransitioning +} + +func IsInSteadyState(state *ringpb.RoutingState) bool { + if state == nil { + return false + } + _, ok := state.State.(*ringpb.RoutingState_RoutableShards) + return ok +} + +func NextStateFromSteady(currentID uint64, currentShards, wantShards uint32, now time.Time, timeToSync time.Duration) *ringpb.RoutingState { + if currentShards == wantShards { + return &ringpb.RoutingState{ + Id: currentID, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: currentShards}, + } + } + + return &ringpb.RoutingState{ + Id: currentID + 1, + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{ + WantShards: 
wantShards, + LastStableCount: currentShards, + ChangesSafeAfter: timestamppb.New(now.Add(timeToSync)), + }, + }, + } +} + +func NextStateFromTransition(currentID uint64, transition *ringpb.Transition, now time.Time) *ringpb.RoutingState { + safeAfter := transition.ChangesSafeAfter.AsTime() + + if now.Before(safeAfter) { + return &ringpb.RoutingState{ + Id: currentID, + State: &ringpb.RoutingState_Transition{ + Transition: transition, + }, + } + } + + return &ringpb.RoutingState{ + Id: currentID + 1, + State: &ringpb.RoutingState_RoutableShards{ + RoutableShards: transition.WantShards, + }, + } +} + +func NextState(current *ringpb.RoutingState, wantShards uint32, now time.Time, timeToSync time.Duration) (*ringpb.RoutingState, error) { + if current == nil { + return nil, errors.New("current state is nil") + } + + switch s := current.State.(type) { + case *ringpb.RoutingState_RoutableShards: + return NextStateFromSteady(current.Id, s.RoutableShards, wantShards, now, timeToSync), nil + + case *ringpb.RoutingState_Transition: + return NextStateFromTransition(current.Id, s.Transition, now), nil + + // coverage:ignore + default: + return nil, errors.New("unknown state type") + } +} diff --git a/core/services/ring/state_test.go b/core/services/ring/state_test.go new file mode 100644 index 00000000000..cd66525642c --- /dev/null +++ b/core/services/ring/state_test.go @@ -0,0 +1,265 @@ +package ring + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" + + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +func TestStateTransitionDeterminism(t *testing.T) { + now := time.Unix(0, 0) + timeToSync := 5 * time.Minute + + current := &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 2}, + } + + // Same inputs should produce identical outputs + result1, err := NextState(current, 4, now, timeToSync) + require.NoError(t, err) + + result2, err := 
NextState(current, 4, now, timeToSync) + require.NoError(t, err) + + require.Equal(t, result1.Id, result2.Id) + require.Equal(t, result1.GetTransition().WantShards, result2.GetTransition().WantShards) + require.Equal(t, result1.GetTransition().LastStableCount, result2.GetTransition().LastStableCount) + require.Equal(t, result1.GetTransition().ChangesSafeAfter.AsTime(), result2.GetTransition().ChangesSafeAfter.AsTime()) +} + +// ∀ state, inputs: NextState(state, inputs).Id >= state.Id +func TestFV_StateIDMonotonicity(t *testing.T) { + timeToSync := 5 * time.Minute + baseTime := time.Unix(0, 0) + + testCases := []struct { + name string + state *ringpb.RoutingState + now time.Time + }{ + // Steady state cases + {"steady_same_shards", steadyState(10, 3), baseTime}, + {"steady_more_shards", steadyState(10, 3), baseTime}, + {"steady_fewer_shards", steadyState(10, 3), baseTime}, + // Transition state cases + {"transition_before_safe", transitionState(10, 3, 5, baseTime.Add(1*time.Hour)), baseTime}, + {"transition_at_safe", transitionState(10, 3, 5, baseTime), baseTime}, + {"transition_after_safe", transitionState(10, 3, 5, baseTime.Add(-1*time.Second)), baseTime}, + } + + shardCounts := []uint32{1, 2, 3, 5, 10} + + for _, tc := range testCases { + for _, wantShards := range shardCounts { + t.Run(tc.name, func(t *testing.T) { + result, err := NextState(tc.state, wantShards, tc.now, timeToSync) + require.NoError(t, err) + + // INVARIANT: ID never decreases + require.GreaterOrEqual(t, result.Id, tc.state.Id, + "state ID must be monotonically non-decreasing") + }) + } + } +} + +// The state machine only produces valid transitions: +// - Steady → Steady (when shards unchanged) +// - Steady → Transition (when shards change) +// - Transition → Transition (before safety period) +// - Transition → Steady (after safety period) +func TestFV_ValidStateTransitions(t *testing.T) { + timeToSync := 5 * time.Minute + baseTime := time.Unix(0, 0) + + t.Run("steady_to_steady_when_unchanged", 
func(t *testing.T) { + for _, shards := range []uint32{1, 2, 3, 5, 10} { + state := steadyState(1, shards) + result, err := NextState(state, shards, baseTime, timeToSync) + require.NoError(t, err) + + // Must remain steady with same shard count + require.True(t, IsInSteadyState(result)) + require.Equal(t, shards, result.GetRoutableShards()) + require.Equal(t, state.Id, result.Id, "ID unchanged when no transition") + } + }) + + t.Run("steady_to_transition_when_changed", func(t *testing.T) { + transitions := [][2]uint32{{1, 2}, {2, 1}, {3, 5}, {5, 3}, {1, 10}} + for _, tr := range transitions { + current, want := tr[0], tr[1] + state := steadyState(1, current) + result, err := NextState(state, want, baseTime, timeToSync) + require.NoError(t, err) + + // Must enter transition + require.False(t, IsInSteadyState(result)) + require.NotNil(t, result.GetTransition()) + require.Equal(t, want, result.GetTransition().WantShards) + require.Equal(t, current, result.GetTransition().LastStableCount) + require.Equal(t, state.Id+1, result.Id) + } + }) + + t.Run("transition_stays_before_safe_time", func(t *testing.T) { + safeAfter := baseTime.Add(1 * time.Hour) + for _, wantShards := range []uint32{1, 2, 5} { + state := transitionState(5, 2, wantShards, safeAfter) + result, err := NextState(state, wantShards, baseTime, timeToSync) + require.NoError(t, err) + + // Must remain in transition + require.False(t, IsInSteadyState(result)) + require.Equal(t, state.Id, result.Id, "ID unchanged while waiting") + } + }) + + t.Run("transition_completes_after_safe_time", func(t *testing.T) { + safeAfter := baseTime.Add(-1 * time.Second) + for _, wantShards := range []uint32{1, 2, 5} { + state := transitionState(5, 2, wantShards, safeAfter) + result, err := NextState(state, wantShards, baseTime, timeToSync) + require.NoError(t, err) + + // Must complete to steady + require.True(t, IsInSteadyState(result)) + require.Equal(t, wantShards, result.GetRoutableShards()) + require.Equal(t, state.Id+1, 
result.Id) + } + }) +} + +// ∀ transition: completion occurs iff now >= safeAfter +func TestFV_SafetyPeriodEnforcement(t *testing.T) { + timeToSync := 5 * time.Minute + baseTime := time.Unix(0, 0) + + // Test various time offsets relative to safeAfter + offsets := []time.Duration{ + -1 * time.Hour, + -1 * time.Minute, + -1 * time.Second, + -1 * time.Nanosecond, + 0, + 1 * time.Nanosecond, + 1 * time.Second, + 1 * time.Minute, + 1 * time.Hour, + } + + for _, offset := range offsets { + safeAfter := baseTime + now := baseTime.Add(offset) + state := transitionState(1, 2, 5, safeAfter) + + result, err := NextState(state, 5, now, timeToSync) + require.NoError(t, err) + + shouldComplete := !now.Before(safeAfter) + didComplete := IsInSteadyState(result) + + require.Equal(t, shouldComplete, didComplete, + "offset=%v: safety period enforcement failed", offset) + } +} + +// When entering transition, WantShards equals the requested shard count +// When completing transition, final shard count equals WantShards +func TestFV_TransitionPreservesTarget(t *testing.T) { + timeToSync := 5 * time.Minute + baseTime := time.Unix(0, 0) + + for _, currentShards := range []uint32{1, 2, 3, 5} { + for _, wantShards := range []uint32{1, 2, 3, 5} { + if currentShards == wantShards { + continue // No transition occurs + } + + // Step 1: Enter transition + state := steadyState(0, currentShards) + afterEnter, err := NextState(state, wantShards, baseTime, timeToSync) + require.NoError(t, err) + require.Equal(t, wantShards, afterEnter.GetTransition().WantShards, + "transition must preserve target shard count") + + // Step 2: Complete transition (after safety period) + afterComplete, err := NextState(afterEnter, wantShards, baseTime.Add(timeToSync+time.Second), timeToSync) + require.NoError(t, err) + require.Equal(t, wantShards, afterComplete.GetRoutableShards(), + "completed state must have target shard count") + } + } +} + +// ∀ transition: ∃ time t where transition completes (no infinite loops) 
+func TestFV_EventualCompletion(t *testing.T) { + timeToSync := 5 * time.Minute + baseTime := time.Unix(0, 0) + + state := steadyState(0, 2) + + // Enter transition + state, err := NextState(state, 5, baseTime, timeToSync) + require.NoError(t, err) + require.False(t, IsInSteadyState(state)) + + // Simulate time progression - must complete within safety period + completionTime := baseTime.Add(timeToSync) + state, err = NextState(state, 5, completionTime, timeToSync) + require.NoError(t, err) + + require.True(t, IsInSteadyState(state), "transition must eventually complete") +} + +// ∀ state: exactly one of (IsInSteadyState, IsInTransition) is true +func TestFV_StateTypeExclusivity(t *testing.T) { + states := []*ringpb.RoutingState{ + steadyState(0, 1), + steadyState(5, 3), + transitionState(0, 1, 2, time.Now()), + transitionState(5, 3, 5, time.Now().Add(time.Hour)), + } + + for i, state := range states { + isSteady := IsInSteadyState(state) + _, isTransition := state.State.(*ringpb.RoutingState_Transition) + + require.NotEqual(t, isSteady, isTransition, + "state %d: exactly one state type must be true", i) + } +} + +// IsInSteadyState(nil) = false (safe handling of nil) +// NextState(nil, ...) 
returns error (explicit failure) +func TestFV_NilStateSafety(t *testing.T) { + require.False(t, IsInSteadyState(nil), "nil state must not be steady") + + _, err := NextState(nil, 1, time.Now(), time.Minute) + require.Error(t, err, "NextState must reject nil input") +} + +func steadyState(id uint64, shards uint32) *ringpb.RoutingState { + return &ringpb.RoutingState{ + Id: id, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: shards}, + } +} + +func transitionState(id uint64, lastStable, wantShards uint32, safeAfter time.Time) *ringpb.RoutingState { + return &ringpb.RoutingState{ + Id: id, + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{ + WantShards: wantShards, + LastStableCount: lastStable, + ChangesSafeAfter: timestamppb.New(safeAfter), + }, + }, + } +} diff --git a/core/services/ring/store.go b/core/services/ring/store.go new file mode 100644 index 00000000000..c60ead132a7 --- /dev/null +++ b/core/services/ring/store.go @@ -0,0 +1,219 @@ +package ring + +import ( + "context" + "maps" + "slices" + "sync" + + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +// AllocationRequest represents a pending workflow allocation request during transition +type AllocationRequest struct { + WorkflowID string + Result chan uint32 +} + +// Store manages shard routing state and workflow mappings. 
+// It serves as a shared data layer across three components: +// - RingOCR plugin: produces consensus-driven routing updates +// - Arbiter: provides shard health and scaling decisions +// - ShardOrchestrator: consumes routing state to direct workflow execution +type Store struct { + routingState map[string]uint32 // workflow_id -> shard_id (cache of allocated workflows) + shardHealth map[uint32]bool // shard_id -> is_healthy + healthyShards []uint32 // Sorted list of healthy shards + currentState *ringpb.RoutingState // Current routing state (steady or transition) + + pendingAllocs map[string][]chan uint32 // workflow_id -> waiting channels + allocRequests chan AllocationRequest // Channel for new allocation requests + + mu sync.Mutex +} + +const AllocationRequestChannelCapacity = 1000 + +func NewStore() *Store { + return &Store{ + routingState: make(map[string]uint32), + shardHealth: make(map[uint32]bool), + healthyShards: make([]uint32, 0), + pendingAllocs: make(map[string][]chan uint32), + allocRequests: make(chan AllocationRequest, AllocationRequestChannelCapacity), + mu: sync.Mutex{}, + } +} + +func (s *Store) updateHealthyShards() { + s.mu.Lock() + defer s.mu.Unlock() + + s.healthyShards = make([]uint32, 0) + + for shardID, healthy := range s.shardHealth { + if healthy { + s.healthyShards = append(s.healthyShards, shardID) + } + } + + // Sort for determinism + slices.Sort(s.healthyShards) + + // If no healthy shards, add shard 0 as fallback + if len(s.healthyShards) == 0 { + s.healthyShards = []uint32{0} + } +} + +// GetShardForWorkflow called by Workflow Registry Syncers of all shards via ShardOrchestratorService. 
+func (s *Store) GetShardForWorkflow(ctx context.Context, workflowID string) (uint32, error) { + s.mu.Lock() + + // Only trust the cache in steady state; during transition OCR may have invalidated it + if IsInSteadyState(s.currentState) { + // Check if already allocated in cache + if shard, ok := s.routingState[workflowID]; ok { + s.mu.Unlock() + return shard, nil + } + ring := newShardRing(s.healthyShards) + s.mu.Unlock() + return locateShard(ring, workflowID) + } + + // During transition, defer to OCR consensus for consistent shard assignment across nodes + resultCh := make(chan uint32, 1) + s.pendingAllocs[workflowID] = append(s.pendingAllocs[workflowID], resultCh) + s.mu.Unlock() + + select { + case s.allocRequests <- AllocationRequest{WorkflowID: workflowID, Result: resultCh}: + case <-ctx.Done(): + return 0, ctx.Err() + } + + select { + case shard := <-resultCh: + return shard, nil + case <-ctx.Done(): + return 0, ctx.Err() + } +} + +// SetShardForWorkflow is called by the RingOCR plugin whenever it finishes a round with allocations for a given workflow ID. +func (s *Store) SetShardForWorkflow(workflowID string, shardID uint32) { + s.mu.Lock() + defer s.mu.Unlock() + + s.routingState[workflowID] = shardID + + // Signal any waiting allocation requests + if waiters, ok := s.pendingAllocs[workflowID]; ok { + for _, ch := range waiters { + select { + case ch <- shardID: + default: + } + } + delete(s.pendingAllocs, workflowID) + } +} + +// SetRoutingState is called by the RingOCR plugin whenever a state transition happens. +func (s *Store) SetRoutingState(state *ringpb.RoutingState) { + s.mu.Lock() + defer s.mu.Unlock() + s.currentState = state +} + +func (s *Store) GetRoutingState() *ringpb.RoutingState { + s.mu.Lock() + defer s.mu.Unlock() + return s.currentState +} + +// GetPendingAllocations called by the RingOCR plugin in the observation phase +// to collect all allocation requests (only applicable to the TRANSITION phase). 
+func (s *Store) GetPendingAllocations() []string { + var pending []string + for { + select { + case req := <-s.allocRequests: + pending = append(pending, req.WorkflowID) + default: + return pending + } + } +} + +func (s *Store) IsInTransition() bool { + s.mu.Lock() + defer s.mu.Unlock() + return !IsInSteadyState(s.currentState) +} + +func (s *Store) GetShardHealth() map[uint32]bool { + s.mu.Lock() + defer s.mu.Unlock() + return maps.Clone(s.shardHealth) +} + +func (s *Store) SetShardHealth(shardID uint32, healthy bool) { + s.mu.Lock() + s.shardHealth[shardID] = healthy + s.mu.Unlock() + s.updateHealthyShards() +} + +func (s *Store) SetAllShardHealth(health map[uint32]bool) { + s.mu.Lock() + s.shardHealth = make(map[uint32]bool) + for k, v := range health { + s.shardHealth[k] = v + } + + // Uninitialized store must wait for OCR consensus before serving requests + if s.currentState == nil { + numHealthy := uint32(0) + for _, healthy := range health { + if healthy { + numHealthy++ + } + } + s.currentState = &ringpb.RoutingState{ + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{ + WantShards: numHealthy, + }, + }, + } + } + s.mu.Unlock() + + s.updateHealthyShards() +} + +func (s *Store) GetAllRoutingState() map[string]uint32 { + s.mu.Lock() + defer s.mu.Unlock() + return maps.Clone(s.routingState) +} + +func (s *Store) DeleteWorkflow(workflowID string) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.routingState, workflowID) +} + +func (s *Store) GetHealthyShardCount() int { + s.mu.Lock() + defer s.mu.Unlock() + return len(s.healthyShards) +} + +func (s *Store) GetHealthyShards() []uint32 { + s.mu.Lock() + defer s.mu.Unlock() + return slices.Clone(s.healthyShards) +} diff --git a/core/services/ring/store_test.go b/core/services/ring/store_test.go new file mode 100644 index 00000000000..0be6844b45c --- /dev/null +++ b/core/services/ring/store_test.go @@ -0,0 +1,314 @@ +package ring + +import ( + "context" + "testing" + "time" + + 
"github.com/stretchr/testify/require" + + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +func TestStore_DeterministicHashing(t *testing.T) { + store := NewStore() + + // Set up healthy shards + store.SetAllShardHealth(map[uint32]bool{ + 0: true, + 1: true, + 2: true, + }) + // Simulate OCR having moved to steady state + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }) + + ctx := context.Background() + + // Test determinism: same workflow always gets same shard + shard1, err := store.GetShardForWorkflow(ctx, "workflow-123") + require.NoError(t, err) + shard2, err := store.GetShardForWorkflow(ctx, "workflow-123") + require.NoError(t, err) + shard3, err := store.GetShardForWorkflow(ctx, "workflow-123") + require.NoError(t, err) + + require.Equal(t, shard1, shard2, "Same workflow should get same shard (call 2)") + require.Equal(t, shard2, shard3, "Same workflow should get same shard (call 3)") + require.LessOrEqual(t, shard1, uint32(2), "Shard should be in healthy set") +} + +func TestStore_ConsistentRingConsistency(t *testing.T) { + store1 := NewStore() + store2 := NewStore() + store3 := NewStore() + + // All stores with same healthy shards + healthyShards := map[uint32]bool{0: true, 1: true, 2: true} + steadyState := &ringpb.RoutingState{ + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + } + store1.SetAllShardHealth(healthyShards) + store1.SetRoutingState(steadyState) + store2.SetAllShardHealth(healthyShards) + store2.SetRoutingState(steadyState) + store3.SetAllShardHealth(healthyShards) + store3.SetRoutingState(steadyState) + + ctx := context.Background() + + // All compute same assignments + workflows := []string{"workflow-A", "workflow-B", "workflow-C", "workflow-D"} + for _, wf := range workflows { + s1, err := store1.GetShardForWorkflow(ctx, wf) + require.NoError(t, err) + s2, err := store2.GetShardForWorkflow(ctx, wf) + require.NoError(t, err) + s3, err := 
store3.GetShardForWorkflow(ctx, wf) + require.NoError(t, err) + + require.Equal(t, s1, s2, "All nodes should agree on %s assignment", wf) + require.Equal(t, s2, s3, "All nodes should agree on %s assignment", wf) + } +} + +func TestStore_Rebalancing(t *testing.T) { + store := NewStore() + ctx := context.Background() + + // Start with 3 healthy shards + store.SetAllShardHealth(map[uint32]bool{0: true, 1: true, 2: true}) + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }) + assignments1 := make(map[string]uint32) + for i := 1; i <= 10; i++ { + wfID := "workflow-" + string(rune(i)) + shard, err := store.GetShardForWorkflow(ctx, wfID) + require.NoError(t, err) + assignments1[wfID] = shard + } + + // Shard 1 fails + store.SetShardHealth(1, false) + assignments2 := make(map[string]uint32) + for i := 1; i <= 10; i++ { + wfID := "workflow-" + string(rune(i)) + shard, err := store.GetShardForWorkflow(ctx, wfID) + require.NoError(t, err) + assignments2[wfID] = shard + } + + // Check that rebalancing occurred (some workflows moved) + healthyShards := store.GetHealthyShards() + require.Len(t, healthyShards, 2, "Should have 2 healthy shards") + require.NotContains(t, healthyShards, uint32(1), "Shard 1 should not be healthy") + + // Verify that workflows on healthy shards did not move + for wfID, originalShard := range assignments1 { + if originalShard == 0 || originalShard == 2 { + require.Equal(t, originalShard, assignments2[wfID], + "Workflow %s on healthy shard %d should not have moved", wfID, originalShard) + } + } +} + +func TestStore_GetHealthyShards(t *testing.T) { + store := NewStore() + + store.SetAllShardHealth(map[uint32]bool{ + 3: true, + 1: true, + 2: true, + }) + + healthyShards := store.GetHealthyShards() + require.Len(t, healthyShards, 3) + // Should be sorted + require.Equal(t, []uint32{1, 2, 3}, healthyShards) +} + +func TestStore_DistributionAcrossShards(t *testing.T) { + store := NewStore() + ctx 
:= context.Background() + + store.SetAllShardHealth(map[uint32]bool{ + 0: true, + 1: true, + 2: true, + }) + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }) + + // Generate many workflows and check distribution + totalWorkflows := 100 + distribution := make(map[uint32]int) + for i := 0; i < totalWorkflows; i++ { + wfID := "workflow-" + string(rune(i)) + shard, err := store.GetShardForWorkflow(ctx, wfID) + require.NoError(t, err) + distribution[shard]++ + } + + require.Equal(t, totalWorkflows, sum(distribution), "Should have 100 workflows") + + // Each shard should have roughly 33% of workflows (±5%) + for shard, count := range distribution { + pct := float64(count) / 100.0 * 100 + require.GreaterOrEqual(t, pct, 28.0, "Shard %d has too few workflows: %d%%", shard, int(pct)) + require.LessOrEqual(t, pct, 38.0, "Shard %d has too many workflows: %d%%", shard, int(pct)) + } +} + +func sum(distribution map[uint32]int) int { + total := 0 + for _, count := range distribution { + total += count + } + return total +} + +func TestStore_GetShardForWorkflow_CacheHit(t *testing.T) { + store := NewStore() + ctx := context.Background() + + // Set up steady state + store.SetAllShardHealth(map[uint32]bool{0: true, 1: true, 2: true}) + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }) + + // Pre-populate cache with a specific shard assignment + store.SetShardForWorkflow("cached-workflow", 2) + + // Should return cached value, not recompute + shard, err := store.GetShardForWorkflow(ctx, "cached-workflow") + require.NoError(t, err) + require.Equal(t, uint32(2), shard) +} + +func TestStore_GetShardForWorkflow_ContextCancelledDuringSend(t *testing.T) { + store := NewStore() + + // Put store in transition state + store.SetAllShardHealth(map[uint32]bool{0: true}) + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_Transition{ + 
Transition: &ringpb.Transition{WantShards: 2}, + }, + }) + + // Fill up the allocRequests channel + for i := 0; i < AllocationRequestChannelCapacity; i++ { + store.allocRequests <- AllocationRequest{WorkflowID: "filler"} + } + + // Context that's already cancelled + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + // Should fail: channel is full and context is cancelled + _, err := store.GetShardForWorkflow(ctx, "workflow-123") + require.ErrorIs(t, err, context.Canceled) +} + +func TestStore_PendingAllocsDuringTransition(t *testing.T) { + store := NewStore() + store.SetAllShardHealth(map[uint32]bool{0: true, 1: true}) + + // Put store in transition state + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{WantShards: 3}, + }, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + // Start a goroutine that requests allocation (will block) + resultCh := make(chan uint32) + go func() { + shard, _ := store.GetShardForWorkflow(ctx, "workflow-X") + resultCh <- shard + }() + + // Give goroutine time to enqueue request + time.Sleep(10 * time.Millisecond) + + // Verify request is pending + pending := store.GetPendingAllocations() + require.Contains(t, pending, "workflow-X") + + // Fulfill the allocation (simulates transmitter receiving OCR outcome) + store.SetShardForWorkflow("workflow-X", 2) + + // Blocked goroutine should now receive result + select { + case shard := <-resultCh: + require.Equal(t, uint32(2), shard) + case <-time.After(50 * time.Millisecond): + t.Fatal("allocation was not fulfilled") + } +} + +func TestStore_AccessorMethods(t *testing.T) { + store := NewStore() + + store.SetAllShardHealth(map[uint32]bool{0: true, 1: true, 2: false}) + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 2}, + }) + store.SetShardForWorkflow("wf-1", 0) + 
store.SetShardForWorkflow("wf-2", 1) + + t.Run("GetRoutingState", func(t *testing.T) { + state := store.GetRoutingState() + require.NotNil(t, state) + require.Equal(t, uint32(2), state.GetRoutableShards()) + }) + + t.Run("IsInTransition_steady_state", func(t *testing.T) { + require.False(t, store.IsInTransition()) + }) + + t.Run("GetShardHealth", func(t *testing.T) { + health := store.GetShardHealth() + require.Len(t, health, 3) + require.True(t, health[0]) + require.True(t, health[1]) + require.False(t, health[2]) + }) + + t.Run("GetAllRoutingState", func(t *testing.T) { + routes := store.GetAllRoutingState() + require.Len(t, routes, 2) + require.Equal(t, uint32(0), routes["wf-1"]) + require.Equal(t, uint32(1), routes["wf-2"]) + }) + + t.Run("GetHealthyShardCount", func(t *testing.T) { + require.Equal(t, 2, store.GetHealthyShardCount()) + }) + + t.Run("DeleteWorkflow", func(t *testing.T) { + store.DeleteWorkflow("wf-1") + routes := store.GetAllRoutingState() + require.Len(t, routes, 1) + require.NotContains(t, routes, "wf-1") + }) + + t.Run("IsInTransition_transition_state", func(t *testing.T) { + store.SetRoutingState(&ringpb.RoutingState{ + State: &ringpb.RoutingState_Transition{Transition: &ringpb.Transition{WantShards: 3}}, + }) + require.True(t, store.IsInTransition()) + }) + + t.Run("IsInTransition_nil_state", func(t *testing.T) { + store.SetRoutingState(nil) + require.True(t, store.IsInTransition()) + }) +} diff --git a/core/services/ring/transmitter.go b/core/services/ring/transmitter.go new file mode 100644 index 00000000000..febc5ca59c3 --- /dev/null +++ b/core/services/ring/transmitter.go @@ -0,0 +1,139 @@ +package ring + +import ( + "context" + + "google.golang.org/protobuf/proto" + + "github.com/smartcontractkit/libocr/offchainreporting2plus/ocr3types" + "github.com/smartcontractkit/libocr/offchainreporting2plus/types" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + 
"github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" +) + +var _ ocr3types.ContractTransmitter[[]byte] = (*Transmitter)(nil) + +// Transmitter handles transmission of shard orchestration outcomes +type Transmitter struct { + lggr logger.Logger + ringStore *Store + shardOrchestratorStore *shardorchestrator.Store + arbiterScaler ringpb.ArbiterScalerClient + fromAccount types.Account +} + +func NewTransmitter(lggr logger.Logger, ringStore *Store, shardOrchestratorStore *shardorchestrator.Store, arbiterScaler ringpb.ArbiterScalerClient, fromAccount types.Account) *Transmitter { + return &Transmitter{ + lggr: lggr, + ringStore: ringStore, + shardOrchestratorStore: shardOrchestratorStore, + arbiterScaler: arbiterScaler, + fromAccount: fromAccount, + } +} + +func (t *Transmitter) Transmit(ctx context.Context, _ types.ConfigDigest, _ uint64, r ocr3types.ReportWithInfo[[]byte], _ []types.AttributedOnchainSignature) error { + outcome := &ringpb.Outcome{} + if err := proto.Unmarshal(r.Report, outcome); err != nil { + t.lggr.Error("failed to unmarshal report") + return err + } + + if err := t.notifyArbiter(ctx, outcome.State); err != nil { + t.lggr.Errorw("failed to notify arbiter", "err", err) + return err + } + + // Update Ring Store + t.ringStore.SetRoutingState(outcome.State) + + // Determine if system is in transition state + systemInTransition := false + if outcome.State != nil { + if _, ok := outcome.State.State.(*ringpb.RoutingState_Transition); ok { + systemInTransition = true + } + } + + // Update ShardOrchestrator store if available + if t.shardOrchestratorStore != nil { + mappings := make([]*shardorchestrator.WorkflowMappingState, 0, len(outcome.Routes)) + for workflowID, route := range outcome.Routes { + // Get the current shard assignment for this workflow to detect changes + var oldShardID uint32 + var transitionState shardorchestrator.TransitionState + + existingMapping, err := t.shardOrchestratorStore.GetWorkflowMapping(ctx, workflowID) + 
switch { + case err != nil: + // New workflow - no previous assignment + oldShardID = 0 + transitionState = shardorchestrator.StateSteady + case existingMapping.NewShardID != route.Shard: + // Workflow is moving to a different shard + oldShardID = existingMapping.NewShardID + transitionState = shardorchestrator.StateTransitioning + default: + // Same shard - but might be in system transition + oldShardID = existingMapping.NewShardID + if systemInTransition { + transitionState = shardorchestrator.StateTransitioning + } else { + transitionState = shardorchestrator.StateSteady + } + } + + mappings = append(mappings, &shardorchestrator.WorkflowMappingState{ + WorkflowID: workflowID, + OldShardID: oldShardID, + NewShardID: route.Shard, + TransitionState: transitionState, + }) + } + + if err := t.shardOrchestratorStore.BatchUpdateWorkflowMappings(ctx, mappings); err != nil { + t.lggr.Errorw("failed to update ShardOrchestrator store", "err", err, "workflowCount", len(mappings)) + // Don't fail the entire transmission if ShardOrchestrator update fails + } else { + t.lggr.Debugw("Updated ShardOrchestrator store", "workflowCount", len(mappings)) + } + } + + // Update Ring Store workflow mappings + for workflowID, route := range outcome.Routes { + t.ringStore.SetShardForWorkflow(workflowID, route.Shard) + t.lggr.Debugw("Updated workflow shard mapping", "workflowID", workflowID, "shard", route.Shard) + } + + return nil +} + +func (t *Transmitter) notifyArbiter(ctx context.Context, state *ringpb.RoutingState) error { + if state == nil { + return nil + } + + var nShards uint32 + switch s := state.State.(type) { + case *ringpb.RoutingState_RoutableShards: + nShards = s.RoutableShards + t.lggr.Infow("Transmitting shard routing", "routableShards", nShards) + case *ringpb.RoutingState_Transition: + nShards = s.Transition.WantShards + t.lggr.Infow("Transmitting shard routing (in transition)", "wantShards", nShards) + } + + if t.arbiterScaler != nil && nShards > 0 { + if _, err := 
t.arbiterScaler.ConsensusWantShards(ctx, &ringpb.ConsensusWantShardsRequest{NShards: nShards}); err != nil { + return err + } + } + + return nil +} + +func (t *Transmitter) FromAccount(ctx context.Context) (types.Account, error) { + return t.fromAccount, nil +} diff --git a/core/services/ring/transmitter_test.go b/core/services/ring/transmitter_test.go new file mode 100644 index 00000000000..0dc7e6b3fca --- /dev/null +++ b/core/services/ring/transmitter_test.go @@ -0,0 +1,174 @@ +package ring + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/emptypb" + + "github.com/smartcontractkit/libocr/offchainreporting2plus/ocr3types" + "github.com/smartcontractkit/libocr/offchainreporting2plus/types" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +type mockArbiterScaler struct { + called bool + nShards uint32 + err error +} + +func (m *mockArbiterScaler) Status(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*ringpb.ReplicaStatus, error) { + return &ringpb.ReplicaStatus{}, nil +} + +func (m *mockArbiterScaler) ConsensusWantShards(ctx context.Context, req *ringpb.ConsensusWantShardsRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) { + m.called = true + m.nShards = req.NShards + if m.err != nil { + return nil, m.err + } + return &emptypb.Empty{}, nil +} + +func TestTransmitter_NewTransmitter(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + tx := NewTransmitter(lggr, store, nil, nil, "test-account") + require.NotNil(t, tx) +} + +func TestTransmitter_FromAccount(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + tx := NewTransmitter(lggr, store, nil, nil, "my-account") + + account, err := tx.FromAccount(context.Background()) + require.NoError(t, err) + require.Equal(t, types.Account("my-account"), account) +} + 
+func TestTransmitter_Transmit(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + mock := &mockArbiterScaler{} + tx := NewTransmitter(lggr, store, nil, mock, "test-account") + + outcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }, + Routes: map[string]*ringpb.WorkflowRoute{ + "wf-1": {Shard: 0}, + "wf-2": {Shard: 1}, + }, + } + outcomeBytes, err := proto.Marshal(outcome) + require.NoError(t, err) + + report := ocr3types.ReportWithInfo[[]byte]{Report: outcomeBytes} + err = tx.Transmit(context.Background(), types.ConfigDigest{}, 0, report, nil) + require.NoError(t, err) + + // Verify arbiter was notified + require.True(t, mock.called) + require.Equal(t, uint32(3), mock.nShards) + + // Verify store was updated + require.Equal(t, uint32(3), store.GetRoutingState().GetRoutableShards()) + routes := store.GetAllRoutingState() + require.Equal(t, uint32(0), routes["wf-1"]) + require.Equal(t, uint32(1), routes["wf-2"]) +} + +func TestTransmitter_Transmit_NilArbiter(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + tx := NewTransmitter(lggr, store, nil, nil, "test-account") + + outcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 2}, + }, + Routes: map[string]*ringpb.WorkflowRoute{"wf-1": {Shard: 0}}, + } + outcomeBytes, _ := proto.Marshal(outcome) + + err := tx.Transmit(context.Background(), types.ConfigDigest{}, 0, ocr3types.ReportWithInfo[[]byte]{Report: outcomeBytes}, nil) + require.NoError(t, err) +} + +func TestTransmitter_Transmit_TransitionState(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + mock := &mockArbiterScaler{} + tx := NewTransmitter(lggr, store, nil, mock, "test-account") + + outcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_Transition{ + Transition: &ringpb.Transition{WantShards: 5}, + }, + }, + } + 
outcomeBytes, _ := proto.Marshal(outcome) + + err := tx.Transmit(context.Background(), types.ConfigDigest{}, 0, ocr3types.ReportWithInfo[[]byte]{Report: outcomeBytes}, nil) + require.NoError(t, err) + require.Equal(t, uint32(5), mock.nShards) +} + +func TestTransmitter_Transmit_InvalidReport(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + tx := NewTransmitter(lggr, store, nil, nil, "test-account") + + // Send invalid protobuf data + report := ocr3types.ReportWithInfo[[]byte]{Report: []byte("invalid protobuf")} + err := tx.Transmit(context.Background(), types.ConfigDigest{}, 0, report, nil) + require.Error(t, err) +} + +func TestTransmitter_Transmit_ArbiterError(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + mock := &mockArbiterScaler{err: context.DeadlineExceeded} + tx := NewTransmitter(lggr, store, nil, mock, "test-account") + + outcome := &ringpb.Outcome{ + State: &ringpb.RoutingState{ + Id: 1, + State: &ringpb.RoutingState_RoutableShards{RoutableShards: 3}, + }, + } + outcomeBytes, _ := proto.Marshal(outcome) + + err := tx.Transmit(context.Background(), types.ConfigDigest{}, 0, ocr3types.ReportWithInfo[[]byte]{Report: outcomeBytes}, nil) + require.ErrorIs(t, err, context.DeadlineExceeded) +} + +func TestTransmitter_Transmit_NilState(t *testing.T) { + lggr := logger.Test(t) + store := NewStore() + tx := NewTransmitter(lggr, store, nil, nil, "test-account") + + outcome := &ringpb.Outcome{ + State: nil, + Routes: map[string]*ringpb.WorkflowRoute{"wf-1": {Shard: 0}}, + } + outcomeBytes, _ := proto.Marshal(outcome) + + err := tx.Transmit(context.Background(), types.ConfigDigest{}, 0, ocr3types.ReportWithInfo[[]byte]{Report: outcomeBytes}, nil) + require.NoError(t, err) + + // Routes should still be applied + routes := store.GetAllRoutingState() + require.Equal(t, uint32(0), routes["wf-1"]) +} diff --git a/core/services/ring/utils.go b/core/services/ring/utils.go new file mode 100644 index 00000000000..734495279ba --- /dev/null +++ 
b/core/services/ring/utils.go @@ -0,0 +1,66 @@ +package ring + +import ( + "errors" + "slices" + "strconv" + + "github.com/buraksezer/consistent" + "github.com/cespare/xxhash/v2" +) + +var errInvalidRing = errors.New("RingOCR invalid ring for consistent hashing") +var errInvalidMember = errors.New("RingOCR invalid member for consistent hashing") + +func uniqueSorted(s []string) []string { + result := slices.Clone(s) + slices.Sort(result) + return slices.Compact(result) +} + +type xxhashHasher struct{} + +func (h xxhashHasher) Sum64(data []byte) uint64 { + return xxhash.Sum64(data) +} + +type ShardMember string + +func (m ShardMember) String() string { + return string(m) +} + +func consistentHashConfig() consistent.Config { + return consistent.Config{ + PartitionCount: 997, // Prime number for better distribution + ReplicationFactor: 50, // Number of replicas per node + Load: 1.1, // Load factor for bounded loads + Hasher: xxhashHasher{}, + } +} + +func newShardRing(healthyShards []uint32) *consistent.Consistent { + if len(healthyShards) == 0 { + return nil + } + members := make([]consistent.Member, len(healthyShards)) + for i, shardID := range healthyShards { + members[i] = ShardMember(strconv.FormatUint(uint64(shardID), 10)) + } + return consistent.New(members, consistentHashConfig()) +} + +func locateShard(ring *consistent.Consistent, workflowID string) (uint32, error) { + if ring == nil { + return 0, errInvalidRing + } + member := ring.LocateKey([]byte(workflowID)) + if member == nil { + return 0, errInvalidMember + } + shardID, err := strconv.ParseUint(member.String(), 10, 32) + if err != nil { + return 0, err + } + return uint32(shardID), nil +} diff --git a/core/services/ring/utils_test.go b/core/services/ring/utils_test.go new file mode 100644 index 00000000000..65f1c61a625 --- /dev/null +++ b/core/services/ring/utils_test.go @@ -0,0 +1,12 @@ +package ring + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUniqueSorted(t 
*testing.T) { + got := uniqueSorted([]string{"c", "a", "b", "a", "c"}) + require.Equal(t, []string{"a", "b", "c"}, got) +} diff --git a/core/services/shardorchestrator/client.go b/core/services/shardorchestrator/client.go new file mode 100644 index 00000000000..271209a8017 --- /dev/null +++ b/core/services/shardorchestrator/client.go @@ -0,0 +1,78 @@ +package shardorchestrator + +import ( + "context" + "fmt" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +// Client wraps gRPC client for communicating with shard 0's orchestrator service +type Client struct { + conn *grpc.ClientConn + client ringpb.ShardOrchestratorServiceClient + logger logger.Logger +} + +// NewClient creates a new gRPC client to communicate with the shard orchestrator on shard 0 +func NewClient(ctx context.Context, address string, lggr logger.Logger) (*Client, error) { + conn, err := grpc.NewClient(address, + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + return nil, fmt.Errorf("failed to create shard orchestrator client for %s: %w", address, err) + } + + return &Client{ + conn: conn, + client: ringpb.NewShardOrchestratorServiceClient(conn), + logger: logger.Named(lggr, "ShardOrchestratorClient"), + }, nil +} + +// GetWorkflowShardMapping queries shard 0 for workflow-to-shard mappings +func (c *Client) GetWorkflowShardMapping(ctx context.Context, workflowIDs []string) (*ringpb.GetWorkflowShardMappingResponse, error) { + c.logger.Debugw("Calling GetWorkflowShardMapping", "workflowCount", len(workflowIDs)) + + req := &ringpb.GetWorkflowShardMappingRequest{ + WorkflowIds: workflowIDs, + } + + resp, err := c.client.GetWorkflowShardMapping(ctx, req) + if err != nil { + return nil, fmt.Errorf("gRPC GetWorkflowShardMapping failed: %w", err) + } + + c.logger.Debugw("GetWorkflowShardMapping response received", + 
"mappingCount", len(resp.Mappings), + "version", resp.MappingVersion) + + return resp, nil +} + +// ReportWorkflowTriggerRegistration reports workflow trigger registration to shard 0 +func (c *Client) ReportWorkflowTriggerRegistration(ctx context.Context, req *ringpb.ReportWorkflowTriggerRegistrationRequest) (*ringpb.ReportWorkflowTriggerRegistrationResponse, error) { + c.logger.Debugw("Calling ReportWorkflowTriggerRegistration", + "shardID", req.SourceShardId, + "workflowCount", len(req.RegisteredWorkflows)) + + resp, err := c.client.ReportWorkflowTriggerRegistration(ctx, req) + if err != nil { + return nil, fmt.Errorf("gRPC ReportWorkflowTriggerRegistration failed: %w", err) + } + + c.logger.Debugw("ReportWorkflowTriggerRegistration response received", + "success", resp.Success) + + return resp, nil +} + +// Close closes the gRPC connection +func (c *Client) Close() error { + c.logger.Info("Closing ShardOrchestrator gRPC client") + return c.conn.Close() +} diff --git a/core/services/shardorchestrator/client_test.go b/core/services/shardorchestrator/client_test.go new file mode 100644 index 00000000000..fed793bd4af --- /dev/null +++ b/core/services/shardorchestrator/client_test.go @@ -0,0 +1,212 @@ +package shardorchestrator + +import ( + "context" + "net" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/test/bufconn" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +const bufSize = 1024 * 1024 + +// mockShardOrchestratorServer implements the gRPC server for testing +type mockShardOrchestratorServer struct { + ringpb.UnimplementedShardOrchestratorServiceServer + mappings map[string]uint32 + registrationCalled bool +} + +func (m *mockShardOrchestratorServer) GetWorkflowShardMapping(ctx context.Context, req 
*ringpb.GetWorkflowShardMappingRequest) (*ringpb.GetWorkflowShardMappingResponse, error) { + mappings := make(map[string]uint32) + mappingStates := make(map[string]*ringpb.WorkflowMappingState) + + for _, wfID := range req.WorkflowIds { + if shardID, ok := m.mappings[wfID]; ok { + mappings[wfID] = shardID + mappingStates[wfID] = &ringpb.WorkflowMappingState{ + OldShardId: 0, + NewShardId: shardID, + InTransition: false, + } + } + } + + return &ringpb.GetWorkflowShardMappingResponse{ + Mappings: mappings, + MappingStates: mappingStates, + MappingVersion: 1, + }, nil +} + +func (m *mockShardOrchestratorServer) ReportWorkflowTriggerRegistration(ctx context.Context, req *ringpb.ReportWorkflowTriggerRegistrationRequest) (*ringpb.ReportWorkflowTriggerRegistrationResponse, error) { + m.registrationCalled = true + return &ringpb.ReportWorkflowTriggerRegistrationResponse{ + Success: true, + }, nil +} + +// setupTestServer creates a test gRPC server using bufconn +func setupTestServer(t *testing.T, mock *mockShardOrchestratorServer) (*grpc.Server, *bufconn.Listener) { + lis := bufconn.Listen(bufSize) + s := grpc.NewServer() + ringpb.RegisterShardOrchestratorServiceServer(s, mock) + + go func() { + if err := s.Serve(lis); err != nil { + t.Logf("Server exited with error: %v", err) + } + }() + + return s, lis +} + +// createTestClient creates a client connected to the test server +func createTestClient(t *testing.T, lis *bufconn.Listener) *Client { + conn, err := grpc.NewClient("passthrough://bufnet", + grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { + return lis.Dial() + }), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + require.NoError(t, err) + + lggr := logger.Test(t) + return &Client{ + conn: conn, + client: ringpb.NewShardOrchestratorServiceClient(conn), + logger: logger.Named(lggr, "TestClient"), + } +} + +func TestClient_GetWorkflowShardMapping(t *testing.T) { + ctx := context.Background() + + mock := 
&mockShardOrchestratorServer{ + mappings: map[string]uint32{ + "workflow-1": 0, + "workflow-2": 1, + "workflow-3": 2, + }, + } + + grpcServer, lis := setupTestServer(t, mock) + defer grpcServer.Stop() + + client := createTestClient(t, lis) + defer client.Close() + + t.Run("successful mapping query", func(t *testing.T) { + workflowIDs := []string{"workflow-1", "workflow-2", "workflow-3"} + resp, err := client.GetWorkflowShardMapping(ctx, workflowIDs) + require.NoError(t, err) + require.NotNil(t, resp) + + assert.Len(t, resp.Mappings, 3) + assert.Equal(t, uint32(0), resp.Mappings["workflow-1"]) + assert.Equal(t, uint32(1), resp.Mappings["workflow-2"]) + assert.Equal(t, uint32(2), resp.Mappings["workflow-3"]) + + assert.Len(t, resp.MappingStates, 3) + assert.Equal(t, uint64(1), resp.MappingVersion) + }) + + t.Run("partial workflow query", func(t *testing.T) { + workflowIDs := []string{"workflow-1", "workflow-unknown"} + resp, err := client.GetWorkflowShardMapping(ctx, workflowIDs) + require.NoError(t, err) + require.NotNil(t, resp) + + // Should only return mappings for known workflows + assert.Len(t, resp.Mappings, 1) + assert.Equal(t, uint32(0), resp.Mappings["workflow-1"]) + _, exists := resp.Mappings["workflow-unknown"] + assert.False(t, exists) + }) + + t.Run("empty workflow list", func(t *testing.T) { + resp, err := client.GetWorkflowShardMapping(ctx, []string{}) + require.NoError(t, err) + require.NotNil(t, resp) + + assert.Empty(t, resp.Mappings) + }) +} + +func TestClient_ReportWorkflowTriggerRegistration(t *testing.T) { + ctx := context.Background() + + mock := &mockShardOrchestratorServer{ + mappings: map[string]uint32{}, + } + + grpcServer, lis := setupTestServer(t, mock) + defer grpcServer.Stop() + + client := createTestClient(t, lis) + defer client.Close() + + t.Run("successful registration report", func(t *testing.T) { + req := &ringpb.ReportWorkflowTriggerRegistrationRequest{ + SourceShardId: 1, + RegisteredWorkflows: map[string]uint32{ + "workflow-1": 
1, + "workflow-2": 1, + }, + TotalActiveWorkflows: 2, + } + + resp, err := client.ReportWorkflowTriggerRegistration(ctx, req) + require.NoError(t, err) + require.NotNil(t, resp) + + assert.True(t, resp.Success) + assert.True(t, mock.registrationCalled) + }) +} + +func TestClient_Close(t *testing.T) { + mock := &mockShardOrchestratorServer{ + mappings: map[string]uint32{}, + } + + grpcServer, lis := setupTestServer(t, mock) + defer grpcServer.Stop() + + client := createTestClient(t, lis) + + err := client.Close() + require.NoError(t, err) + + // Verify connection is closed by attempting to use it + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + _, err = client.GetWorkflowShardMapping(ctx, []string{"test"}) + assert.Error(t, err, "should fail after client is closed") +} + +func TestNewClient(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + + t.Run("creates client successfully", func(t *testing.T) { + // Note: This creates a client but doesn't connect immediately with grpc.NewClient + client, err := NewClient(ctx, "localhost:50051", lggr) + require.NoError(t, err) + require.NotNil(t, client) + defer client.Close() + + assert.NotNil(t, client.conn) + assert.NotNil(t, client.client) + assert.NotNil(t, client.logger) + }) +} diff --git a/core/services/shardorchestrator/service.go b/core/services/shardorchestrator/service.go new file mode 100644 index 00000000000..77826e0d5f4 --- /dev/null +++ b/core/services/shardorchestrator/service.go @@ -0,0 +1,109 @@ +package shardorchestrator + +import ( + "context" + "errors" + "fmt" + + "google.golang.org/grpc" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" +) + +// Server implements the gRPC ShardOrchestratorService +// This runs on shard zero and serves requests from other shards +type Server struct { + ringpb.UnimplementedShardOrchestratorServiceServer + store *Store + 
logger logger.Logger +} + +func NewServer(store *Store, lggr logger.Logger) *Server { + return &Server{ + store: store, + logger: logger.Named(lggr, "ShardOrchestratorServer"), + } +} + +// RegisterWithGRPCServer registers this service with a gRPC server +func (s *Server) RegisterWithGRPCServer(grpcServer *grpc.Server) { + ringpb.RegisterShardOrchestratorServiceServer(grpcServer, s) + s.logger.Info("Registered ShardOrchestrator gRPC service") +} + +// GetWorkflowShardMapping handles batch requests for workflow-to-shard mappings +// This is called by other shards to determine where to route workflow executions +func (s *Server) GetWorkflowShardMapping(ctx context.Context, req *ringpb.GetWorkflowShardMappingRequest) (*ringpb.GetWorkflowShardMappingResponse, error) { + s.logger.Debugw("GetWorkflowShardMapping called", "workflowCount", len(req.WorkflowIds)) + + if len(req.WorkflowIds) == 0 { + return nil, errors.New("workflow_ids is required and must not be empty") + } + + // Retrieve batch from store + mappings, version, err := s.store.GetWorkflowMappingsBatch(ctx, req.WorkflowIds) + if err != nil { + s.logger.Errorw("Failed to get workflow mappings", "error", err) + return nil, fmt.Errorf("failed to get workflow mappings: %w", err) + } + + // Build simple mappings map (workflow_id -> shard_id) + simpleMappings := make(map[string]uint32, len(mappings)) + // Build detailed mapping states + mappingStates := make(map[string]*ringpb.WorkflowMappingState, len(mappings)) + + for workflowID, mapping := range mappings { + // Simple mapping: just the current shard + simpleMappings[workflowID] = mapping.NewShardID + + // Detailed state: includes transition information + mappingStates[workflowID] = &ringpb.WorkflowMappingState{ + OldShardId: mapping.OldShardID, + NewShardId: mapping.NewShardID, + InTransition: mapping.TransitionState.InTransition(), + } + } + + return &ringpb.GetWorkflowShardMappingResponse{ + Mappings: simpleMappings, + MappingStates: mappingStates, + 
MappingVersion: version, + }, nil +} + +// ReportWorkflowTriggerRegistration handles shard registration reports +// Shards call this to inform shard zero about which workflows they have loaded +func (s *Server) ReportWorkflowTriggerRegistration(ctx context.Context, req *ringpb.ReportWorkflowTriggerRegistrationRequest) (*ringpb.ReportWorkflowTriggerRegistrationResponse, error) { + s.logger.Debugw("ReportWorkflowTriggerRegistration called", + "shardID", req.SourceShardId, + "workflowCount", len(req.RegisteredWorkflows), + "totalActive", req.TotalActiveWorkflows, + ) + + // Extract workflow IDs from the map + workflowIDs := make([]string, 0, len(req.RegisteredWorkflows)) + for workflowID := range req.RegisteredWorkflows { + workflowIDs = append(workflowIDs, workflowID) + } + + err := s.store.ReportShardRegistration(ctx, req.SourceShardId, workflowIDs) + if err != nil { + s.logger.Errorw("Failed to update shard registrations", + "shardID", req.SourceShardId, + "error", err, + ) + return &ringpb.ReportWorkflowTriggerRegistrationResponse{ + Success: false, + }, nil + } + + s.logger.Infow("Successfully registered workflows", + "shardID", req.SourceShardId, + "workflowCount", len(workflowIDs), + ) + + return &ringpb.ReportWorkflowTriggerRegistrationResponse{ + Success: true, + }, nil +} diff --git a/core/services/shardorchestrator/service_test.go b/core/services/shardorchestrator/service_test.go new file mode 100644 index 00000000000..185a99c8752 --- /dev/null +++ b/core/services/shardorchestrator/service_test.go @@ -0,0 +1,118 @@ +package shardorchestrator_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" +) + +func TestServer_GetWorkflowShardMapping(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + + 
t.Run("returns_mappings_for_multiple_workflows", func(t *testing.T) { + store := shardorchestrator.NewStore(lggr) + server := shardorchestrator.NewServer(store, lggr) + + // Set up some workflow mappings + mappings := []*shardorchestrator.WorkflowMappingState{ + { + WorkflowID: "wf-alpha", + OldShardID: 0, + NewShardID: 1, + TransitionState: shardorchestrator.StateSteady, + }, + { + WorkflowID: "wf-beta", + OldShardID: 0, + NewShardID: 2, + TransitionState: shardorchestrator.StateSteady, + }, + { + WorkflowID: "wf-gamma", + OldShardID: 1, + NewShardID: 0, + TransitionState: shardorchestrator.StateTransitioning, + }, + } + err := store.BatchUpdateWorkflowMappings(ctx, mappings) + require.NoError(t, err) + + // Request all three workflows + req := &ringpb.GetWorkflowShardMappingRequest{ + WorkflowIds: []string{"wf-alpha", "wf-beta", "wf-gamma"}, + } + + resp, err := server.GetWorkflowShardMapping(ctx, req) + require.NoError(t, err) + require.NotNil(t, resp) + + // Verify simple mappings + require.Len(t, resp.Mappings, 3) + require.Equal(t, uint32(1), resp.Mappings["wf-alpha"]) + require.Equal(t, uint32(2), resp.Mappings["wf-beta"]) + require.Equal(t, uint32(0), resp.Mappings["wf-gamma"]) + + // Verify detailed mapping states + require.Len(t, resp.MappingStates, 3) + + // wf-alpha: steady state + alphaState := resp.MappingStates["wf-alpha"] + require.Equal(t, uint32(0), alphaState.OldShardId) + require.Equal(t, uint32(1), alphaState.NewShardId) + require.False(t, alphaState.InTransition, "steady state should not be in transition") + + // wf-gamma: transitioning state + gammaState := resp.MappingStates["wf-gamma"] + require.Equal(t, uint32(1), gammaState.OldShardId) + require.Equal(t, uint32(0), gammaState.NewShardId) + require.True(t, gammaState.InTransition, "transitioning state should be in transition") + + // Verify version + require.Equal(t, uint64(1), resp.MappingVersion) + }) + + t.Run("rejects_empty_workflow_ids", func(t *testing.T) { + store := 
shardorchestrator.NewStore(lggr) + server := shardorchestrator.NewServer(store, lggr) + + req := &ringpb.GetWorkflowShardMappingRequest{ + WorkflowIds: []string{}, + } + + resp, err := server.GetWorkflowShardMapping(ctx, req) + require.Error(t, err) + require.Nil(t, resp) + require.Contains(t, err.Error(), "required") + }) + + t.Run("handles_partial_results_for_nonexistent_workflows", func(t *testing.T) { + store := shardorchestrator.NewStore(lggr) + server := shardorchestrator.NewServer(store, lggr) + + // Add one workflow + err := store.BatchUpdateWorkflowMappings(ctx, []*shardorchestrator.WorkflowMappingState{ + {WorkflowID: "exists", NewShardID: 1, TransitionState: shardorchestrator.StateSteady}, + }) + require.NoError(t, err) + + // Request one that exists and one that doesn't - batch query silently skips missing workflows + req := &ringpb.GetWorkflowShardMappingRequest{ + WorkflowIds: []string{"exists", "does-not-exist"}, + } + + resp, err := server.GetWorkflowShardMapping(ctx, req) + require.NoError(t, err) + require.NotNil(t, resp) + + // Only the existing workflow is returned + require.Len(t, resp.Mappings, 1) + require.Equal(t, uint32(1), resp.Mappings["exists"]) + require.NotContains(t, resp.Mappings, "does-not-exist") + }) +} diff --git a/core/services/shardorchestrator/shard_orchestrator.go b/core/services/shardorchestrator/shard_orchestrator.go index b4b2cae5ce7..93df375ae5f 100644 --- a/core/services/shardorchestrator/shard_orchestrator.go +++ b/core/services/shardorchestrator/shard_orchestrator.go @@ -10,7 +10,6 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator" ) // ShardOrchestrator is the main service interface for the orchestrator. 
@@ -24,7 +23,7 @@ type orchestrator struct { services.StateMachine grpcServer *grpc.Server - grpcHandler *shardorchestrator.Server + grpcHandler *Server listener net.Listener lggr logger.Logger @@ -37,10 +36,10 @@ type orchestrator struct { var _ ShardOrchestrator = (*orchestrator)(nil) // New creates a new ShardOrchestrator service. -func New(port int, store *shardorchestrator.Store, lggr logger.Logger) ShardOrchestrator { +func New(port int, store *Store, lggr logger.Logger) ShardOrchestrator { lggr = logger.Named(lggr, "ShardOrchestrator") - grpcHandler := shardorchestrator.NewServer(store, lggr) + grpcHandler := NewServer(store, lggr) grpcServer := grpc.NewServer() grpcHandler.RegisterWithGRPCServer(grpcServer) diff --git a/core/services/shardorchestrator/shard_orchestrator_test.go b/core/services/shardorchestrator/shard_orchestrator_test.go index 80eb52622f3..76be1562720 100644 --- a/core/services/shardorchestrator/shard_orchestrator_test.go +++ b/core/services/shardorchestrator/shard_orchestrator_test.go @@ -11,18 +11,17 @@ import ( "google.golang.org/grpc/credentials/insecure" "github.com/smartcontractkit/chainlink-common/pkg/logger" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator/pb" - shardorchestratorservice "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" ) // setupShardOrchestrator creates a test ShardOrchestrator and returns the store, client, and cleanup function -func setupShardOrchestrator(t *testing.T) (*shardorchestrator.Store, pb.ShardOrchestratorServiceClient, func()) { +func setupShardOrchestrator(t *testing.T) (*shardorchestrator.Store, ringpb.ShardOrchestratorServiceClient, func()) { lggr := logger.Test(t) store := shardorchestrator.NewStore(lggr) ctx := 
context.Background() - orchestrator := shardorchestratorservice.New(0, store, lggr) + orchestrator := shardorchestrator.New(0, store, lggr) err := orchestrator.Start(ctx) require.NoError(t, err) @@ -38,7 +37,7 @@ func setupShardOrchestrator(t *testing.T) (*shardorchestrator.Store, pb.ShardOrc conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) require.NoError(t, err) - client := pb.NewShardOrchestratorServiceClient(conn) + client := ringpb.NewShardOrchestratorServiceClient(conn) cleanup := func() { conn.Close() @@ -62,7 +61,7 @@ func TestShardOrchestrator_GetWorkflowShardMapping(t *testing.T) { require.NoError(t, err) // Call the gRPC endpoint - resp, err := client.GetWorkflowShardMapping(ctx, &pb.GetWorkflowShardMappingRequest{ + resp, err := client.GetWorkflowShardMapping(ctx, &ringpb.GetWorkflowShardMappingRequest{ WorkflowIds: []string{"workflow1", "workflow2"}, }) @@ -81,7 +80,7 @@ func TestShardOrchestrator_GetWorkflowShardMapping(t *testing.T) { ctx := context.Background() // Call with empty workflow IDs - resp, err := client.GetWorkflowShardMapping(ctx, &pb.GetWorkflowShardMappingRequest{ + resp, err := client.GetWorkflowShardMapping(ctx, &ringpb.GetWorkflowShardMappingRequest{ WorkflowIds: []string{}, }) @@ -99,7 +98,7 @@ func TestShardOrchestrator_ReportWorkflowTriggerRegistration(t *testing.T) { ctx := context.Background() // Report workflows registered on a shard - resp, err := client.ReportWorkflowTriggerRegistration(ctx, &pb.ReportWorkflowTriggerRegistrationRequest{ + resp, err := client.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 2, RegisteredWorkflows: map[string]uint32{ "workflow1": 1, @@ -120,7 +119,7 @@ func TestShardOrchestrator_ReportWorkflowTriggerRegistration(t *testing.T) { ctx := context.Background() // Report with no workflows - resp, err := client.ReportWorkflowTriggerRegistration(ctx, &pb.ReportWorkflowTriggerRegistrationRequest{ + resp, 
err := client.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 3, RegisteredWorkflows: map[string]uint32{}, TotalActiveWorkflows: 0, @@ -138,7 +137,7 @@ func TestShardOrchestrator_ReportWorkflowTriggerRegistration(t *testing.T) { ctx := context.Background() // Multiple shards reporting different workflows - resp1, err := client.ReportWorkflowTriggerRegistration(ctx, &pb.ReportWorkflowTriggerRegistrationRequest{ + resp1, err := client.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 1, RegisteredWorkflows: map[string]uint32{ "workflow1": 1, @@ -148,7 +147,7 @@ func TestShardOrchestrator_ReportWorkflowTriggerRegistration(t *testing.T) { require.NoError(t, err) assert.True(t, resp1.Success) - resp2, err := client.ReportWorkflowTriggerRegistration(ctx, &pb.ReportWorkflowTriggerRegistrationRequest{ + resp2, err := client.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 2, RegisteredWorkflows: map[string]uint32{ "workflow2": 1, @@ -174,7 +173,7 @@ func TestShardOrchestrator_Integration(t *testing.T) { require.NoError(t, err) // Step 2: Shard 1 reports it has registered workflow-a - reportResp, err := client.ReportWorkflowTriggerRegistration(ctx, &pb.ReportWorkflowTriggerRegistrationRequest{ + reportResp, err := client.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 1, RegisteredWorkflows: map[string]uint32{ "workflow-a": 1, @@ -185,7 +184,7 @@ func TestShardOrchestrator_Integration(t *testing.T) { assert.True(t, reportResp.Success) // Step 3: Another shard queries for the mapping - mappingResp, err := client.GetWorkflowShardMapping(ctx, &pb.GetWorkflowShardMappingRequest{ + mappingResp, err := client.GetWorkflowShardMapping(ctx, &ringpb.GetWorkflowShardMappingRequest{ WorkflowIds: []string{"workflow-a", "workflow-b"}, }) require.NoError(t, err) diff 
// TransitionState describes whether a workflow's shard assignment is settled
// or currently being moved.
type TransitionState uint8

const (
	// StateSteady is the zero value: the assignment is not being moved.
	StateSteady TransitionState = iota
	// StateTransitioning marks an assignment that is being moved.
	StateTransitioning
)

// String returns "steady" or "transitioning" for the two defined states and
// "unknown" for any other value.
func (s TransitionState) String() string {
	if s == StateSteady {
		return "steady"
	}
	if s == StateTransitioning {
		return "transitioning"
	}
	return "unknown"
}

// InTransition reports whether s is StateTransitioning.
func (s TransitionState) InTransition() bool {
	switch s {
	case StateTransitioning:
		return true
	default:
		return false
	}
}

// WorkflowMappingState is one workflow's shard assignment record.
type WorkflowMappingState struct {
	WorkflowID      string
	OldShardID      uint32
	NewShardID      uint32
	TransitionState TransitionState
	UpdatedAt       time.Time
}
logger.Logger +} + +func NewStore(lggr logger.Logger) *Store { + return &Store{ + workflowMappings: make(map[string]*WorkflowMappingState), + shardRegistrations: make(map[uint32]map[string]bool), + mappingVersion: 0, + lastUpdateTime: time.Now(), + logger: logger.Named(lggr, "ShardOrchestratorStore"), + } +} + +// UpdateWorkflowMapping is called by RingOCR to update workflow assignments +// This is the primary data source for shard orchestration +func (s *Store) UpdateWorkflowMapping(ctx context.Context, workflowID string, oldShardID, newShardID uint32, state TransitionState) error { + s.mu.Lock() + defer s.mu.Unlock() + + now := time.Now() + s.workflowMappings[workflowID] = &WorkflowMappingState{ + WorkflowID: workflowID, + OldShardID: oldShardID, + NewShardID: newShardID, + TransitionState: state, + UpdatedAt: now, + } + + s.mappingVersion++ + s.lastUpdateTime = now + + s.logger.Debugw("Updated workflow mapping", + "workflowID", workflowID, + "oldShardID", oldShardID, + "newShardID", newShardID, + "state", state.String(), + "version", s.mappingVersion, + ) + + return nil +} + +// BatchUpdateWorkflowMappings allows RingOCR to update multiple mappings atomically +func (s *Store) BatchUpdateWorkflowMappings(ctx context.Context, mappings []*WorkflowMappingState) error { + s.mu.Lock() + defer s.mu.Unlock() + + now := time.Now() + for _, mapping := range mappings { + s.workflowMappings[mapping.WorkflowID] = &WorkflowMappingState{ + WorkflowID: mapping.WorkflowID, + OldShardID: mapping.OldShardID, + NewShardID: mapping.NewShardID, + TransitionState: mapping.TransitionState, + UpdatedAt: now, + } + } + + s.mappingVersion++ + s.lastUpdateTime = now + + s.logger.Debugw("Batch updated workflow mappings", "count", len(mappings), "version", s.mappingVersion) + return nil +} + +// GetWorkflowMapping retrieves the shard assignment for a specific workflow +// This is called by the gRPC service to respond to GetWorkflowShardMapping requests +func (s *Store) GetWorkflowMapping(ctx 
context.Context, workflowID string) (*WorkflowMappingState, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + mapping, ok := s.workflowMappings[workflowID] + if !ok { + return nil, fmt.Errorf("workflow %s not found in shard mappings", workflowID) + } + + // Return a copy to avoid external mutations + return &WorkflowMappingState{ + WorkflowID: mapping.WorkflowID, + OldShardID: mapping.OldShardID, + NewShardID: mapping.NewShardID, + TransitionState: mapping.TransitionState, + UpdatedAt: mapping.UpdatedAt, + }, nil +} + +// GetAllWorkflowMappings returns all current workflow-to-shard assignments +func (s *Store) GetAllWorkflowMappings(ctx context.Context) ([]*WorkflowMappingState, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + mappings := make([]*WorkflowMappingState, 0, len(s.workflowMappings)) + for _, mapping := range s.workflowMappings { + mappings = append(mappings, &WorkflowMappingState{ + WorkflowID: mapping.WorkflowID, + OldShardID: mapping.OldShardID, + NewShardID: mapping.NewShardID, + TransitionState: mapping.TransitionState, + UpdatedAt: mapping.UpdatedAt, + }) + } + + return mappings, nil +} + +// ReportShardRegistration is called when a shard reports its registered workflows +// This helps track which workflows each shard has successfully loaded +// It also updates workflowMappings so GetWorkflowShardMapping returns correct data +func (s *Store) ReportShardRegistration(ctx context.Context, shardID uint32, workflowIDs []string) error { + s.mu.Lock() + defer s.mu.Unlock() + + now := time.Now() + + // Clear and update shard registrations + s.shardRegistrations[shardID] = make(map[string]bool) + for _, wfID := range workflowIDs { + s.shardRegistrations[shardID][wfID] = true + } + + // Also update workflowMappings - when a shard reports it has a workflow, + // that's authoritative information about where the workflow is running + for _, wfID := range workflowIDs { + existing, ok := s.workflowMappings[wfID] + if !ok || existing.NewShardID != shardID { + 
s.workflowMappings[wfID] = &WorkflowMappingState{ + WorkflowID: wfID, + OldShardID: 0, + NewShardID: shardID, + TransitionState: StateSteady, + UpdatedAt: now, + } + if ok { + s.workflowMappings[wfID].OldShardID = existing.NewShardID + } + } + } + + s.mappingVersion++ + s.lastUpdateTime = now + + s.logger.Debugw("Updated shard registrations", + "shardID", shardID, + "workflowCount", len(workflowIDs), + "version", s.mappingVersion, + ) + + return nil +} + +// GetShardRegistrations returns the workflows registered on a specific shard +func (s *Store) GetShardRegistrations(ctx context.Context, shardID uint32) ([]string, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + workflows, ok := s.shardRegistrations[shardID] + if !ok { + return []string{}, nil + } + + result := make([]string, 0, len(workflows)) + for wfID := range workflows { + result = append(result, wfID) + } + + return result, nil +} + +// GetWorkflowMappingsBatch retrieves mappings for multiple workflows +func (s *Store) GetWorkflowMappingsBatch(ctx context.Context, workflowIDs []string) (map[string]*WorkflowMappingState, uint64, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + result := make(map[string]*WorkflowMappingState, len(workflowIDs)) + + for _, workflowID := range workflowIDs { + if mapping, ok := s.workflowMappings[workflowID]; ok { + // Return a copy to avoid external mutations + result[workflowID] = &WorkflowMappingState{ + WorkflowID: mapping.WorkflowID, + OldShardID: mapping.OldShardID, + NewShardID: mapping.NewShardID, + TransitionState: mapping.TransitionState, + UpdatedAt: mapping.UpdatedAt, + } + } + } + + return result, s.mappingVersion, nil +} + +// GetMappingVersion returns the current version of the mapping set +func (s *Store) GetMappingVersion() uint64 { + s.mu.RLock() + defer s.mu.RUnlock() + return s.mappingVersion +} diff --git a/core/services/shardorchestrator/store_test.go b/core/services/shardorchestrator/store_test.go new file mode 100644 index 00000000000..1214b72bff5 --- 
/dev/null +++ b/core/services/shardorchestrator/store_test.go @@ -0,0 +1,198 @@ +package shardorchestrator_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" +) + +func TestStore_BatchUpdateAndQuery(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + store := shardorchestrator.NewStore(lggr) + + // Create and insert multiple workflow mappings + mappings := []*shardorchestrator.WorkflowMappingState{ + { + WorkflowID: "workflow-1", + OldShardID: 0, + NewShardID: 1, + TransitionState: shardorchestrator.StateSteady, + }, + { + WorkflowID: "workflow-2", + OldShardID: 0, + NewShardID: 2, + TransitionState: shardorchestrator.StateSteady, + }, + { + WorkflowID: "workflow-3", + OldShardID: 0, + NewShardID: 1, + TransitionState: shardorchestrator.StateSteady, + }, + } + + err := store.BatchUpdateWorkflowMappings(ctx, mappings) + require.NoError(t, err) + + // Query individual workflow + mapping1, err := store.GetWorkflowMapping(ctx, "workflow-1") + require.NoError(t, err) + assert.Equal(t, uint32(1), mapping1.NewShardID) + assert.Equal(t, shardorchestrator.StateSteady, mapping1.TransitionState) + + // Query all workflows + allMappings, err := store.GetAllWorkflowMappings(ctx) + require.NoError(t, err) + assert.Len(t, allMappings, 3) + + // Query batch + batchMappings, version, err := store.GetWorkflowMappingsBatch(ctx, []string{"workflow-1", "workflow-2"}) + require.NoError(t, err) + assert.Len(t, batchMappings, 2) + assert.Equal(t, uint64(1), version) // First update +} + +func TestStore_WorkflowTransition(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + store := shardorchestrator.NewStore(lggr) + + // Initial assignment + err := store.UpdateWorkflowMapping(ctx, "workflow-123", 0, 1, shardorchestrator.StateSteady) + 
require.NoError(t, err) + + mapping, err := store.GetWorkflowMapping(ctx, "workflow-123") + require.NoError(t, err) + assert.Equal(t, uint32(1), mapping.NewShardID) + assert.Equal(t, shardorchestrator.StateSteady, mapping.TransitionState) + + // Move to different shard (transitioning) + err = store.UpdateWorkflowMapping(ctx, "workflow-123", 1, 3, shardorchestrator.StateTransitioning) + require.NoError(t, err) + + mapping, err = store.GetWorkflowMapping(ctx, "workflow-123") + require.NoError(t, err) + assert.Equal(t, uint32(1), mapping.OldShardID) + assert.Equal(t, uint32(3), mapping.NewShardID) + assert.Equal(t, shardorchestrator.StateTransitioning, mapping.TransitionState) + + // Complete transition + err = store.UpdateWorkflowMapping(ctx, "workflow-123", 1, 3, shardorchestrator.StateSteady) + require.NoError(t, err) + + mapping, err = store.GetWorkflowMapping(ctx, "workflow-123") + require.NoError(t, err) + assert.Equal(t, uint32(3), mapping.NewShardID) + assert.Equal(t, shardorchestrator.StateSteady, mapping.TransitionState) +} + +func TestStore_VersionTracking(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + store := shardorchestrator.NewStore(lggr) + + // Initial version should be 0 + assert.Equal(t, uint64(0), store.GetMappingVersion()) + + // First update increments version + err := store.UpdateWorkflowMapping(ctx, "wf-1", 0, 1, shardorchestrator.StateSteady) + require.NoError(t, err) + assert.Equal(t, uint64(1), store.GetMappingVersion()) + + // Batch update increments version + err = store.BatchUpdateWorkflowMappings(ctx, []*shardorchestrator.WorkflowMappingState{ + {WorkflowID: "wf-2", NewShardID: 2, TransitionState: shardorchestrator.StateSteady}, + }) + require.NoError(t, err) + assert.Equal(t, uint64(2), store.GetMappingVersion()) + + // Version is included in batch query response + _, version, err := store.GetWorkflowMappingsBatch(ctx, []string{"wf-1", "wf-2"}) + require.NoError(t, err) + assert.Equal(t, uint64(2), version) +} 
+ +func TestStore_ShardRegistrations(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + store := shardorchestrator.NewStore(lggr) + + // Shard 1 reports its workflows + err := store.ReportShardRegistration(ctx, 1, []string{"workflow-1", "workflow-3"}) + require.NoError(t, err) + + // Shard 2 reports its workflows + err = store.ReportShardRegistration(ctx, 2, []string{"workflow-2"}) + require.NoError(t, err) + + // Query shard registrations + shard1Workflows, err := store.GetShardRegistrations(ctx, 1) + require.NoError(t, err) + assert.Len(t, shard1Workflows, 2) + assert.Contains(t, shard1Workflows, "workflow-1") + assert.Contains(t, shard1Workflows, "workflow-3") + + shard2Workflows, err := store.GetShardRegistrations(ctx, 2) + require.NoError(t, err) + assert.Len(t, shard2Workflows, 1) + assert.Contains(t, shard2Workflows, "workflow-2") + + // Query non-existent shard returns empty + shard3Workflows, err := store.GetShardRegistrations(ctx, 3) + require.NoError(t, err) + assert.Empty(t, shard3Workflows) + + // Re-reporting replaces previous registrations + err = store.ReportShardRegistration(ctx, 1, []string{"workflow-1"}) + require.NoError(t, err) + + shard1Workflows, err = store.GetShardRegistrations(ctx, 1) + require.NoError(t, err) + assert.Len(t, shard1Workflows, 1) + assert.Contains(t, shard1Workflows, "workflow-1") + assert.NotContains(t, shard1Workflows, "workflow-3") // Removed +} + +func TestStore_NotFoundError(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + store := shardorchestrator.NewStore(lggr) + + // Query non-existent workflow + _, err := store.GetWorkflowMapping(ctx, "non-existent") + require.Error(t, err) + assert.Contains(t, err.Error(), "not found") +} + +func TestStore_BatchQueryPartialResults(t *testing.T) { + ctx := context.Background() + lggr := logger.Test(t) + store := shardorchestrator.NewStore(lggr) + + // Insert only some workflows + err := store.UpdateWorkflowMapping(ctx, "exists-1", 0, 
1, shardorchestrator.StateSteady) + require.NoError(t, err) + err = store.UpdateWorkflowMapping(ctx, "exists-2", 0, 2, shardorchestrator.StateSteady) + require.NoError(t, err) + + // Query mix of existing and non-existing workflows + results, _, err := store.GetWorkflowMappingsBatch(ctx, []string{ + "exists-1", + "non-existent", + "exists-2", + }) + require.NoError(t, err) + + // Should only return existing ones + assert.Len(t, results, 2) + assert.Contains(t, results, "exists-1") + assert.Contains(t, results, "exists-2") + assert.NotContains(t, results, "non-existent") +} diff --git a/core/services/workflows/syncer/v2/workflow_registry.go b/core/services/workflows/syncer/v2/workflow_registry.go index 12fb1c25bf9..96e86104d3d 100644 --- a/core/services/workflows/syncer/v2/workflow_registry.go +++ b/core/services/workflows/syncer/v2/workflow_registry.go @@ -18,10 +18,10 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink-common/pkg/types" "github.com/smartcontractkit/chainlink-common/pkg/types/query/primitives" - "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator" "github.com/smartcontractkit/chainlink-evm/gethwrappers/workflow/generated/workflow_registry_wrapper_v2" "github.com/smartcontractkit/chainlink-evm/pkg/config" "github.com/smartcontractkit/chainlink/v2/core/logger" + "github.com/smartcontractkit/chainlink/v2/core/services/shardorchestrator" "github.com/smartcontractkit/chainlink/v2/core/services/workflows/syncer/versioning" ) diff --git a/deployment/go.mod b/deployment/go.mod index a2807306844..f3d6b724f7f 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -431,6 +431,7 @@ require ( github.com/smartcontractkit/chainlink-protos/chainlink-ccv/message-discovery v0.0.0-20251211142334-5c3421fe2c8d // indirect github.com/smartcontractkit/chainlink-protos/chainlink-ccv/verifier v0.0.0-20251211142334-5c3421fe2c8d // indirect 
github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b // indirect + github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 // indirect github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 // indirect github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 // indirect github.com/smartcontractkit/chainlink-protos/svr v1.1.0 // indirect diff --git a/deployment/go.sum b/deployment/go.sum index 4596008d04c..3571ae8854a 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1410,6 +1410,8 @@ github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4 h1:AEnxv4HM3WD1Rb github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4/go.mod h1:PjZD54vr6rIKEKQj6HNA4hllvYI/QpT+Zefj3tqkFAs= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/go.md b/go.md index 75c2edcd8fc..9e84bb6f3c2 100644 --- a/go.md +++ b/go.md @@ -107,6 +107,8 @@ flowchart LR click chainlink-protos/linking-service/go href "https://github.com/smartcontractkit/chainlink-protos" chainlink-protos/orchestrator --> 
wsrpc click chainlink-protos/orchestrator href "https://github.com/smartcontractkit/chainlink-protos" + chainlink-protos/ring/go + click chainlink-protos/ring/go href "https://github.com/smartcontractkit/chainlink-protos" chainlink-protos/rmn/v1.6/go click chainlink-protos/rmn/v1.6/go href "https://github.com/smartcontractkit/chainlink-protos" chainlink-protos/storage-service @@ -142,6 +144,7 @@ flowchart LR chainlink/v2 --> chainlink-evm/contracts/cre/gobindings chainlink/v2 --> chainlink-feeds chainlink/v2 --> chainlink-protos/orchestrator + chainlink/v2 --> chainlink-protos/ring/go chainlink/v2 --> chainlink-sui chainlink/v2 --> chainlink-ton chainlink/v2 --> cre-sdk-go/capabilities/networking/http @@ -219,6 +222,7 @@ flowchart LR chainlink-protos/cre/go chainlink-protos/linking-service/go chainlink-protos/orchestrator + chainlink-protos/ring/go chainlink-protos/rmn/v1.6/go chainlink-protos/storage-service chainlink-protos/svr @@ -368,6 +372,8 @@ flowchart LR click chainlink-protos/op-catalog href "https://github.com/smartcontractkit/chainlink-protos" chainlink-protos/orchestrator --> wsrpc click chainlink-protos/orchestrator href "https://github.com/smartcontractkit/chainlink-protos" + chainlink-protos/ring/go + click chainlink-protos/ring/go href "https://github.com/smartcontractkit/chainlink-protos" chainlink-protos/rmn/v1.6/go click chainlink-protos/rmn/v1.6/go href "https://github.com/smartcontractkit/chainlink-protos" chainlink-protos/storage-service @@ -501,6 +507,7 @@ flowchart LR chainlink/v2 --> chainlink-evm/contracts/cre/gobindings chainlink/v2 --> chainlink-feeds chainlink/v2 --> chainlink-protos/orchestrator + chainlink/v2 --> chainlink-protos/ring/go chainlink/v2 --> cre-sdk-go/capabilities/networking/http chainlink/v2 --> cre-sdk-go/capabilities/scheduler/cron chainlink/v2 --> quarantine @@ -617,6 +624,7 @@ flowchart LR chainlink-protos/linking-service/go chainlink-protos/op-catalog chainlink-protos/orchestrator + chainlink-protos/ring/go 
chainlink-protos/rmn/v1.6/go chainlink-protos/storage-service chainlink-protos/svr diff --git a/go.mod b/go.mod index be06ea4d56a..1a96da10bde 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,8 @@ require ( github.com/aptos-labs/aptos-go-sdk v1.11.0 github.com/avast/retry-go/v4 v4.6.1 github.com/btcsuite/btcd/btcec/v2 v2.3.4 + github.com/buraksezer/consistent v0.10.0 + github.com/cespare/xxhash/v2 v2.3.0 github.com/cloudevents/sdk-go/binding/format/protobuf/v2 v2.16.1 github.com/coreos/go-oidc/v3 v3.11.0 github.com/cosmos/cosmos-sdk v0.50.14 @@ -100,6 +102,7 @@ require ( github.com/smartcontractkit/chainlink-protos/cre/go v0.0.0-20251124151448-0448aefdaab9 github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 + github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 github.com/smartcontractkit/chainlink-protos/workflows/go v0.0.0-20260106052706-6dd937cb5ec6 github.com/smartcontractkit/chainlink-solana v1.1.2-0.20260121103211-89fe83165431 @@ -191,13 +194,11 @@ require ( github.com/btcsuite/btcutil v1.0.3-0.20201208143702-a53e38424cce // indirect github.com/buger/goterm v1.0.4 // indirect github.com/buger/jsonparser v1.1.1 // indirect - github.com/buraksezer/consistent v0.10.0 // indirect github.com/bytecodealliance/wasmtime-go/v28 v28.0.0 // indirect github.com/bytedance/sonic v1.12.3 // indirect github.com/bytedance/sonic/loader v0.2.0 // indirect github.com/cenkalti/backoff v2.2.1+incompatible // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudevents/sdk-go/v2 v2.16.1 // indirect github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect diff --git a/go.sum b/go.sum index 60cd3e656fa..66c3b54b1e1 100644 --- a/go.sum +++ b/go.sum @@ -1218,6 +1218,8 @@ 
github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-202510021 github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b/go.mod h1:qSTSwX3cBP3FKQwQacdjArqv0g6QnukjV4XuzO6UyoY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index b62a4c9ede3..af024559711 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -516,6 +516,7 @@ require ( github.com/smartcontractkit/chainlink-protos/cre/go v0.0.0-20251124151448-0448aefdaab9 // indirect github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b // indirect github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 // indirect + github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 // indirect github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 // indirect github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 // indirect github.com/smartcontractkit/chainlink-protos/svr v1.1.0 // indirect diff 
--git a/integration-tests/go.sum b/integration-tests/go.sum index bc1a6beca5e..76bf62ab871 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1654,6 +1654,8 @@ github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4 h1:AEnxv4HM3WD1Rb github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4/go.mod h1:PjZD54vr6rIKEKQj6HNA4hllvYI/QpT+Zefj3tqkFAs= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index c988d942e94..70d3c4f8ed4 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -502,6 +502,7 @@ require ( github.com/smartcontractkit/chainlink-protos/job-distributor v0.17.0 // indirect github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b // indirect github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 // indirect + github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 // indirect github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 // indirect 
github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 // indirect github.com/smartcontractkit/chainlink-protos/svr v1.1.0 // indirect diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index a53c483a613..f4f54f78888 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1632,6 +1632,8 @@ github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4 h1:AEnxv4HM3WD1Rb github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4/go.mod h1:PjZD54vr6rIKEKQj6HNA4hllvYI/QpT+Zefj3tqkFAs= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/system-tests/lib/go.mod b/system-tests/lib/go.mod index e9050e175f4..6f511d843d3 100644 --- a/system-tests/lib/go.mod +++ b/system-tests/lib/go.mod @@ -474,6 +474,7 @@ require ( github.com/smartcontractkit/chainlink-protos/chainlink-ccv/verifier v0.0.0-20251211142334-5c3421fe2c8d // indirect github.com/smartcontractkit/chainlink-protos/linking-service/go v0.0.0-20251002192024-d2ad9222409b // indirect github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 // indirect + 
github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 // indirect github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 // indirect github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 // indirect github.com/smartcontractkit/chainlink-protos/svr v1.1.0 // indirect diff --git a/system-tests/lib/go.sum b/system-tests/lib/go.sum index e235a1a952e..af46f501292 100644 --- a/system-tests/lib/go.sum +++ b/system-tests/lib/go.sum @@ -1639,6 +1639,8 @@ github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4 h1:AEnxv4HM3WD1Rb github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4/go.mod h1:PjZD54vr6rIKEKQj6HNA4hllvYI/QpT+Zefj3tqkFAs= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/system-tests/tests/go.mod b/system-tests/tests/go.mod index a867399664f..ae0c8997c41 100644 --- a/system-tests/tests/go.mod +++ b/system-tests/tests/go.mod @@ -58,6 +58,7 @@ require ( github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20251222115927-36a18321243c github.com/smartcontractkit/chainlink-protos/cre/go 
v0.0.0-20251124151448-0448aefdaab9 github.com/smartcontractkit/chainlink-protos/job-distributor v0.17.0 + github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 github.com/smartcontractkit/chainlink-protos/workflows/go v0.0.0-20260106052706-6dd937cb5ec6 github.com/smartcontractkit/chainlink-solana v1.1.2-0.20260121103211-89fe83165431 github.com/smartcontractkit/chainlink-testing-framework/framework v0.13.0 diff --git a/system-tests/tests/go.sum b/system-tests/tests/go.sum index 947394ae0ad..0d0ee9cb63c 100644 --- a/system-tests/tests/go.sum +++ b/system-tests/tests/go.sum @@ -1836,6 +1836,8 @@ github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4 h1:AEnxv4HM3WD1Rb github.com/smartcontractkit/chainlink-protos/op-catalog v0.0.4/go.mod h1:PjZD54vr6rIKEKQj6HNA4hllvYI/QpT+Zefj3tqkFAs= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0 h1:0eroOyBwmdoGUwUdvMI0/J7m5wuzNnJDMglSOK1sfNY= github.com/smartcontractkit/chainlink-protos/orchestrator v0.10.0/go.mod h1:m/A3lqD7ms/RsQ9BT5P2uceYY0QX5mIt4KQxT2G6qEo= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706 h1:z3sQK3dyfl9Rbm8Inj8irwvX6yQihASp1UvMjrfz6/w= +github.com/smartcontractkit/chainlink-protos/ring/go v0.0.0-20260128151123-605e9540b706/go.mod h1:aifeP3SnsVrO1eSN5Smur3iHjAmi3poaLt6TAbgK0Hw= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6 h1:L6KJ4kGv/yNNoCk8affk7Y1vAY0qglPMXC/hevV/IsA= github.com/smartcontractkit/chainlink-protos/rmn/v1.6/go v0.0.0-20250131130834-15e0d4cde2a6/go.mod h1:FRwzI3hGj4CJclNS733gfcffmqQ62ONCkbGi49s658w= github.com/smartcontractkit/chainlink-protos/storage-service v0.3.0 h1:B7itmjy+CMJ26elVw/cAJqqhBQ3Xa/mBYWK0/rQ5MuI= diff --git a/system-tests/tests/smoke/cre/v2_sharding_test.go b/system-tests/tests/smoke/cre/v2_sharding_test.go index 054bc34bc4c..2baf953275d 100644 --- a/system-tests/tests/smoke/cre/v2_sharding_test.go +++ 
b/system-tests/tests/smoke/cre/v2_sharding_test.go @@ -16,8 +16,7 @@ import ( "github.com/smartcontractkit/chainlink-testing-framework/framework" - ringpb "github.com/smartcontractkit/chainlink-common/pkg/workflows/ring/pb" - shardorchpb "github.com/smartcontractkit/chainlink-common/pkg/workflows/shardorchestrator/pb" + ringpb "github.com/smartcontractkit/chainlink-protos/ring/go" "github.com/smartcontractkit/chainlink-deployments-framework/datastore" commonchangeset "github.com/smartcontractkit/chainlink/deployment/common/changeset" @@ -137,11 +136,11 @@ func validateShardOrchestratorRPC(t *testing.T, logger zerolog.Logger, addr stri require.NoError(t, err, "Failed to create gRPC client for ShardOrchestrator at %s", addr) defer conn.Close() - client := shardorchpb.NewShardOrchestratorServiceClient(conn) + client := ringpb.NewShardOrchestratorServiceClient(conn) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - resp, err := client.GetWorkflowShardMapping(ctx, &shardorchpb.GetWorkflowShardMappingRequest{ + resp, err := client.GetWorkflowShardMapping(ctx, &ringpb.GetWorkflowShardMappingRequest{ WorkflowIds: []string{"test-workflow-id"}, }) @@ -200,7 +199,7 @@ func validateShardingScaleScenario(t *testing.T, testEnv *ttypes.TestEnvironment waitForArbiterShardCount(t, arbiterClient, 1) logger.Info().Msg("Step 3: Register all workflows on shard-zero (the only shard)") - _, err := shardOrchClient.ReportWorkflowTriggerRegistration(ctx, &shardorchpb.ReportWorkflowTriggerRegistrationRequest{ + _, err := shardOrchClient.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 0, RegisteredWorkflows: map[string]uint32{"workflow-A": 1, "workflow-B": 1, "workflow-C": 1, "workflow-D": 1}, TotalActiveWorkflows: 4, @@ -208,7 +207,7 @@ func validateShardingScaleScenario(t *testing.T, testEnv *ttypes.TestEnvironment require.NoError(t, err) logger.Info().Msg("Step 4: Verify all workflows mapped to 
shard 0") - resp, err := shardOrchClient.GetWorkflowShardMapping(ctx, &shardorchpb.GetWorkflowShardMappingRequest{ + resp, err := shardOrchClient.GetWorkflowShardMapping(ctx, &ringpb.GetWorkflowShardMappingRequest{ WorkflowIds: workflowIDs, }) require.NoError(t, err) @@ -225,7 +224,7 @@ func validateShardingScaleScenario(t *testing.T, testEnv *ttypes.TestEnvironment waitForArbiterShardCount(t, arbiterClient, 2) logger.Info().Msg("Step 7: Shard 1 reports its workflows after scaling") - _, err = shardOrchClient.ReportWorkflowTriggerRegistration(ctx, &shardorchpb.ReportWorkflowTriggerRegistrationRequest{ + _, err = shardOrchClient.ReportWorkflowTriggerRegistration(ctx, &ringpb.ReportWorkflowTriggerRegistrationRequest{ SourceShardId: 1, RegisteredWorkflows: map[string]uint32{"workflow-C": 1, "workflow-D": 1}, TotalActiveWorkflows: 2, @@ -233,7 +232,7 @@ func validateShardingScaleScenario(t *testing.T, testEnv *ttypes.TestEnvironment require.NoError(t, err) logger.Info().Msg("Step 8: Verify workflow mappings now span 2 shards") - resp, err = shardOrchClient.GetWorkflowShardMapping(ctx, &shardorchpb.GetWorkflowShardMappingRequest{ + resp, err = shardOrchClient.GetWorkflowShardMapping(ctx, &ringpb.GetWorkflowShardMappingRequest{ WorkflowIds: workflowIDs, }) require.NoError(t, err) @@ -298,10 +297,10 @@ func newArbiterClient(t *testing.T, addr string) ringpb.ArbiterClient { return ringpb.NewArbiterClient(conn) } -func newShardOrchestratorClient(t *testing.T, addr string) shardorchpb.ShardOrchestratorServiceClient { +func newShardOrchestratorClient(t *testing.T, addr string) ringpb.ShardOrchestratorServiceClient { t.Helper() conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) require.NoError(t, err) t.Cleanup(func() { conn.Close() }) - return shardorchpb.NewShardOrchestratorServiceClient(conn) + return ringpb.NewShardOrchestratorServiceClient(conn) }