From fe0b55928a90d803d6e0634aa40674359939c76d Mon Sep 17 00:00:00 2001 From: Jason Lynch Date: Mon, 5 Jan 2026 16:14:14 -0500 Subject: [PATCH 1/2] fix: panic from task cancellation This issue caused occasional E2E failures with this message in the Control Plane server logs: ``` panic: invalid state transition for command ScheduleSubWorkflow: CancelPending -> Done ``` This happened because our workflows backend erroneously returned `ErrInstanceNotFinished` when the workflows runtime tried to remove a cancelled workflow. Since the cancellation events weren't consumed, the workflows ended up in an invalid state transition. This change fixes the bug and brings us in line with the reference backend implementations. --- server/internal/workflows/backend/etcd/etcd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/internal/workflows/backend/etcd/etcd.go b/server/internal/workflows/backend/etcd/etcd.go index ef09f974..0e6cf5ea 100644 --- a/server/internal/workflows/backend/etcd/etcd.go +++ b/server/internal/workflows/backend/etcd/etcd.go @@ -126,7 +126,7 @@ func (b *Backend) RemoveWorkflowInstance(ctx context.Context, instance *workflow return fmt.Errorf("failed to get workflow instance: %w", err) } - if inst.State != core.WorkflowInstanceStateFinished && inst.State != core.WorkflowInstanceStateContinuedAsNew { + if inst.State == core.WorkflowInstanceStateActive { return backend.ErrInstanceNotFinished } From 494c77103e1ab94c9fdabd2540621173de7d3244 Mon Sep 17 00:00:00 2001 From: Jason Lynch Date: Thu, 8 Jan 2026 10:52:26 -0500 Subject: [PATCH 2/2] test: fix clustertest timeout Sometimes this build takes slightly longer than 60 seconds in CI. This change fixes the sporadic errors we're getting right now. 
--- clustertest/utils_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clustertest/utils_test.go b/clustertest/utils_test.go index bebcc3d4..14ae94cb 100644 --- a/clustertest/utils_test.go +++ b/clustertest/utils_test.go @@ -22,7 +22,7 @@ import ( ) func buildImage() { - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second) defer cancel() log.Println("building control-plane image")