diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c999af35..c99fea886 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -157,7 +157,7 @@ jobs: - name: Upload logs if: always() - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: e2e-logs-${{ matrix.customized-settings }} path: test/e2e/logs-${{ matrix.customized-settings }}/ diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 5730dfc94..b4aa7b8b0 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@df199fb7be9f65074067a9eb93f12bb4c5547cf2 # v2.13.3 + uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 with: egress-policy: audit diff --git a/Makefile b/Makefile index d7361627e..d0eb30e34 100644 --- a/Makefile +++ b/Makefile @@ -141,7 +141,7 @@ $(PROTOC): unzip $(TOOLS_BIN_DIR)/protoc.zip -d $(TOOLS_BIN_DIR)/protoc_tmp && mv $(TOOLS_BIN_DIR)/protoc_tmp/bin/protoc $(PROTOC) && rm -rf $(TOOLS_BIN_DIR)/protoc.zip $(TOOLS_BIN_DIR)/protoc_tmp .PHONY: help -help: ## Display this help. +help: ## Display this help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) @@ -150,7 +150,7 @@ help: ## Display this help. ## -------------------------------------- .PHONY: lint -lint: $(GOLANGCI_LINT) +lint: $(GOLANGCI_LINT) ## Run fast linting $(GOLANGCI_LINT) run -v .PHONY: lint-full @@ -161,16 +161,16 @@ lint-full: $(GOLANGCI_LINT) ## Run slower linters to detect possible issues ## Development ## -------------------------------------- -staticcheck: $(STATICCHECK) +staticcheck: $(STATICCHECK) ## Run static analysis $(STATICCHECK) ./... .PHONY: fmt -fmt: $(GOIMPORTS) ## Run go fmt against code. +fmt: $(GOIMPORTS) ## Run go fmt against code go fmt ./... $(GOIMPORTS) -local go.goms.io/fleet -w $$(go list -f {{.Dir}} ./...) .PHONY: vet -vet: ## Run go vet against code. +vet: ## Run go vet against code go vet ./... ## -------------------------------------- @@ -178,19 +178,19 @@ vet: ## Run go vet against code. ## -------------------------------------- .PHONY: test -test: manifests generate fmt vet local-unit-test integration-test ## Run tests. +test: manifests generate fmt vet local-unit-test integration-test ## Run unit tests and integration tests ## # Set up the timeout parameters as some of the tests (rollout controller) lengths have exceeded the default 10 minute mark. # TO-DO (chenyu1): enable parallelization for single package integration tests. .PHONY: local-unit-test -local-unit-test: $(ENVTEST) ## Run tests. +local-unit-test: $(ENVTEST) ## Run unit tests export CGO_ENABLED=1 && \ export KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" && \ - go test `go list ./pkg/... ./cmd/...` -race -coverpkg=./... -coverprofile=ut-coverage.xml -covermode=atomic -v -timeout=20m + go test `go list ./pkg/... ./cmd/...` -race -coverpkg=./... -coverprofile=ut-coverage.xml -covermode=atomic -v -timeout=30m .PHONY: integration-test -integration-test: $(ENVTEST) ## Run tests. +integration-test: $(ENVTEST) ## Run integration tests export CGO_ENABLED=1 && \ export KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" && \ ginkgo -v -p --race --cover --coverpkg=./pkg/scheduler/... 
./test/scheduler && \ @@ -202,14 +202,14 @@ integration-test: $(ENVTEST) ## Run tests. LABEL_FILTER ?= !custom .PHONY: e2e-tests -e2e-tests: setup-clusters +e2e-tests: setup-clusters ## Run E2E tests cd ./test/e2e && ginkgo --timeout=70m --label-filter="$(LABEL_FILTER)" -v -p . -e2e-tests-custom: setup-clusters +e2e-tests-custom: setup-clusters ## Run custom E2E tests with labels cd ./test/e2e && ginkgo --label-filter="custom" -v -p . .PHONY: setup-clusters -setup-clusters: +setup-clusters: ## Set up Kind clusters for E2E testing cd ./test/e2e && chmod +x ./setup.sh && ./setup.sh $(MEMBER_CLUSTER_COUNT) .PHONY: collect-e2e-logs @@ -218,7 +218,7 @@ collect-e2e-logs: ## Collect logs from hub and member agent pods after e2e tests ## reviewable .PHONY: reviewable -reviewable: fmt vet lint staticcheck +reviewable: fmt vet lint staticcheck ## Run all quality checks before PR go mod tidy ## -------------------------------------- @@ -230,7 +230,7 @@ CRD_OPTIONS ?= "crd" # Generate manifests e.g. CRD, RBAC etc. .PHONY: manifests -manifests: $(CONTROLLER_GEN) +manifests: $(CONTROLLER_GEN) ## Generate CRDs and manifests $(CONTROLLER_GEN) \ $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./apis/..." output:crd:artifacts:config=config/crd/bases @@ -243,7 +243,7 @@ protos: $(PROTOC_GEN_GO) $(PROTOC_GEN_GO_GRPC) $(PROTOC_GEN_GRPC_GATEWAY) $(PROT apis/protos/azure/compute/v1/vmsizerecommender.proto # Generate code -generate: $(CONTROLLER_GEN) protos +generate: $(CONTROLLER_GEN) protos ## Generate deep copy methods $(CONTROLLER_GEN) \ object:headerFile="hack/boilerplate.go.txt" paths="./..." @@ -252,17 +252,17 @@ generate: $(CONTROLLER_GEN) protos ## -------------------------------------- .PHONY: build -build: generate fmt vet ## Build agent binaries. +build: generate fmt vet ## Build agent binaries go build -o bin/hubagent cmd/hubagent/main.go go build -o bin/memberagent cmd/memberagent/main.go go build -o bin/crdinstaller cmd/crdinstaller/main.go .PHONY: run-hubagent -run-hubagent: manifests generate fmt vet ## Run a controllers from your host. +run-hubagent: manifests generate fmt vet ## Run hub-agent from your host go run ./cmd/hubagent/main.go .PHONY: run-memberagent -run-memberagent: manifests generate fmt vet ## Run a controllers from your host. +run-memberagent: manifests generate fmt vet ## Run member-agent from your host go run ./cmd/memberagent/main.go .PHONY: run-crdinstaller @@ -279,7 +279,7 @@ QEMU_VERSION ?= 7.2.0-1 BUILDKIT_VERSION ?= v0.18.1 .PHONY: push -push: +push: ## Build and push all Docker images $(MAKE) OUTPUT_TYPE="type=registry" docker-build-hub-agent docker-build-member-agent docker-build-refresh-token docker-build-crd-installer # By default, docker buildx create will pull image moby/buildkit:buildx-stable-1 and hit the too many requests error @@ -289,7 +289,7 @@ push: # we keep the original setup if the build target is x86_64 platforms (default) for compatibility reasons, but will switch to # a more general setup for non-x86_64 hosts. # -# On some systems the emulation setup might not work at all (e.g., macOS on Apple Silicon -> Rosetta 2 will be used +# On some systems the emulation setup might not work at all (e.g., macOS on Apple Silicon -> Rosetta 2 will be used # by Docker Desktop as the default emulation option for AMD64 on ARM64 container compatibility). .PHONY: docker-buildx-builder # Note (chenyu1): the step below sets up emulation for building/running non-native binaries on the host. 
The original @@ -313,7 +313,7 @@ docker-buildx-builder: fi .PHONY: docker-build-hub-agent -docker-build-hub-agent: docker-buildx-builder +docker-build-hub-agent: docker-buildx-builder ## Build hub-agent image docker buildx build \ --file docker/$(HUB_AGENT_IMAGE_NAME).Dockerfile \ --output=$(OUTPUT_TYPE) \ @@ -325,7 +325,7 @@ docker-build-hub-agent: docker-buildx-builder --build-arg GOOS=$(TARGET_OS) . .PHONY: docker-build-member-agent -docker-build-member-agent: docker-buildx-builder +docker-build-member-agent: docker-buildx-builder ## Build member-agent image docker buildx build \ --file docker/$(MEMBER_AGENT_IMAGE_NAME).Dockerfile \ --output=$(OUTPUT_TYPE) \ @@ -337,7 +337,7 @@ docker-build-member-agent: docker-buildx-builder --build-arg GOOS=$(TARGET_OS) . .PHONY: docker-build-refresh-token -docker-build-refresh-token: docker-buildx-builder +docker-build-refresh-token: docker-buildx-builder ## Build refresh-token image docker buildx build \ --file docker/$(REFRESH_TOKEN_IMAGE_NAME).Dockerfile \ --output=$(OUTPUT_TYPE) \ @@ -366,7 +366,7 @@ helm-package-arc-member-cluster-agents: envsubst < charts/member-agent-arc/values.yaml > charts/member-agent-arc/values.yaml.tmp && \ mv charts/member-agent-arc/values.yaml.tmp charts/member-agent-arc/values.yaml && \ helm package charts/member-agent-arc/ --version $(ARC_MEMBER_AGENT_HELMCHART_VERSION) - + helm push $(ARC_MEMBER_AGENT_HELMCHART_NAME)-$(ARC_MEMBER_AGENT_HELMCHART_VERSION).tgz oci://$(REGISTRY) ## ----------------------------------- @@ -379,5 +379,5 @@ clean-bin: ## Remove all generated binaries rm -rf ./bin .PHONY: clean-e2e-tests -clean-e2e-tests: +clean-e2e-tests: ## Clean up E2E test clusters cd ./test/e2e && chmod +x ./stop.sh && ./stop.sh $(MEMBER_CLUSTER_COUNT) diff --git a/apis/placement/v1beta1/clusterresourceplacement_types.go b/apis/placement/v1beta1/clusterresourceplacement_types.go index 03dd10133..d1b6db3d1 100644 --- a/apis/placement/v1beta1/clusterresourceplacement_types.go +++ b/apis/placement/v1beta1/clusterresourceplacement_types.go @@ -524,9 +524,9 @@ const ( type RolloutStrategy struct { // Type of rollout. The only supported types are "RollingUpdate" and "External". // Default is "RollingUpdate". - // +kubebuilder:validation:Optional // +kubebuilder:default=RollingUpdate // +kubebuilder:validation:Enum=RollingUpdate;External + // +kubebuilder:validation:XValidation:rule="!(self != 'External' && oldSelf == 'External')",message="cannot change rollout strategy type from 'External' to other types" Type RolloutStrategyType `json:"type,omitempty"` // Rolling update config params. Present only if RolloutStrategyType = RollingUpdate. 
diff --git a/apis/placement/v1beta1/stageupdate_types.go b/apis/placement/v1beta1/stageupdate_types.go index 8d41589a7..ef68dd087 100644 --- a/apis/placement/v1beta1/stageupdate_types.go +++ b/apis/placement/v1beta1/stageupdate_types.go @@ -96,7 +96,7 @@ type UpdateRunObjList interface { // +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Succeeded")].status`,name="Succeeded",type=string // +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date // +kubebuilder:printcolumn:JSONPath=`.spec.stagedRolloutStrategyName`,name="Strategy",priority=1,type=string -// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 128",message="metadata.name max length is 127" +// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 64",message="metadata.name max length is 63" // ClusterStagedUpdateRun represents a stage by stage update process that applies ClusterResourcePlacement // selected resources to specified clusters. @@ -427,7 +427,7 @@ const ( // Its condition status can be one of the following: // - "True": The staged update run is making progress. // - "False": The staged update run is waiting/paused/abandoned. - // - "Unknown" means it is unknown. + // - "Unknown": The staged update run is in a transitioning state. StagedUpdateRunConditionProgressing StagedUpdateRunConditionType = "Progressing" // StagedUpdateRunConditionSucceeded indicates whether the staged update run is completed successfully. @@ -489,7 +489,8 @@ const ( // StageUpdatingConditionProgressing indicates whether the stage updating is making progress. // Its condition status can be one of the following: // - "True": The stage updating is making progress. - // - "False": The stage updating is waiting/pausing. + // - "False": The stage updating is waiting. + // - "Unknown": The stage updating is in a transitioning state. StageUpdatingConditionProgressing StageUpdatingConditionType = "Progressing" // StageUpdatingConditionSucceeded indicates whether the stage updating is completed successfully. @@ -790,7 +791,7 @@ func (c *ClusterApprovalRequestList) GetApprovalRequestObjs() []ApprovalRequestO // +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Succeeded")].status`,name="Succeeded",type=string // +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date // +kubebuilder:printcolumn:JSONPath=`.spec.stagedRolloutStrategyName`,name="Strategy",priority=1,type=string -// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 128",message="metadata.name max length is 127" +// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) < 64",message="metadata.name max length is 63" // StagedUpdateRun represents a stage by stage update process that applies ResourcePlacement // selected resources to specified clusters. 
diff --git a/cmd/hubagent/main.go b/cmd/hubagent/main.go index 9d1608a3b..9b2b4d13c 100644 --- a/cmd/hubagent/main.go +++ b/cmd/hubagent/main.go @@ -46,6 +46,8 @@ import ( "go.goms.io/fleet/cmd/hubagent/options" "go.goms.io/fleet/cmd/hubagent/workload" mcv1beta1 "go.goms.io/fleet/pkg/controllers/membercluster/v1beta1" + readiness "go.goms.io/fleet/pkg/utils/informer/readiness" + "go.goms.io/fleet/pkg/utils/validator" "go.goms.io/fleet/pkg/webhook" "go.goms.io/fleet/pkg/webhook/managedresource" // +kubebuilder:scaffold:imports @@ -166,7 +168,17 @@ func main() { ctx := ctrl.SetupSignalHandler() if err := workload.SetupControllers(ctx, &wg, mgr, config, opts); err != nil { - klog.ErrorS(err, "unable to set up ready check") + klog.ErrorS(err, "unable to set up controllers") + exitWithErrorFunc() + } + + // Add readiness check for dynamic informer cache AFTER controllers are set up. + // This ensures the discovery cache is populated before the hub agent is marked ready, + // which is critical for all controllers that rely on dynamic resource discovery. + // AddReadyzCheck adds additional readiness check instead of replacing the one registered earlier provided the name is different. + // Both registered checks need to pass for the manager to be considered ready. + if err := mgr.AddReadyzCheck("informer-cache", readiness.InformerReadinessChecker(validator.ResourceInformer)); err != nil { + klog.ErrorS(err, "unable to set up informer cache readiness check") exitWithErrorFunc() } diff --git a/cmd/hubagent/workload/setup.go b/cmd/hubagent/workload/setup.go index a2bccc07b..3c9d48383 100644 --- a/cmd/hubagent/workload/setup.go +++ b/cmd/hubagent/workload/setup.go @@ -376,7 +376,7 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager, } defaultFramework := framework.NewFramework(schedulerProfile, mgr) defaultSchedulingQueue := queue.NewSimplePlacementSchedulingQueue( - queue.WithName(schedulerQueueName), + schedulerQueueName, nil, ) // we use one scheduler for every 10 concurrent placement defaultScheduler := scheduler.NewScheduler("DefaultScheduler", defaultFramework, defaultSchedulingQueue, mgr, @@ -514,7 +514,23 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager, } resourceChangeController := controller.NewController(resourceChangeControllerName, controller.ClusterWideKeyFunc, rcr.Reconcile, rateLimiter) + // Set up the InformerPopulator that runs on ALL pods (leader and followers) + // This ensures all pods have synced informer caches for webhook validation + klog.Info("Setting up informer populator") + informerPopulator := &resourcewatcher.InformerPopulator{ + DiscoveryClient: discoverClient, + RESTMapper: mgr.GetRESTMapper(), + InformerManager: dynamicInformerManager, + ResourceConfig: resourceConfig, + } + + if err := mgr.Add(informerPopulator); err != nil { + klog.ErrorS(err, "Failed to setup informer populator") + return err + } + // Set up a runner that starts all the custom controllers we created above + // This runs ONLY on the leader and adds event handlers to the informers created by InformerPopulator resourceChangeDetector := &resourcewatcher.ChangeDetector{ DiscoveryClient: discoverClient, RESTMapper: mgr.GetRESTMapper(), diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceplacements.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceplacements.yaml index f49443d3e..a4fb8185e 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceplacements.yaml +++ 
b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceplacements.yaml @@ -2482,6 +2482,10 @@ spec: - RollingUpdate - External type: string + x-kubernetes-validations: + - message: cannot change rollout strategy type from 'External' + to other types + rule: '!(self != ''External'' && oldSelf == ''External'')' type: object required: - resourceSelectors diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml index 725eb8ddc..7f7ead012 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml @@ -2436,8 +2436,8 @@ spec: - spec type: object x-kubernetes-validations: - - message: metadata.name max length is 127 - rule: size(self.metadata.name) < 128 + - message: metadata.name max length is 63 + rule: size(self.metadata.name) < 64 served: true storage: true subresources: diff --git a/config/crd/bases/placement.kubernetes-fleet.io_resourceplacements.yaml b/config/crd/bases/placement.kubernetes-fleet.io_resourceplacements.yaml index 7e775b9b3..5df58390b 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_resourceplacements.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_resourceplacements.yaml @@ -992,6 +992,10 @@ spec: - RollingUpdate - External type: string + x-kubernetes-validations: + - message: cannot change rollout strategy type from 'External' + to other types + rule: '!(self != ''External'' && oldSelf == ''External'')' type: object required: - resourceSelectors diff --git a/config/crd/bases/placement.kubernetes-fleet.io_stagedupdateruns.yaml b/config/crd/bases/placement.kubernetes-fleet.io_stagedupdateruns.yaml index b06ff9829..bca8fe903 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_stagedupdateruns.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_stagedupdateruns.yaml @@ -1356,8 +1356,8 @@ spec: - spec type: object x-kubernetes-validations: - - message: metadata.name max length is 127 - rule: size(self.metadata.name) < 128 + - message: metadata.name max length is 63 + rule: size(self.metadata.name) < 64 served: true storage: true subresources: diff --git a/pkg/controllers/clusterinventory/clusterprofile/controller.go b/pkg/controllers/clusterinventory/clusterprofile/controller.go index f1735d3d2..11b145dfc 100644 --- a/pkg/controllers/clusterinventory/clusterprofile/controller.go +++ b/pkg/controllers/clusterinventory/clusterprofile/controller.go @@ -201,7 +201,6 @@ func (r *Reconciler) fillInClusterStatus(mc *clusterv1beta1.MemberCluster, cp *c } else { // throw an alert _ = controller.NewUnexpectedBehaviorError(fmt.Errorf("cluster certificate authority data not found in member cluster %s status", mc.Name)) - cp.Status.AccessProviders[0].Cluster.InsecureSkipTLSVerify = true } } diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller.go index e96243a64..971d95062 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller.go @@ -515,8 +515,18 @@ func (r *Reconciler) syncInternalMemberClusterStatus(imc *clusterv1beta1.Interna } // TODO: We didn't handle condition type: clusterv1beta1.ConditionTypeMemberClusterHealthy. - // Copy Agent status. 
- mc.Status.AgentStatus = imc.Status.AgentStatus + // Copy Agent status and set ObservedGeneration for agent conditions. + if len(imc.Status.AgentStatus) > 0 { + mc.Status.AgentStatus = make([]clusterv1beta1.AgentStatus, len(imc.Status.AgentStatus)) + } + for i := range imc.Status.AgentStatus { + mc.Status.AgentStatus[i] = *imc.Status.AgentStatus[i].DeepCopy() + // Set ObservedGeneration for each agent condition. + for j := range mc.Status.AgentStatus[i].Conditions { + mc.Status.AgentStatus[i].Conditions[j].ObservedGeneration = mc.GetGeneration() + } + } + r.aggregateJoinedCondition(mc) // Copy resource usages. mc.Status.ResourceUsage = imc.Status.ResourceUsage diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go index 087684a1a..c855adf05 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go @@ -706,6 +706,8 @@ func TestMarkMemberClusterJoined(t *testing.T) { func TestSyncInternalMemberClusterStatus(t *testing.T) { now := metav1.Now() + imcObservedGeneration := int64(1) + mcObservedGeneration := int64(2) tests := map[string]struct { r *Reconciler internalMemberCluster *clusterv1beta1.InternalMemberCluster @@ -728,6 +730,7 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Status: propertyProviderConditionStatus1, Reason: propertyProviderConditionReason1, Message: propertyProviderConditionMessage1, + ObservedGeneration: imcObservedGeneration, LastTransitionTime: now, }, { @@ -735,6 +738,7 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Status: propertyProviderConditionStatus2, Reason: propertyProviderConditionReason2, Message: propertyProviderConditionMessage2, + ObservedGeneration: imcObservedGeneration, LastTransitionTime: now, }, }, @@ -768,9 +772,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -779,9 +784,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -789,24 +795,31 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionTrue, - Reason: reasonMemberClusterJoined, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionTrue, + Reason: reasonMemberClusterJoined, + ObservedGeneration: mcObservedGeneration, }, { - Type: propertyProviderConditionType1, - Status: propertyProviderConditionStatus1, - Reason: 
propertyProviderConditionReason1, + Type: propertyProviderConditionType1, + Status: propertyProviderConditionStatus1, + Reason: propertyProviderConditionReason1, + ObservedGeneration: mcObservedGeneration, }, { - Type: propertyProviderConditionType2, - Status: propertyProviderConditionStatus2, - Reason: propertyProviderConditionReason2, + Type: propertyProviderConditionType2, + Status: propertyProviderConditionStatus2, + Reason: propertyProviderConditionReason2, + ObservedGeneration: mcObservedGeneration, }, }, Properties: map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue{ @@ -839,9 +852,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -850,9 +864,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -885,9 +900,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -896,9 +912,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -906,19 +923,25 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionFalse, - Reason: reasonMemberClusterLeft, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionFalse, + Reason: reasonMemberClusterLeft, + ObservedGeneration: mcObservedGeneration, }, { - Type: string(clusterv1beta1.ConditionTypeMemberClusterReadyToJoin), - Status: metav1.ConditionFalse, - Reason: reasonMemberClusterNotReadyToJoin, + Type: string(clusterv1beta1.ConditionTypeMemberClusterReadyToJoin), + Status: metav1.ConditionFalse, + Reason: reasonMemberClusterNotReadyToJoin, + ObservedGeneration: mcObservedGeneration, }, }, ResourceUsage: clusterv1beta1.ResourceUsage{ @@ -935,9 +958,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: 
string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -946,9 +970,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -981,9 +1006,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -992,9 +1018,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1002,14 +1029,19 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionUnknown, - Reason: reasonMemberClusterUnknown, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionUnknown, + Reason: reasonMemberClusterUnknown, + ObservedGeneration: mcObservedGeneration, }, }, ResourceUsage: clusterv1beta1.ResourceUsage{ @@ -1026,9 +1058,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1037,9 +1070,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1068,7 +1102,11 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ ResourceUsage: 
clusterv1beta1.ResourceUsage{ @@ -1082,9 +1120,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionUnknown, - Reason: reasonMemberClusterUnknown, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionUnknown, + Reason: reasonMemberClusterUnknown, + ObservedGeneration: mcObservedGeneration, }, }, }, @@ -1125,9 +1164,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1136,9 +1176,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1147,9 +1188,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MultiClusterServiceAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1157,14 +1199,19 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionTrue, - Reason: reasonMemberClusterJoined, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionTrue, + Reason: reasonMemberClusterJoined, + ObservedGeneration: mcObservedGeneration, }, }, ResourceUsage: clusterv1beta1.ResourceUsage{ @@ -1181,9 +1228,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1192,9 +1240,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.ServiceExportImportAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1203,9 +1252,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MultiClusterServiceAgent, Conditions: []metav1.Condition{ { - Type: 
string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionFalse, - Reason: "Left", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionFalse, + Reason: "Left", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1238,9 +1288,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1248,14 +1299,19 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionUnknown, - Reason: reasonMemberClusterUnknown, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionUnknown, + Reason: reasonMemberClusterUnknown, + ObservedGeneration: mcObservedGeneration, }, }, ResourceUsage: clusterv1beta1.ResourceUsage{ @@ -1272,9 +1328,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1307,9 +1364,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1321,14 +1379,19 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionUnknown, - Reason: reasonMemberClusterUnknown, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionUnknown, + Reason: reasonMemberClusterUnknown, + ObservedGeneration: mcObservedGeneration, }, }, ResourceUsage: clusterv1beta1.ResourceUsage{ @@ -1345,9 +1408,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1384,9 +1448,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { 
Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: imcObservedGeneration, }, }, LastReceivedHeartbeat: now, @@ -1398,14 +1463,19 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { }, }, }, - memberCluster: &clusterv1beta1.MemberCluster{}, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Generation: mcObservedGeneration, + }, + }, wantedMemberCluster: &clusterv1beta1.MemberCluster{ Status: clusterv1beta1.MemberClusterStatus{ Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), - Status: metav1.ConditionUnknown, - Reason: reasonMemberClusterUnknown, + Type: string(clusterv1beta1.ConditionTypeMemberClusterJoined), + Status: metav1.ConditionUnknown, + Reason: reasonMemberClusterUnknown, + ObservedGeneration: mcObservedGeneration, }, }, ResourceUsage: clusterv1beta1.ResourceUsage{ @@ -1422,9 +1492,10 @@ func TestSyncInternalMemberClusterStatus(t *testing.T) { Type: clusterv1beta1.MemberAgent, Conditions: []metav1.Condition{ { - Type: string(clusterv1beta1.AgentJoined), - Status: metav1.ConditionTrue, - Reason: "Joined", + Type: string(clusterv1beta1.AgentJoined), + Status: metav1.ConditionTrue, + Reason: "Joined", + ObservedGeneration: mcObservedGeneration, }, }, LastReceivedHeartbeat: now, diff --git a/pkg/controllers/placement/placement_status.go b/pkg/controllers/placement/placement_status.go index 20c981f2b..63b935bf1 100644 --- a/pkg/controllers/placement/placement_status.go +++ b/pkg/controllers/placement/placement_status.go @@ -265,7 +265,7 @@ func setPlacementConditions( func (r *Reconciler) buildClusterToBindingMap(ctx context.Context, placementObj fleetv1beta1.PlacementObj, latestSchedulingPolicySnapshot fleetv1beta1.PolicySnapshotObj) (map[string]fleetv1beta1.BindingObj, error) { placementKObj := klog.KObj(placementObj) // List all bindings for the placement object. - bindings, err := controller.ListBindingsFromKey(ctx, r.Client, types.NamespacedName{Namespace: placementObj.GetNamespace(), Name: placementObj.GetName()}) + bindings, err := controller.ListBindingsFromKey(ctx, r.Client, types.NamespacedName{Namespace: placementObj.GetNamespace(), Name: placementObj.GetName()}, true) if err != nil { klog.ErrorS(err, "Failed to list bindings for placement", "placement", placementKObj) return nil, controller.NewAPIServerError(true, err) diff --git a/pkg/controllers/placement/resource_selector.go b/pkg/controllers/placement/resource_selector.go index 08d598536..d849d3c92 100644 --- a/pkg/controllers/placement/resource_selector.go +++ b/pkg/controllers/placement/resource_selector.go @@ -380,7 +380,7 @@ func (r *Reconciler) fetchAllResourcesInOneNamespace(namespaceName string, place trackedResource := r.InformerManager.GetNameSpaceScopedResources() for _, gvr := range trackedResource { - if !r.shouldSelectResource(gvr) { + if !utils.ShouldProcessResource(gvr, r.RestMapper, r.ResourceConfig) { continue } if !r.InformerManager.IsInformerSynced(gvr) { @@ -406,26 +406,6 @@ func (r *Reconciler) fetchAllResourcesInOneNamespace(namespaceName string, place return resources, nil } -// shouldSelectResource returns whether a resource should be selected for propagation. 
-func (r *Reconciler) shouldSelectResource(gvr schema.GroupVersionResource) bool { - // By default, all of the APIs are allowed. - if r.ResourceConfig == nil { - return true - } - gvks, err := r.RestMapper.KindsFor(gvr) - if err != nil { - klog.ErrorS(err, "gvr(%s) transform failed: %v", gvr.String(), err) - return false - } - for _, gvk := range gvks { - if r.ResourceConfig.IsResourceDisabled(gvk) { - klog.V(2).InfoS("Skip watch resource", "group version kind", gvk.String()) - return false - } - } - return true -} - // generateRawContent strips all the unnecessary fields to prepare the objects for dispatch. func generateRawContent(object *unstructured.Unstructured) ([]byte, error) { // Make a deep copy of the object as we are modifying it. diff --git a/pkg/controllers/rollout/controller.go b/pkg/controllers/rollout/controller.go index 52e2abdc2..48fa0f143 100644 --- a/pkg/controllers/rollout/controller.go +++ b/pkg/controllers/rollout/controller.go @@ -101,7 +101,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim // list all the bindings associated with the placement // we read from the API server directly to avoid the repeated reconcile loop due to cache inconsistency - allBindings, err := controller.ListBindingsFromKey(ctx, r.UncachedReader, placementKey) + allBindings, err := controller.ListBindingsFromKey(ctx, r.UncachedReader, placementKey, false) if err != nil { klog.ErrorS(err, "Failed to list all the bindings associated with the placement", "placement", placementObjRef) @@ -145,7 +145,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim } // find the master resourceSnapshot. - masterResourceSnapshot, err := controller.FetchLatestMasterResourceSnapshot(ctx, r.UncachedReader, placementKey) + // Use the cached client so that rollout controller and work-generator have the same view of the + // resourceSnapshots in order to reduce the possibility of missing resourceSnapshots in work-generator. 
+ masterResourceSnapshot, err := controller.FetchLatestMasterResourceSnapshot(ctx, r.Client, placementKey) if err != nil { klog.ErrorS(err, "Failed to find the masterResourceSnapshot for the placement", "placement", placementObjRef) diff --git a/pkg/controllers/rollout/controller_integration_test.go b/pkg/controllers/rollout/controller_integration_test.go index 61d48051b..56c23358d 100644 --- a/pkg/controllers/rollout/controller_integration_test.go +++ b/pkg/controllers/rollout/controller_integration_test.go @@ -1013,76 +1013,6 @@ var _ = Describe("Test the rollout Controller", func() { } }) - It("Should rollout all the selected bindings when strategy type is changed from External to RollingUpdate", func() { - By("Creating CRP with External strategy") - var targetCluster int32 = 10 - rolloutCRP = clusterResourcePlacementForTest(testCRPName, - createPlacementPolicyForTest(placementv1beta1.PickNPlacementType, targetCluster), - createPlacementRolloutStrategyForTest(placementv1beta1.ExternalRolloutStrategyType, nil, nil)) - Expect(k8sClient.Create(ctx, rolloutCRP)).Should(Succeed()) - - By("Creating the latest master resource snapshot") - masterSnapshot := generateClusterResourceSnapshot(rolloutCRP.Name, 0, true) - Expect(k8sClient.Create(ctx, masterSnapshot)).Should(Succeed()) - By(fmt.Sprintf("master resource snapshot %s created", masterSnapshot.Name)) - - By("Creating scheduled bindings for master snapshot on target clusters") - clusters := make([]string, targetCluster) - for i := 0; i < int(targetCluster); i++ { - clusters[i] = "cluster-" + utils.RandStr() - binding := generateClusterResourceBinding(placementv1beta1.BindingStateScheduled, masterSnapshot.Name, clusters[i]) - Expect(k8sClient.Create(ctx, binding)).Should(Succeed()) - By(fmt.Sprintf("resource binding %s created", binding.Name)) - bindings = append(bindings, binding) - } - - By("Checking bindings are not rolled out consistently") - verifyBindingsNotRolledOutConsistently(controller.ConvertCRBArrayToBindingObjs(bindings)) - - By("Updating CRP rollout strategy type to RollingUpdate") - rolloutCRP.Spec.Strategy.Type = placementv1beta1.RollingUpdateRolloutStrategyType - rolloutCRP.Spec.Strategy.RollingUpdate = generateDefaultRollingUpdateConfig() - Expect(k8sClient.Update(ctx, rolloutCRP)).Should(Succeed(), "Failed to update CRP") - - By("Verifying that rollout is unblocked") - verifyBindingsRolledOut(controller.ConvertCRBArrayToBindingObjs(bindings), masterSnapshot, timeout) - }) - - It("Should rollout all the selected bindings when strategy type is changed from External to empty", func() { - By("Creating CRP with External strategy") - var targetCluster int32 = 10 - rolloutCRP = clusterResourcePlacementForTest(testCRPName, - createPlacementPolicyForTest(placementv1beta1.PickNPlacementType, targetCluster), - createPlacementRolloutStrategyForTest(placementv1beta1.ExternalRolloutStrategyType, nil, nil)) - Expect(k8sClient.Create(ctx, rolloutCRP)).Should(Succeed()) - - By("Creating the latest master resource snapshot") - masterSnapshot := generateClusterResourceSnapshot(rolloutCRP.Name, 0, true) - Expect(k8sClient.Create(ctx, masterSnapshot)).Should(Succeed()) - By(fmt.Sprintf("master resource snapshot %s created", masterSnapshot.Name)) - - By("Creating scheduled bindings for master snapshot on target clusters") - clusters := make([]string, targetCluster) - for i := 0; i < int(targetCluster); i++ { - clusters[i] = "cluster-" + utils.RandStr() - binding := generateClusterResourceBinding(placementv1beta1.BindingStateScheduled, 
masterSnapshot.Name, clusters[i]) - Expect(k8sClient.Create(ctx, binding)).Should(Succeed()) - By(fmt.Sprintf("resource binding %s created", binding.Name)) - bindings = append(bindings, binding) - } - - By("Checking bindings are not rolled out consistently") - verifyBindingsNotRolledOutConsistently(controller.ConvertCRBArrayToBindingObjs(bindings)) - - By("Updating CRP rollout strategy type to empty") - rolloutCRP.Spec.Strategy.Type = "" - rolloutCRP.Spec.Strategy.RollingUpdate = nil - Expect(k8sClient.Update(ctx, rolloutCRP)).Should(Succeed(), "Failed to update CRP") - - By("Verifying that rollout is unblocked") - verifyBindingsRolledOut(controller.ConvertCRBArrayToBindingObjs(bindings), masterSnapshot, timeout) - }) - It("Should not rollout anymore if the rollout strategy type is changed from RollingUpdate to External", func() { By("Creating CRP with RollingUpdate strategy") var targetCluster int32 = 10 diff --git a/pkg/controllers/statusbackreporter/controller.go b/pkg/controllers/statusbackreporter/controller.go new file mode 100644 index 000000000..c9eb39635 --- /dev/null +++ b/pkg/controllers/statusbackreporter/controller.go @@ -0,0 +1,291 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statusbackreporter + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + errorsutil "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/dynamic" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/handler" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils/controller" + parallelizerutil "go.goms.io/fleet/pkg/utils/parallelizer" +) + +// Reconciler reconciles a Work object (specifically its status) to back-report +// statuses to their corresponding original resources in the hub cluster. +type Reconciler struct { + hubClient client.Client + hubDynamicClient dynamic.Interface + + parallelizer parallelizerutil.Parallelizer +} + +// NewReconciler creates a new Reconciler. +func NewReconciler(hubClient client.Client, hubDynamicClient dynamic.Interface, parallelizer parallelizerutil.Parallelizer) *Reconciler { + if parallelizer == nil { + klog.V(2).InfoS("parallelizer is not set; using the default parallelizer with a worker count of 1") + parallelizer = parallelizerutil.NewParallelizer(1) + } + + return &Reconciler{ + hubClient: hubClient, + hubDynamicClient: hubDynamicClient, + parallelizer: parallelizer, + } +} + +// Reconcile reconciles the Work object to back-report statuses to their corresponding +// original resources in the hub cluster. 
+func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + workRef := klog.KRef(req.Namespace, req.Name) + startTime := time.Now() + klog.V(2).InfoS("Reconciliation loop starts", "controller", "statusBackReporter", "work", workRef) + defer func() { + latency := time.Since(startTime).Milliseconds() + klog.V(2).InfoS("Reconciliation loop ends", "controller", "statusBackReporter", "work", workRef, "latency", latency) + }() + + work := &placementv1beta1.Work{} + if err := r.hubClient.Get(ctx, req.NamespacedName, work); err != nil { + klog.ErrorS(err, "Failed to retrieve Work object", "work", workRef) + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Perform a sanity check; make sure that mirroring back to original resources can be done, i.e., + // the scheduling policy is set to the PickFixed type with exactly one target cluster, or the PickN + // type with the number of clusters set to 1. The logic also checks if the report back strategy still + // allows status back-reporting. + placementObj, shouldSkip, err := r.validatePlacementObjectForOriginalResourceStatusBackReporting(ctx, work) + if err != nil { + klog.ErrorS(err, "Failed to validate the placement object associated with the Work object for back-reporting statuses to original resources", "work", workRef) + return ctrl.Result{}, err + } + if shouldSkip { + klog.V(2).InfoS("Skip status back-reporting to original resources as the report-back strategy on the placement object forbids so", "work", workRef, "placement", klog.KObj(placementObj)) + return ctrl.Result{}, nil + } + + // Prepare a map for quick lookup of whether a resource is enveloped. + isResEnvelopedByIdStr := prepareIsResEnvelopedMap(placementObj) + + // Back-report statuses to original resources. + + // Prepare a child context. + // Cancel the child context anyway to avoid leaks. + childCtx, cancel := context.WithCancel(ctx) + defer cancel() + errs := make([]error, len(work.Status.ManifestConditions)) + doWork := func(pieces int) { + manifestCond := &work.Status.ManifestConditions[pieces] + resIdentifier := manifestCond.Identifier + + applyCond := meta.FindStatusCondition(work.Status.Conditions, placementv1beta1.WorkConditionTypeApplied) + if applyCond == nil || applyCond.ObservedGeneration != work.Generation || applyCond.Status != metav1.ConditionTrue { + // The resource has not been successfully applied yet. Skip back-reporting. + klog.V(2).InfoS("Skip status back-reporting for the resource; the resource has not been successfully applied yet", "work", workRef, "resourceIdentifier", resIdentifier) + return + } + + // Skip the resource if there is no back-reported status. + if manifestCond.BackReportedStatus == nil || len(manifestCond.BackReportedStatus.ObservedStatus.Raw) == 0 { + klog.V(2).InfoS("Skip status back-reporting for the resource; there is no back-reported status", "work", workRef, "resourceIdentifier", resIdentifier) + return + } + + // Skip the resource if it is enveloped. + idStr := formatWorkResourceIdentifier(&resIdentifier) + isEnveloped, ok := isResEnvelopedByIdStr[idStr] + if !ok { + // The resource is not found in the list of selected resources as reported by the status of the placement object. + // + // This is not considered as an error as the resource might be absent due to consistency reasons (i.e., it has + // just been de-selected); the status back-reporter will skip the resource for now. 
+ klog.V(2).InfoS("Skip status back-reporting for the resource; the resource is not found in the list of selected resources in the placement object", "work", workRef, "resourceIdentifier", resIdentifier) + return + } + if isEnveloped { + // The resource is enveloped; skip back-reporting. + klog.V(2).InfoS("Skip status back-reporting for the resource; the resource is enveloped", "work", workRef, "resourceIdentifier", resIdentifier) + return + } + + // Note that applied resources should always have a valid identifier set; for simplicity reasons + // here the back-reporter will no longer perform any validation. + gvr := schema.GroupVersionResource{ + Group: resIdentifier.Group, + Version: resIdentifier.Version, + Resource: resIdentifier.Resource, + } + nsName := resIdentifier.Namespace + resName := resIdentifier.Name + unstructured, err := r.hubDynamicClient.Resource(gvr).Namespace(nsName).Get(ctx, resName, metav1.GetOptions{}) + if err != nil { + wrappedErr := fmt.Errorf("failed to retrieve the target resource for status back-reporting: %w", err) + klog.ErrorS(err, "Failed to retrieve the target resource for status back-reporting", "work", workRef, "resourceIdentifier", resIdentifier) + errs[pieces] = wrappedErr + return + } + + // Set the back-reported status to the target resource. + statusWrapper := make(map[string]interface{}) + if err := json.Unmarshal(manifestCond.BackReportedStatus.ObservedStatus.Raw, &statusWrapper); err != nil { + wrappedErr := fmt.Errorf("failed to unmarshal back-reported status: %w", err) + klog.ErrorS(err, "Failed to unmarshal back-reported status", "work", workRef, "resourceIdentifier", resIdentifier) + errs[pieces] = wrappedErr + return + } + + // Note that if the applied resource has a status sub-resource, it is usually safe for us to assume that + // the original resource should also have a status sub-resource of the same format. + unstructured.Object["status"] = statusWrapper["status"] + _, err = r.hubDynamicClient.Resource(gvr).Namespace(nsName).UpdateStatus(ctx, unstructured, metav1.UpdateOptions{}) + if err != nil { + // TO-DO (chenyu1): check for cases where the API definition is inconsistent between the member cluster + // side and the hub cluster side, and single out the errors as user errors instead. + wrappedErr := fmt.Errorf("failed to update status to the target resource: %w", err) + klog.ErrorS(err, "Failed to update status to the target resource", "work", workRef, "resourceIdentifier", resIdentifier) + errs[pieces] = wrappedErr + return + } + } + r.parallelizer.ParallelizeUntil(childCtx, len(work.Status.ManifestConditions), doWork, "backReportStatusToOriginalResources") + return ctrl.Result{}, errorsutil.NewAggregate(errs) +} + +// validatePlacementObjectForOriginalResourceStatusBackReporting validates whether +// the placement object associated with the given Work object is eligible for back-reporting +// statuses to original resources. +func (r *Reconciler) validatePlacementObjectForOriginalResourceStatusBackReporting( + ctx context.Context, work *placementv1beta1.Work) (placementv1beta1.PlacementObj, bool, error) { + // Read the `kubernetes-fleet.io/parent-CRP` label to retrieve the CRP/RP name. + parentPlacementName, ok := work.Labels[placementv1beta1.PlacementTrackingLabel] + if !ok || len(parentPlacementName) == 0 { + // Normally this should never occur. 
+ wrappedErr := fmt.Errorf("the placement tracking label is absent or invalid (label value: %s)", parentPlacementName) + return nil, false, controller.NewUnexpectedBehaviorError(wrappedErr) + } + + // Read the `kubernetes-fleet.io/parent-namespace` label to retrieve the RP namespace (if any). + parentPlacementNSName := work.Labels[placementv1beta1.ParentNamespaceLabel] + + var placementObj placementv1beta1.PlacementObj + if len(parentPlacementNSName) == 0 { + // Retrieve the CRP object. + placementObj = &placementv1beta1.ClusterResourcePlacement{} + if err := r.hubClient.Get(ctx, client.ObjectKey{Name: parentPlacementName}, placementObj); err != nil { + wrappedErr := fmt.Errorf("failed to retrieve CRP object: %w", err) + return nil, false, controller.NewAPIServerError(true, wrappedErr) + } + } else { + // Retrieve the RP object. + placementObj = &placementv1beta1.ResourcePlacement{} + if err := r.hubClient.Get(ctx, client.ObjectKey{Namespace: parentPlacementNSName, Name: parentPlacementName}, placementObj); err != nil { + wrappedErr := fmt.Errorf("failed to retrieve RP object: %w", err) + return nil, false, controller.NewAPIServerError(true, wrappedErr) + } + } + + // Validate the scheduling policy of the placement object. + schedulingPolicy := placementObj.GetPlacementSpec().Policy + switch { + case schedulingPolicy == nil: + // The system uses a default scheduling policy of the PickAll placement type. Reject status back-reporting. + wrappedErr := fmt.Errorf("no scheduling policy specified (the PickAll type is in use); cannot back-report status to original resources") + return nil, false, controller.NewUserError(wrappedErr) + case schedulingPolicy.PlacementType == placementv1beta1.PickAllPlacementType: + wrappedErr := fmt.Errorf("the scheduling policy in use is of the PickAll type; cannot back-report status to original resources") + return nil, false, controller.NewUserError(wrappedErr) + case schedulingPolicy.PlacementType == placementv1beta1.PickFixedPlacementType && len(schedulingPolicy.ClusterNames) != 1: + wrappedErr := fmt.Errorf("the scheduling policy in use is of the PickFixed type, but it has more than one target cluster (%d clusters); cannot back-report status to original resources", len(schedulingPolicy.ClusterNames)) + return nil, false, controller.NewUserError(wrappedErr) + case schedulingPolicy.PlacementType == placementv1beta1.PickNPlacementType && schedulingPolicy.NumberOfClusters == nil: + // Normally this should never occur. + wrappedErr := fmt.Errorf("the scheduling policy in use is of the PickN type, but no number of target clusters is specified; cannot back-report status to original resources") + return nil, false, controller.NewUserError(wrappedErr) + case schedulingPolicy.PlacementType == placementv1beta1.PickNPlacementType && *schedulingPolicy.NumberOfClusters != 1: + wrappedErr := fmt.Errorf("the scheduling policy in use is of the PickN type, but the number of target clusters is not set to 1; cannot back-report status to original resources") + return nil, false, controller.NewUserError(wrappedErr) + } + + // Check if the report back strategy on the placement object still allows status back-reporting to the original resources. 
+ reportBackStrategy := placementObj.GetPlacementSpec().Strategy.ReportBackStrategy + switch { + case reportBackStrategy == nil: + klog.V(2).InfoS("Skip status back-reporting; the strategy has not been set", "placement", klog.KObj(placementObj)) + return placementObj, true, nil + case reportBackStrategy.Type != placementv1beta1.ReportBackStrategyTypeMirror: + klog.V(2).InfoS("Skip status back-reporting; it has been disabled in the strategy", "placement", klog.KObj(placementObj)) + return placementObj, true, nil + case reportBackStrategy.Destination == nil: + // This in theory should never occur; CEL based validation should have rejected such strategies. + klog.V(2).InfoS("Skip status back-reporting; destination has not been set in the strategy", "placement", klog.KObj(placementObj)) + return placementObj, true, nil + case *reportBackStrategy.Destination != placementv1beta1.ReportBackDestinationOriginalResource: + klog.V(2).InfoS("Skip status back-reporting; destination has been set to the Work API", "placement", klog.KObj(placementObj)) + return placementObj, true, nil + } + + // The scheduling policy is valid for back-reporting statuses to original resources. + return placementObj, false, nil +} + +// formatResourceIdentifier formats a ResourceIdentifier object to a string for keying purposes. +// +// The format in use is `[API-GROUP]/[API-VERSION]/[API-KIND]/[NAMESPACE]/[NAME]`, e.g., `/v1/Namespace//work`. +func formatResourceIdentifier(resourceIdentifier *placementv1beta1.ResourceIdentifier) string { + return fmt.Sprintf("%s/%s/%s/%s/%s", resourceIdentifier.Group, resourceIdentifier.Version, resourceIdentifier.Kind, resourceIdentifier.Namespace, resourceIdentifier.Name) +} + +// formatWorkResourceIdentifier formats a WorkResourceIdentifier object to a string for keying purposes. +// +// The format in use is `[API-GROUP]/[API-VERSION]/[API-KIND]/[NAMESPACE]/[NAME]`, e.g., `/v1/Namespace//work`. +func formatWorkResourceIdentifier(workResourceIdentifier *placementv1beta1.WorkResourceIdentifier) string { + return fmt.Sprintf("%s/%s/%s/%s/%s", workResourceIdentifier.Group, workResourceIdentifier.Version, workResourceIdentifier.Kind, workResourceIdentifier.Namespace, workResourceIdentifier.Name) +} + +// prepareIsResEnvelopedMap prepares a map for quick lookup of whether a resource is enveloped. +func prepareIsResEnvelopedMap(placementObj placementv1beta1.PlacementObj) map[string]bool { + isResEnvelopedByIdStr := make(map[string]bool) + + selectedResources := placementObj.GetPlacementStatus().SelectedResources + for idx := range selectedResources { + selectedRes := selectedResources[idx] + idStr := formatResourceIdentifier(&selectedRes) + isResEnvelopedByIdStr[idStr] = selectedRes.Envelope != nil + } + + return isResEnvelopedByIdStr +} + +func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + Named("status-back-reporter"). + Watches(&placementv1beta1.Work{}, &handler.EnqueueRequestForObject{}). + Complete(r) +} diff --git a/pkg/controllers/statusbackreporter/controller_integration_test.go b/pkg/controllers/statusbackreporter/controller_integration_test.go new file mode 100644 index 000000000..3690d60d2 --- /dev/null +++ b/pkg/controllers/statusbackreporter/controller_integration_test.go @@ -0,0 +1,408 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statusbackreporter + +import ( + "encoding/json" + "fmt" + "time" + + "github.com/google/go-cmp/cmp" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" + testutilsactuals "go.goms.io/fleet/test/utils/actuals" + testutilsresource "go.goms.io/fleet/test/utils/resource" +) + +const ( + // The linter in use mistakenly recognizes some of the names as potential hardcoded credentials; + // as a result, gosec linter warnings are suppressed for these variables. + crpWorkNameTemplate = "%s-work-%s" //nolint:gosec + nsNameTemplate = "ns-%s" + crpNameTemplate = "crp-%s" + + deployName = "app" + + workOrManifestAppliedReason = "MarkedAsApplied" + workOrManifestAppliedMessage = "the object is marked as applied" + deployAvailableReason = "MarkedAsAvailable" + deployAvailableMessage = "the object is marked as available" +) + +const ( + eventuallyDuration = time.Second * 10 + eventuallyInterval = time.Second * 1 +) + +var ( + nsTemplate = corev1.Namespace{ + TypeMeta: metav1.TypeMeta{ + Kind: "Namespace", + APIVersion: "v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + }, + } +) + +// createWorkObject creates a new Work object with the given work name/namespace, placement object name/namespace, report back strategy, and raw manifest JSONs. +func createWorkObject(workName, memberClusterReservedNSName, placementObjName, placementObjNSName string, reportBackStrategy *placementv1beta1.ReportBackStrategy, rawManifestJSON ...[]byte) { + work := testutilsresource.WorkObjectForTest(workName, memberClusterReservedNSName, placementObjName, placementObjNSName, nil, reportBackStrategy, rawManifestJSON...) 
+	Expect(hubClient.Create(ctx, work)).To(Succeed())
+}
+
+func marshalK8sObjJSON(obj runtime.Object) []byte {
+	json, err := testutilsresource.MarshalRuntimeObjToJSONForTest(obj)
+	Expect(err).To(BeNil(), "Failed to marshal the k8s object to JSON")
+	return json
+}
+
+func prepareStatusWrapperData(obj runtime.Object) ([]byte, error) {
+	unstructuredObjMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
+	if err != nil {
+		return nil, fmt.Errorf("failed to convert to unstructured object: %w", err)
+	}
+	unstructuredObj := &unstructured.Unstructured{Object: unstructuredObjMap}
+	statusBackReportingWrapper := make(map[string]interface{})
+	statusBackReportingWrapper["apiVersion"] = unstructuredObj.GetAPIVersion()
+	statusBackReportingWrapper["kind"] = unstructuredObj.GetKind()
+	statusBackReportingWrapper["status"] = unstructuredObj.Object["status"]
+	statusBackReportingWrapperData, err := json.Marshal(statusBackReportingWrapper)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal status back-reporting wrapper data: %w", err)
+	}
+	return statusBackReportingWrapperData, nil
+}
+
+func ensureWorkObjectDeletion(workName string) {
+	// Build a stub of the Work object for deletion.
+	work := &placementv1beta1.Work{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      workName,
+			Namespace: memberReservedNSName,
+		},
+	}
+	Expect(hubClient.Delete(ctx, work)).To(Succeed(), "Failed to delete the Work object")
+
+	workObjRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName)
+	Eventually(workObjRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove work object")
+}
+
+var _ = Describe("back-reporting status", func() {
+	Context("back-report status for deployments (CRP)", Ordered, func() {
+		crpName := fmt.Sprintf(crpNameTemplate, utils.RandStr())
+		workName := fmt.Sprintf(crpWorkNameTemplate, crpName, utils.RandStr())
+		// The environment prepared by the envtest package does not support namespace
+		// deletion; each test case uses a new namespace.
+		nsName := fmt.Sprintf(nsNameTemplate, utils.RandStr())
+
+		var ns *corev1.Namespace
+		var deploy *appsv1.Deployment
+		var now metav1.Time
+
+		BeforeAll(func() {
+			now = metav1.Now().Rfc3339Copy()
+
+			// Create the namespace. Set the name before marshaling so that the manifest JSON
+			// matches the namespace that is actually created.
+			ns = nsTemplate.DeepCopy()
+			ns.Name = nsName
+			nsJSON := marshalK8sObjJSON(ns)
+			Expect(hubClient.Create(ctx, ns)).To(Succeed())
+
+			// Create the deployment.
+			deploy = &appsv1.Deployment{
+				TypeMeta: metav1.TypeMeta{
+					Kind:       "Deployment",
+					APIVersion: "apps/v1",
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      deployName,
+					Namespace: nsName,
+				},
+				Spec: appsv1.DeploymentSpec{
+					Replicas: ptr.To(int32(1)),
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{
+							"app": "nginx",
+						},
+					},
+					Template: corev1.PodTemplateSpec{
+						ObjectMeta: metav1.ObjectMeta{
+							Labels: map[string]string{
+								"app": "nginx",
+							},
+						},
+						Spec: corev1.PodSpec{
+							Containers: []corev1.Container{
+								{
+									Name:  "nginx",
+									Image: "nginx",
+									Ports: []corev1.ContainerPort{
+										{
+											ContainerPort: 80,
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}
+			deployJSON := marshalK8sObjJSON(deploy)
+			Expect(hubClient.Create(ctx, deploy)).To(Succeed())
+
+			// Create the CRP.
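+			// Back-reporting to the original resource is only accepted for placements that resolve to
+			// a single target cluster (see validatePlacementObjectForOriginalResourceStatusBackReporting),
+			// so the CRP below uses a PickFixed policy with exactly one cluster together with a Mirror
+			// report back strategy whose destination is the original resource.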
+			crp := &placementv1beta1.ClusterResourcePlacement{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: crpName,
+				},
+				Spec: placementv1beta1.PlacementSpec{
+					ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{
+						{
+							Group:   "",
+							Version: "v1",
+							Kind:    "Namespace",
+							Name:    nsName,
+						},
+					},
+					Policy: &placementv1beta1.PlacementPolicy{
+						PlacementType: placementv1beta1.PickFixedPlacementType,
+						ClusterNames: []string{
+							cluster1,
+						},
+					},
+					Strategy: placementv1beta1.RolloutStrategy{
+						ReportBackStrategy: &placementv1beta1.ReportBackStrategy{
+							Type:        placementv1beta1.ReportBackStrategyTypeMirror,
+							Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource),
+						},
+					},
+				},
+			}
+			Expect(hubClient.Create(ctx, crp)).To(Succeed())
+
+			// Create the Work object.
+			reportBackStrategy := &placementv1beta1.ReportBackStrategy{
+				Type:        placementv1beta1.ReportBackStrategyTypeMirror,
+				Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource),
+			}
+			createWorkObject(workName, memberReservedNSName, crpName, "", reportBackStrategy, nsJSON, deployJSON)
+		})
+
+		It("can update CRP status", func() {
+			Eventually(func() error {
+				crp := &placementv1beta1.ClusterResourcePlacement{}
+				if err := hubClient.Get(ctx, client.ObjectKey{Name: crpName}, crp); err != nil {
+					return fmt.Errorf("failed to retrieve CRP object: %w", err)
+				}
+
+				crp.Status = placementv1beta1.PlacementStatus{
+					SelectedResources: []placementv1beta1.ResourceIdentifier{
+						{
+							Group:   "",
+							Version: "v1",
+							Kind:    "Namespace",
+							Name:    nsName,
+						},
+						{
+							Group:     "apps",
+							Version:   "v1",
+							Kind:      "Deployment",
+							Name:      deployName,
+							Namespace: nsName,
+						},
+					},
+				}
+				if err := hubClient.Status().Update(ctx, crp); err != nil {
+					return fmt.Errorf("failed to update CRP status: %w", err)
+				}
+				return nil
+			}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status")
+		})
+
+		It("can update work status", func() {
+			Eventually(func() error {
+				work := &placementv1beta1.Work{}
+				if err := hubClient.Get(ctx, client.ObjectKey{Namespace: memberReservedNSName, Name: workName}, work); err != nil {
+					return fmt.Errorf("failed to retrieve work object: %w", err)
+				}
+
+				deployWithStatus := deploy.DeepCopy()
+				deployWithStatus.Status = appsv1.DeploymentStatus{
+					ObservedGeneration:  deploy.Generation,
+					Replicas:            1,
+					UpdatedReplicas:     1,
+					AvailableReplicas:   1,
+					ReadyReplicas:       1,
+					UnavailableReplicas: 0,
+					Conditions: []appsv1.DeploymentCondition{
+						{
+							Type:               appsv1.DeploymentAvailable,
+							Status:             corev1.ConditionTrue,
+							LastUpdateTime:     now,
+							LastTransitionTime: now,
+							Reason:             deployAvailableReason,
+							Message:            deployAvailableMessage,
+						},
+					},
+				}
+
+				statusBackReportingWrapperData, err := prepareStatusWrapperData(deployWithStatus)
+				if err != nil {
+					return fmt.Errorf("failed to prepare status wrapper data: %w", err)
+				}
+
+				work.Status = placementv1beta1.WorkStatus{
+					Conditions: []metav1.Condition{
+						{
+							Type:               placementv1beta1.WorkConditionTypeApplied,
+							Status:             metav1.ConditionTrue,
+							Reason:             workOrManifestAppliedReason,
+							Message:            workOrManifestAppliedMessage,
+							ObservedGeneration: 1,
+							LastTransitionTime: now,
+						},
+					},
+					ManifestConditions: []placementv1beta1.ManifestCondition{
+						{
+							Identifier: placementv1beta1.WorkResourceIdentifier{
+								Ordinal:   0,
+								Group:     "",
+								Version:   "v1",
+								Kind:      "Namespace",
+								Resource:  "namespaces",
+								Namespace: "",
+								Name:      nsName,
+							},
+							Conditions: []metav1.Condition{
+								{
+									Type:               placementv1beta1.WorkConditionTypeApplied,
+									Status:             metav1.ConditionTrue,
+									Reason: 
workOrManifestAppliedReason, + Message: workOrManifestAppliedMessage, + ObservedGeneration: 1, + LastTransitionTime: now, + }, + }, + }, + { + Identifier: placementv1beta1.WorkResourceIdentifier{ + Ordinal: 1, + Group: "apps", + Version: "v1", + Kind: "Deployment", + Resource: "deployments", + Namespace: nsName, + Name: deployName, + }, + Conditions: []metav1.Condition{ + { + Type: placementv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: workOrManifestAppliedReason, + Message: workOrManifestAppliedMessage, + ObservedGeneration: 1, + LastTransitionTime: now, + }, + }, + BackReportedStatus: &placementv1beta1.BackReportedStatus{ + ObservedStatus: runtime.RawExtension{ + Raw: statusBackReportingWrapperData, + }, + ObservationTime: now, + }, + }, + }, + } + if err := hubClient.Status().Update(ctx, work); err != nil { + return fmt.Errorf("failed to update Work object status: %w", err) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update Work object status") + }) + + It("should back-report status to original resource", func() { + wantDeployStatus := appsv1.DeploymentStatus{ + ObservedGeneration: deploy.Generation, + Replicas: 1, + UpdatedReplicas: 1, + AvailableReplicas: 1, + ReadyReplicas: 1, + UnavailableReplicas: 0, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: corev1.ConditionTrue, + LastUpdateTime: now, + LastTransitionTime: now, + Reason: deployAvailableReason, + Message: deployAvailableMessage, + }, + }, + } + + Eventually(func() error { + deploy := &appsv1.Deployment{} + if err := hubClient.Get(ctx, client.ObjectKey{Namespace: nsName, Name: deployName}, deploy); err != nil { + return fmt.Errorf("failed to retrieve Deployment object: %w", err) + } + + if diff := cmp.Diff(deploy.Status, wantDeployStatus); diff != "" { + return fmt.Errorf("deploy status diff (-got, +want):\n%s", diff) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to back-report status") + }) + + AfterAll(func() { + // Delete the Work object. + ensureWorkObjectDeletion(workName) + + // Delete the Deployment object. + Eventually(func() error { + deploy := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nsName, + Name: deployName, + }, + } + if err := hubClient.Delete(ctx, deploy); err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete Deployment object: %w", err) + } + if err := hubClient.Get(ctx, client.ObjectKey{Name: deployName, Namespace: nsName}, deploy); err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("Deployment object still exists or an unexpected error occurred: %w", err) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove Deployment object") + + // The environment prepared by the envtest package does not support namespace + // deletion; consequently this test suite would not attempt to verify its deletion. + }) + }) +}) diff --git a/pkg/controllers/statusbackreporter/controller_test.go b/pkg/controllers/statusbackreporter/controller_test.go new file mode 100644 index 000000000..0071b17e1 --- /dev/null +++ b/pkg/controllers/statusbackreporter/controller_test.go @@ -0,0 +1,760 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statusbackreporter + +import ( + "context" + "log" + "os" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +const ( + // The linter in use mistakenly recognizes some of the names as potential hardcoded credentials; + // as a result, gosec linter warnings are suppressed for these variables. + crpName1 = "crp-1" + rpName1 = "rp-1" + nsName = "work" + clusterResEnvelopeName = "cluster-res-envelope-1" + resEnvelopeName = "res-envelope-1" + cluster1 = "cluster-1" + cluster2 = "cluster-2" + + crpWorkName1 = "crp-1-work" + rpWorkName1 = "work.test.app-work" //nolint:gosec + rpWorkName2 = "work.app-work" //nolint:gosec +) + +func TestMain(m *testing.M) { + // Set up the scheme. + if err := clientgoscheme.AddToScheme(scheme.Scheme); err != nil { + log.Fatalf("failed to add default set of APIs to the runtime scheme: %v", err) + } + if err := placementv1beta1.AddToScheme(scheme.Scheme); err != nil { + log.Fatalf("failed to add custom APIs (placement/v1beta1) to the runtime scheme: %v", err) + } + + os.Exit(m.Run()) +} + +// TestFormatResourceIdentifier tests the formatResourceIdentifier function. +func TestFormatResourceIdentifier(t *testing.T) { + testCases := []struct { + name string + resourceIdentifier *placementv1beta1.ResourceIdentifier + wantIdStr string + }{ + { + name: "cluster-scoped object (core API group)", + resourceIdentifier: &placementv1beta1.ResourceIdentifier{ + Group: "", + Version: "v1", + Kind: "Namespace", + Namespace: "", + Name: nsName, + }, + wantIdStr: "/v1/Namespace//work", + }, + { + name: "cluster-scoped object (non-core API group)", + resourceIdentifier: &placementv1beta1.ResourceIdentifier{ + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRole", + Namespace: "", + Name: "admin", + }, + wantIdStr: "rbac.authorization.k8s.io/v1/ClusterRole//admin", + }, + { + name: "namespace-scoped object (core API group)", + resourceIdentifier: &placementv1beta1.ResourceIdentifier{ + Group: "", + Version: "v1", + Kind: "Pod", + Namespace: "default", + Name: "nginx-pod", + }, + wantIdStr: "/v1/Pod/default/nginx-pod", + }, + { + name: "namespace-scoped object (non-core API group)", + resourceIdentifier: &placementv1beta1.ResourceIdentifier{ + Group: "apps", + Version: "v1", + Kind: "Deployment", + Namespace: "default", + Name: "nginx", + }, + wantIdStr: "apps/v1/Deployment/default/nginx", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + idStr := formatResourceIdentifier(tc.resourceIdentifier) + if !cmp.Equal(idStr, tc.wantIdStr) { + t.Errorf("formatResourceIdentifier() = %v, want %v", idStr, tc.wantIdStr) + } + }) + } +} + +// TestFormatWorkResourceIdentifier tests the formatWorkResourceIdentifier function. 
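+//
+// Note that both format helpers key by Group/Version/Kind/Namespace/Name and ignore the plural
+// Resource field (and the Ordinal), so a Work manifest identifier such as the "namespaces" entry
+// below yields the same key, "/v1/Namespace//work", as the matching selected-resource identifier
+// in TestFormatResourceIdentifier above.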
+func TestFormatWorkResourceIdentifier(t *testing.T) { + testCases := []struct { + name string + workResourceIdentifier *placementv1beta1.WorkResourceIdentifier + wantIdStr string + }{ + { + name: "cluster-scoped object (core API group)", + workResourceIdentifier: &placementv1beta1.WorkResourceIdentifier{ + Ordinal: 0, + Group: "", + Version: "v1", + Kind: "Namespace", + Resource: "namespaces", + Namespace: "", + Name: "work", + }, + wantIdStr: "/v1/Namespace//work", + }, + { + name: "cluster-scoped object (non-core API group)", + workResourceIdentifier: &placementv1beta1.WorkResourceIdentifier{ + Ordinal: 1, + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRole", + Resource: "clusterroles", + Namespace: "", + Name: "admin", + }, + wantIdStr: "rbac.authorization.k8s.io/v1/ClusterRole//admin", + }, + { + name: "namespace-scoped object (core API group)", + workResourceIdentifier: &placementv1beta1.WorkResourceIdentifier{ + Ordinal: 2, + Group: "", + Version: "v1", + Kind: "Pod", + Resource: "pods", + Namespace: "default", + Name: "nginx-pod", + }, + wantIdStr: "/v1/Pod/default/nginx-pod", + }, + { + name: "namespace-scoped object (non-core API group)", + workResourceIdentifier: &placementv1beta1.WorkResourceIdentifier{ + Ordinal: 3, + Group: "apps", + Version: "v1", + Kind: "Deployment", + Resource: "deployments", + Namespace: "default", + Name: "nginx", + }, + wantIdStr: "apps/v1/Deployment/default/nginx", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + idStr := formatWorkResourceIdentifier(tc.workResourceIdentifier) + if !cmp.Equal(idStr, tc.wantIdStr) { + t.Errorf("formatWorkResourceIdentifier() = %v, want %v", idStr, tc.wantIdStr) + } + }) + } +} + +// TestPrepareIsResEnvelopedMap tests the prepareIsResEnvelopedMap function. 
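+//
+// The map value records whether a selected resource was delivered through an envelope; for example,
+// a ClusterRoleBinding wrapped in a ClusterResourceEnvelope is keyed as
+// "rbac.authorization.k8s.io/v1/ClusterRoleBinding//admin-users" with value true, while a directly
+// selected Namespace maps to "/v1/Namespace//work" with value false.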
+func TestPrepareIsResEnvelopedMap(t *testing.T) { + testCases := []struct { + name string + placementObj placementv1beta1.PlacementObj + wantIsResEnvelopedMap map[string]bool + }{ + { + name: "CRP object with regular and enveloped objects", + placementObj: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName1, + }, + Status: placementv1beta1.PlacementStatus{ + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: "", + Version: "v1", + Kind: "Namespace", + Namespace: "", + Name: nsName, + }, + { + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRole", + Namespace: "", + Name: "admin", + }, + { + Group: "rbac.authorization.k8s.io", + Version: "v1", + Kind: "ClusterRoleBinding", + Name: "admin-users", + Envelope: &placementv1beta1.EnvelopeIdentifier{ + Name: clusterResEnvelopeName, + Type: placementv1beta1.ClusterResourceEnvelopeType, + }, + }, + }, + }, + }, + wantIsResEnvelopedMap: map[string]bool{ + "/v1/Namespace//work": false, + "rbac.authorization.k8s.io/v1/ClusterRole//admin": false, + "rbac.authorization.k8s.io/v1/ClusterRoleBinding//admin-users": true, + }, + }, + { + name: "RP object with regular and enveloped objects", + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nsName, + Name: rpName1, + }, + Status: placementv1beta1.PlacementStatus{ + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: "", + Version: "v1", + Kind: "Pod", + Namespace: "default", + Name: "nginx-pod", + }, + { + Group: "apps", + Version: "v1", + Kind: "Deployment", + Namespace: "default", + Name: "nginx", + }, + { + Group: "apps", + Version: "v1", + Kind: "ResourceQuota", + Namespace: "default", + Name: "all", + Envelope: &placementv1beta1.EnvelopeIdentifier{ + Name: resEnvelopeName, + Type: placementv1beta1.ResourceEnvelopeType, + }, + }, + }, + }, + }, + wantIsResEnvelopedMap: map[string]bool{ + "/v1/Pod/default/nginx-pod": false, + "apps/v1/Deployment/default/nginx": false, + "apps/v1/ResourceQuota/default/all": true, + }, + }, + { + name: "empty map", + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: nsName, + Name: rpName1, + }, + Status: placementv1beta1.PlacementStatus{ + SelectedResources: []placementv1beta1.ResourceIdentifier{}, + }, + }, + wantIsResEnvelopedMap: map[string]bool{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + isResEnvelopedMap := prepareIsResEnvelopedMap(tc.placementObj) + if diff := cmp.Diff(isResEnvelopedMap, tc.wantIsResEnvelopedMap); diff != "" { + t.Errorf("prepareIsResEnvelopedMap() isResEnvelopedMaps mismatch (-got, +want):\n%s", diff) + } + }) + } +} + +// TestValidatePlacementObjectForOriginalResourceStatusBackReporting tests the validatePlacementObjectForOriginalResourceStatusBackReporting function. +func TestValidatePlacementObjectForOriginalResourceStatusBackReporting(t *testing.T) { + testCases := []struct { + name string + placementObj placementv1beta1.PlacementObj + work *placementv1beta1.Work + wantShouldSkip bool + wantErred bool + wantErrStrSubString string + // The method returns the placement object as it is; for simplicity reasons the test spec here + // will no longer check the returned placement object here. 
+ }{ + { + name: "no placement tracking label", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + }, + }, + wantErred: true, + wantErrStrSubString: "the placement tracking label is absent or invalid", + }, + { + name: "empty placement tracking label", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: "", + }, + }, + }, + wantErred: true, + wantErrStrSubString: "the placement tracking label is absent or invalid", + }, + { + name: "work associated with rp, invalid scheduling policy (nil)", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "no scheduling policy specified (the PickAll type is in use)", + }, + { + name: "work associated with crp, invalid scheduling policy (nil)", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: crpName1, + }, + }, + }, + placementObj: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName1, + }, + Spec: placementv1beta1.PlacementSpec{ + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "no scheduling policy specified (the PickAll type is in use)", + }, + { + name: "work associated with rp, rp not found", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "failed to retrieve RP object", + }, + { + name: "work associated with crp, crp not found", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: crpName1, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "failed to retrieve CRP object", + }, + { + name: "work associated with rp, with PickAll scheduling policy", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickAllPlacementType, + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: 
&placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "the scheduling policy in use is of the PickAll type", + }, + { + name: "work associated with rp, with PickFixed placement type and more than 1 selected clusters", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{ + cluster1, + cluster2, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "the scheduling policy in use is of the PickFixed type, but it has more than one target cluster", + }, + { + name: "work associated with rp, with PickN placement type and more than 1 clusters to select", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + NumberOfClusters: ptr.To(int32(2)), + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "the scheduling policy in use is of the PickN type, but the number of target clusters is not set to 1", + }, + { + // Normally this will never occur. 
+ name: "work associated with rp, with PickN placement type and no number of target clusters", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + wantErred: true, + wantErrStrSubString: "the scheduling policy in use is of the PickN type, but no number of target clusters is specified", + }, + { + name: "work associated with crp, with PickFixed placement type and one selected cluster", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: crpName1, + }, + }, + }, + placementObj: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName1, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{ + cluster1, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + }, + { + name: "work associated with crp, with PickN placement type and 1 target cluster to select", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: crpName1, + }, + }, + }, + placementObj: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName1, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + NumberOfClusters: ptr.To(int32(1)), + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationOriginalResource), + }, + }, + }, + }, + }, + { + name: "work associated with rp, no report back strategy (nil)", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + NumberOfClusters: ptr.To(int32(1)), + }, + Strategy: placementv1beta1.RolloutStrategy{}, + }, + }, + wantShouldSkip: true, + }, + { + name: "work associated with rp, report back strategy not set to Mirror type", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + 
Name: rpWorkName2, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: rpName1, + placementv1beta1.ParentNamespaceLabel: nsName, + }, + }, + }, + placementObj: &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName1, + Namespace: nsName, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + NumberOfClusters: ptr.To(int32(1)), + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeDisabled, + }, + }, + }, + }, + wantShouldSkip: true, + }, + { + name: "work associated with crp, report back strategy destination not set", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: crpName1, + }, + }, + }, + placementObj: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName1, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + NumberOfClusters: ptr.To(int32(1)), + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + }, + }, + }, + }, + wantShouldSkip: true, + }, + { + name: "work associated with crp, report back strategy destination not set to OriginalResource", + work: &placementv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpWorkName1, + Labels: map[string]string{ + placementv1beta1.PlacementTrackingLabel: crpName1, + }, + }, + }, + placementObj: &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName1, + }, + Spec: placementv1beta1.PlacementSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickNPlacementType, + NumberOfClusters: ptr.To(int32(1)), + }, + Strategy: placementv1beta1.RolloutStrategy{ + ReportBackStrategy: &placementv1beta1.ReportBackStrategy{ + Type: placementv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(placementv1beta1.ReportBackDestinationWorkAPI), + }, + }, + }, + }, + wantShouldSkip: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + fakeClientBuilder := fake.NewClientBuilder().WithScheme(scheme.Scheme) + if tc.placementObj != nil { + fakeClientBuilder.WithObjects(tc.placementObj) + } + fakeClient := fakeClientBuilder.Build() + + r := NewReconciler(fakeClient, nil, nil) + + _, shouldSkip, err := r.validatePlacementObjectForOriginalResourceStatusBackReporting(ctx, tc.work) + if tc.wantErred { + if err == nil { + t.Fatalf("validatePlacementObjectForOriginalResourceStatusBackReporting() = nil, want erred") + return + } + if !strings.Contains(err.Error(), tc.wantErrStrSubString) { + t.Fatalf("validatePlacementObjectForOriginalResourceStatusBackReporting() = %v, want to have prefix %s", err, tc.wantErrStrSubString) + return + } + } + if shouldSkip != tc.wantShouldSkip { + t.Errorf("validatePlacementObjectForOriginalResourceStatusBackReporting() shouldSkip = %v, want %v", shouldSkip, tc.wantShouldSkip) + } + }) + } +} diff --git a/pkg/controllers/statusbackreporter/suite_test.go b/pkg/controllers/statusbackreporter/suite_test.go new file mode 100644 index 000000000..5a7518ea0 --- /dev/null +++ 
b/pkg/controllers/statusbackreporter/suite_test.go @@ -0,0 +1,142 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statusbackreporter + +import ( + "context" + "flag" + "path/filepath" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" + "k8s.io/klog/v2/textlogger" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + "go.goms.io/fleet/pkg/utils/parallelizer" +) + +const ( + defaultWorkerCount = 4 +) + +const ( + memberReservedNSName = "fleet-member-experimental" +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. +var ( + hubCfg *rest.Config + hubEnv *envtest.Environment + hubClient client.Client + hubMgr manager.Manager + statusBackReporter *Reconciler + + ctx context.Context + cancel context.CancelFunc +) + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Status Back-Reporter Integration Test Suite") +} + +var _ = BeforeSuite(func() { + ctx, cancel = context.WithCancel(context.TODO()) + + By("Setup klog") + fs := flag.NewFlagSet("klog", flag.ContinueOnError) + klog.InitFlags(fs) + Expect(fs.Parse([]string{"--v", "5", "-add_dir_header", "true"})).Should(Succeed()) + + klog.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("Bootstrapping test environments") + hubEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{ + filepath.Join("../../../", "config", "crd", "bases"), + filepath.Join("../../../", "test", "manifests"), + }, + } + + var err error + hubCfg, err = hubEnv.Start() + Expect(err).ToNot(HaveOccurred()) + Expect(hubCfg).ToNot(BeNil()) + + // The schemes have been set up in the TestMain method. + + By("Building the K8s clients") + hubClient, err = client.New(hubCfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).ToNot(HaveOccurred()) + Expect(hubClient).ToNot(BeNil()) + + hubDynamicClient, err := dynamic.NewForConfig(hubCfg) + Expect(err).ToNot(HaveOccurred()) + Expect(hubDynamicClient).ToNot(BeNil()) + + // Create the reserved namespace for KubeFleet member cluster. 
+ memberReservedNS := corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: memberReservedNSName, + }, + } + Expect(hubClient.Create(ctx, &memberReservedNS)).To(Succeed()) + + By("Setting up the controller and the controller manager for member cluster 1") + hubMgr, err = ctrl.NewManager(hubCfg, ctrl.Options{ + Scheme: scheme.Scheme, + Metrics: server.Options{ + BindAddress: "0", + }, + Logger: textlogger.NewLogger(textlogger.NewConfig(textlogger.Verbosity(4))), + }) + Expect(err).ToNot(HaveOccurred()) + + statusBackReporter = NewReconciler( + hubClient, + hubDynamicClient, + parallelizer.NewParallelizer(defaultWorkerCount), + ) + Expect(statusBackReporter.SetupWithManager(hubMgr)).To(Succeed()) + + go func() { + defer GinkgoRecover() + Expect(hubMgr.Start(ctx)).To(Succeed()) + }() +}) + +var _ = AfterSuite(func() { + defer klog.Flush() + + cancel() + By("Tearing down the test environment") + Expect(hubEnv.Stop()).To(Succeed()) +}) diff --git a/pkg/controllers/updaterun/controller.go b/pkg/controllers/updaterun/controller.go index df1e9dcc6..82a51d511 100644 --- a/pkg/controllers/updaterun/controller.go +++ b/pkg/controllers/updaterun/controller.go @@ -77,6 +77,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim klog.ErrorS(err, "Failed to get updateRun object", "updateRun", req.NamespacedName) return runtime.Result{}, client.IgnoreNotFound(err) } + + // Update all existing conditions' ObservedGeneration to the current generation. + updateAllStatusConditionsGeneration(updateRun.GetUpdateRunStatus(), updateRun.GetGeneration()) + runObjRef := klog.KObj(updateRun) // Remove waitTime from the updateRun status for BeforeStageTask and AfterStageTask for type Approval. @@ -110,12 +114,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim var toBeUpdatedBindings, toBeDeletedBindings []placementv1beta1.BindingObj updateRunStatus := updateRun.GetUpdateRunStatus() initCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionInitialized)) - // Check if initialized regardless of generation. - // The updateRun spec fields are immutable except for the state field. When the state changes, - // the update run generation increments, but we don't need to reinitialize since initialization is a one-time setup. - if !(initCond != nil && initCond.Status == metav1.ConditionTrue) { + if !condition.IsConditionStatusTrue(initCond, updateRun.GetGeneration()) { // Check if initialization failed for the current generation. - if initCond != nil && initCond.Status == metav1.ConditionFalse { + if condition.IsConditionStatusFalse(initCond, updateRun.GetGeneration()) { klog.V(2).InfoS("The updateRun has failed to initialize", "errorMsg", initCond.Message, "updateRun", runObjRef) return runtime.Result{}, nil } @@ -158,9 +159,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun) } - // Execute the updateRun. - if state == placementv1beta1.StateRun { - klog.V(2).InfoS("Continue to execute the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef) + switch state { + case placementv1beta1.StateInitialize: + klog.V(2).InfoS("The updateRun is initialized but not executed, waiting to execute", "state", state, "updateRun", runObjRef) + case placementv1beta1.StateRun: + // Execute the updateRun. 
+ klog.V(2).InfoS("Continue to execute the updateRun", "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef) finished, waitTime, execErr := r.execute(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings) if errors.Is(execErr, errStagedUpdatedAborted) { // errStagedUpdatedAborted cannot be retried. @@ -172,21 +176,48 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun) } - // The execution is not finished yet or it encounters a retriable error. - // We need to record the status and requeue. - if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil { - return runtime.Result{}, updateErr + return r.handleIncompleteUpdateRun(ctx, updateRun, waitTime, execErr, state, runObjRef) + case placementv1beta1.StateStop: + // Stop the updateRun. + klog.V(2).InfoS("Stopping the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef) + finished, waitTime, stopErr := r.stop(updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings) + if errors.Is(stopErr, errStagedUpdatedAborted) { + // errStagedUpdatedAborted cannot be retried. + return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, stopErr.Error()) } - klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef) - if execErr != nil { - return runtime.Result{}, execErr + + if finished { + klog.V(2).InfoS("The updateRun is stopped", "updateRun", runObjRef) + return runtime.Result{}, r.recordUpdateRunStopped(ctx, updateRun) } - return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil + + return r.handleIncompleteUpdateRun(ctx, updateRun, waitTime, stopErr, state, runObjRef) + + default: + // Initialize, Run, or Stop are the only supported states. + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("found unsupported updateRun state: %s", state)) + klog.ErrorS(unexpectedErr, "Invalid updateRun state", "state", state, "updateRun", runObjRef) + return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, unexpectedErr.Error()) } - klog.V(2).InfoS("The updateRun is initialized but not executed, waiting to execute", "state", state, "updateRun", runObjRef) return runtime.Result{}, nil } +func (r *Reconciler) handleIncompleteUpdateRun(ctx context.Context, updateRun placementv1beta1.UpdateRunObj, waitTime time.Duration, err error, state placementv1beta1.State, runObjRef klog.ObjectRef) (runtime.Result, error) { + // The execution or stopping is not finished yet or it encounters a retriable error. + // We need to record the status and requeue. + if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil { + return runtime.Result{}, updateErr + } + + klog.V(2).InfoS("The updateRun is not finished yet", "state", state, "requeueWaitTime", waitTime, "err", err, "updateRun", runObjRef) + + // Return execution or stopping retriable error if any. + if err != nil { + return runtime.Result{}, err + } + return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil +} + // handleDelete handles the deletion of the updateRun object. // We delete all the dependent resources, including approvalRequest objects, of the updateRun object. 
func (r *Reconciler) handleDelete(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) (bool, time.Duration, error) { @@ -277,6 +308,25 @@ func (r *Reconciler) recordUpdateRunFailed(ctx context.Context, updateRun placem return nil } +// recordUpdateRunStopped records the progressing condition as stopped in the updateRun status. +func (r *Reconciler) recordUpdateRunStopped(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error { + updateRunStatus := updateRun.GetUpdateRunStatus() + meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{ + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionFalse, + ObservedGeneration: updateRun.GetGeneration(), + Reason: condition.UpdateRunStoppedReason, + Message: "The update run has been stopped", + }) + + if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil { + klog.ErrorS(updateErr, "Failed to update the updateRun status as stopped", "updateRun", klog.KObj(updateRun)) + // updateErr can be retried. + return controller.NewUpdateIgnoreConflictError(updateErr) + } + return nil +} + // recordUpdateRunStatus records the updateRun status. func (r *Reconciler) recordUpdateRunStatus(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error { if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil { @@ -484,3 +534,57 @@ func removeWaitTimeFromUpdateRunStatus(updateRun placementv1beta1.UpdateRunObj) } } } + +// updateAllStatusConditionsGeneration iterates through all existing conditions in the UpdateRun status +// and updates their ObservedGeneration field to the current UpdateRun generation. +func updateAllStatusConditionsGeneration(updateRunStatus *placementv1beta1.UpdateRunStatus, generation int64) { + // Update main UpdateRun conditions. + for i := range updateRunStatus.Conditions { + updateRunStatus.Conditions[i].ObservedGeneration = generation + } + + // Update stage-level conditions and nested task conditions if it exists. + for i := range updateRunStatus.StagesStatus { + stageStatus := &updateRunStatus.StagesStatus[i] + + // Update stage conditions. + updateAllStageStatusConditionsGeneration(stageStatus, generation) + } + + // Update deletion stage conditions and nested tasks if it exists. + if updateRunStatus.DeletionStageStatus != nil { + deletionStageStatus := updateRunStatus.DeletionStageStatus + + // Update deletion stage conditions. + updateAllStageStatusConditionsGeneration(deletionStageStatus, generation) + } +} + +// updateAllStageStatusConditionsGeneration updates all conditions' ObservedGeneration in the given stage status. +func updateAllStageStatusConditionsGeneration(stageStatus *placementv1beta1.StageUpdatingStatus, generation int64) { + // Update stage conditions. + for j := range stageStatus.Conditions { + stageStatus.Conditions[j].ObservedGeneration = generation + } + + // Update before stage task conditions. + for j := range stageStatus.BeforeStageTaskStatus { + for k := range stageStatus.BeforeStageTaskStatus[j].Conditions { + stageStatus.BeforeStageTaskStatus[j].Conditions[k].ObservedGeneration = generation + } + } + + // Update after stage task conditions. + for j := range stageStatus.AfterStageTaskStatus { + for k := range stageStatus.AfterStageTaskStatus[j].Conditions { + stageStatus.AfterStageTaskStatus[j].Conditions[k].ObservedGeneration = generation + } + } + + // Update cluster-level conditions. 
+ for j := range stageStatus.Clusters { + for k := range stageStatus.Clusters[j].Conditions { + stageStatus.Clusters[j].Conditions[k].ObservedGeneration = generation + } + } +} diff --git a/pkg/controllers/updaterun/controller_integration_test.go b/pkg/controllers/updaterun/controller_integration_test.go index 6453d5617..9f014f478 100644 --- a/pkg/controllers/updaterun/controller_integration_test.go +++ b/pkg/controllers/updaterun/controller_integration_test.go @@ -332,6 +332,26 @@ func generateFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *p } } +func generateStoppingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { + return &prometheusclientmodel.Metric{ + Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + string(metav1.ConditionUnknown), condition.UpdateRunStoppingReason), + Gauge: &prometheusclientmodel.Gauge{ + Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), + }, + } +} + +func generateStoppedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { + return &prometheusclientmodel.Metric{ + Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing), + string(metav1.ConditionFalse), condition.UpdateRunStoppedReason), + Gauge: &prometheusclientmodel.Gauge{ + Value: ptr.To(float64(time.Now().UnixNano()) / 1e9), + }, + } +} + func generateSucceededMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric { return &prometheusclientmodel.Metric{ Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionSucceeded), @@ -421,30 +441,19 @@ func generateTestClusterResourceBindingsAndClusters(policySnapshotIndex int) ([] if i%2 == 0 { region = regionWestus } - // reserse the order of the clusters by index + // reverse the order of the clusters by index targetClusters[i] = generateTestMemberCluster(numTargetClusters-1-i, "cluster-"+strconv.Itoa(i), map[string]string{"group": "prod", "region": region}) resourceBindings[i] = generateTestClusterResourceBinding(policySnapshotName, targetClusters[i].Name, placementv1beta1.BindingStateScheduled) } - unscheduledClusters := make([]*clusterv1beta1.MemberCluster, numUnscheduledClusters) - // Half of the unscheduled clusters have old policy snapshot. - for i := range numUnscheduledClusters / 2 { - unscheduledClusters[i] = generateTestMemberCluster(i, "unscheduled-cluster-"+strconv.Itoa(i), map[string]string{"group": "staging"}) - // Update the policySnapshot name so that these clusters are considered to-be-deleted. - resourceBindings[numTargetClusters+i] = generateTestClusterResourceBinding(policySnapshotName+"a", unscheduledClusters[i].Name, placementv1beta1.BindingStateUnscheduled) - } - // The other half of the unscheduled clusters have latest policy snapshot but still unscheduled. 
- for i := numUnscheduledClusters / 2; i < numUnscheduledClusters; i++ { - unscheduledClusters[i] = generateTestMemberCluster(i, "unscheduled-cluster-"+strconv.Itoa(i), map[string]string{"group": "staging"}) - resourceBindings[numTargetClusters+i] = generateTestClusterResourceBinding(policySnapshotName, unscheduledClusters[i].Name, placementv1beta1.BindingStateUnscheduled) - } + resourceBindings, unscheduledClusters := generateTestUnscheduledClusterResourceBindingsAndClusters(policySnapshotName, numUnscheduledClusters, resourceBindings) return resourceBindings, targetClusters, unscheduledClusters } -func generateSmallTestClusterResourceBindingsAndClusters(policySnapshotIndex int) ([]*placementv1beta1.ClusterResourceBinding, []*clusterv1beta1.MemberCluster, []*clusterv1beta1.MemberCluster) { +func generateSmallTestClusterResourceBindingsAndClusters(policySnapshotIndex int, numUnscheduledClusters int) ([]*placementv1beta1.ClusterResourceBinding, []*clusterv1beta1.MemberCluster, []*clusterv1beta1.MemberCluster) { numTargetClusters := 3 policySnapshotName := fmt.Sprintf(placementv1beta1.PolicySnapshotNameFmt, testCRPName, policySnapshotIndex) - resourceBindings := make([]*placementv1beta1.ClusterResourceBinding, numTargetClusters) + resourceBindings := make([]*placementv1beta1.ClusterResourceBinding, numTargetClusters+numUnscheduledClusters) targetClusters := make([]*clusterv1beta1.MemberCluster, numTargetClusters) for i := range targetClusters { // split the clusters into 2 regions @@ -452,14 +461,33 @@ func generateSmallTestClusterResourceBindingsAndClusters(policySnapshotIndex int if i%2 == 0 { region = regionWestus } - // reserse the order of the clusters by index + // reverse the order of the clusters by index targetClusters[i] = generateTestMemberCluster(numTargetClusters-1-i, "cluster-"+strconv.Itoa(i), map[string]string{"group": "prod", "region": region}) resourceBindings[i] = generateTestClusterResourceBinding(policySnapshotName, targetClusters[i].Name, placementv1beta1.BindingStateScheduled) } - unscheduledClusters := make([]*clusterv1beta1.MemberCluster, 0) + + resourceBindings, unscheduledClusters := generateTestUnscheduledClusterResourceBindingsAndClusters(policySnapshotName, numUnscheduledClusters, resourceBindings) return resourceBindings, targetClusters, unscheduledClusters } +func generateTestUnscheduledClusterResourceBindingsAndClusters(policySnapshotName string, numUnscheduledClusters int, bindings []*placementv1beta1.ClusterResourceBinding) ([]*placementv1beta1.ClusterResourceBinding, []*clusterv1beta1.MemberCluster) { + targetClusters := len(bindings) - numUnscheduledClusters + unscheduledClusters := make([]*clusterv1beta1.MemberCluster, numUnscheduledClusters) + unscheduledClusterName := "unscheduled-cluster-%d" + // Half of the unscheduled clusters have old policy snapshot. + for i := range numUnscheduledClusters / 2 { + unscheduledClusters[i] = generateTestMemberCluster(i, fmt.Sprintf(unscheduledClusterName, i), map[string]string{"group": "staging"}) + // Update the policySnapshot name so that these clusters are considered to-be-deleted. + bindings[targetClusters+i] = generateTestClusterResourceBinding(policySnapshotName+"old", unscheduledClusters[i].Name, placementv1beta1.BindingStateUnscheduled) + } + // The other half of the unscheduled clusters have latest policy snapshot but still unscheduled. 
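+	// For example, with 4 unscheduled clusters, unscheduled-cluster-0 and unscheduled-cluster-1 are
+	// bound to the stale snapshot name (policySnapshotName + "old") in the loop above, while
+	// unscheduled-cluster-2 and unscheduled-cluster-3 below keep the latest policy snapshot name.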
+ for i := numUnscheduledClusters / 2; i < numUnscheduledClusters; i++ { + unscheduledClusters[i] = generateTestMemberCluster(i, fmt.Sprintf(unscheduledClusterName, i), map[string]string{"group": "staging"}) + bindings[targetClusters+i] = generateTestClusterResourceBinding(policySnapshotName, unscheduledClusters[i].Name, placementv1beta1.BindingStateUnscheduled) + } + return bindings, unscheduledClusters +} + func generateTestClusterResourceBinding(policySnapshotName, targetCluster string, state placementv1beta1.BindingState) *placementv1beta1.ClusterResourceBinding { binding := &placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ @@ -823,3 +851,18 @@ func generateFalseProgressingCondition(obj client.Object, condType any, reason s falseCond.Reason = reason return falseCond } + +func generateFalseConditionWithReason(obj client.Object, condType any, reason string) metav1.Condition { + falseCond := generateFalseCondition(obj, condType) + falseCond.Reason = reason + return falseCond +} + +func generateProgressingUnknownConditionWithReason(obj client.Object, reason string) metav1.Condition { + return metav1.Condition{ + Status: metav1.ConditionUnknown, + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + ObservedGeneration: obj.GetGeneration(), + Reason: reason, + } +} diff --git a/pkg/controllers/updaterun/controller_test.go b/pkg/controllers/updaterun/controller_test.go index fdc3fa5f7..f57f27c5f 100644 --- a/pkg/controllers/updaterun/controller_test.go +++ b/pkg/controllers/updaterun/controller_test.go @@ -1079,3 +1079,295 @@ func TestRemoveWaitTimeFromUpdateRunStatus(t *testing.T) { }) } } + +func TestUpdateAllStatusConditionsGeneration(t *testing.T) { + tests := map[string]struct { + status *placementv1beta1.UpdateRunStatus + generation int64 + wantStatus *placementv1beta1.UpdateRunStatus + }{ + "should update ObservedGeneration for main conditions": { + status: &placementv1beta1.UpdateRunStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + ObservedGeneration: 4, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 4, + }, + }, + }, + generation: 5, + wantStatus: &placementv1beta1.UpdateRunStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + ObservedGeneration: 5, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 5, + }, + }, + }, + }, + "should update ObservedGeneration for stage conditions": { + status: &placementv1beta1.UpdateRunStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + ObservedGeneration: 2, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 2, + }, + }, + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 2, + }, + }, + }, + }, + }, + generation: 3, + wantStatus: &placementv1beta1.UpdateRunStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + 
ObservedGeneration: 3, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 3, + }, + }, + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 3, + }, + }, + }, + }, + }, + }, + "should handle empty status": { + status: &placementv1beta1.UpdateRunStatus{}, + generation: 2, + wantStatus: &placementv1beta1.UpdateRunStatus{}, + }, + "should handle complex nested structure": { + status: &placementv1beta1.UpdateRunStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + { + Type: string(placementv1beta1.StageUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + BeforeStageTaskStatus: []placementv1beta1.StageTaskStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageTaskConditionWaitTimeElapsed), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + }, + }, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + }, + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + }, + }, + }, + }, + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 7, + }, + }, + }, + }, + }, + }, + generation: 8, + wantStatus: &placementv1beta1.UpdateRunStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + { + Type: string(placementv1beta1.StageUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + BeforeStageTaskStatus: 
[]placementv1beta1.StageTaskStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageTaskConditionWaitTimeElapsed), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + }, + }, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + }, + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + }, + }, + }, + }, + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 8, + }, + }, + }, + }, + }, + }, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + updateAllStatusConditionsGeneration(tt.status, tt.generation) + if diff := cmp.Diff(tt.wantStatus, tt.status); diff != "" { + t.Errorf("updateAllStatusConditionsGeneration() mismatch (-want +got):\n%s", diff) + } + }) + } +} diff --git a/pkg/controllers/updaterun/execution.go b/pkg/controllers/updaterun/execution.go index 72e265ec2..6eb47dd00 100644 --- a/pkg/controllers/updaterun/execution.go +++ b/pkg/controllers/updaterun/execution.go @@ -18,7 +18,6 @@ package updaterun import ( "context" - "errors" "fmt" "reflect" "strconv" @@ -68,14 +67,7 @@ func (r *Reconciler) execute( // Set up defer function to handle errStagedUpdatedAborted. defer func() { - if errors.Is(err, errStagedUpdatedAborted) { - if updatingStageStatus != nil { - markStageUpdatingFailed(updatingStageStatus, updateRun.GetGeneration(), err.Error()) - } else { - // Handle deletion stage case. - markStageUpdatingFailed(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration(), err.Error()) - } - } + checkIfErrorStagedUpdateAborted(err, updateRun, updatingStageStatus) }() // Mark updateRun as progressing if it's not already marked as waiting or stuck. @@ -95,7 +87,7 @@ func (r *Reconciler) execute( } maxConcurrency, err := calculateMaxConcurrencyValue(updateRunStatus, updatingStageIndex) if err != nil { - return false, 0, err + return false, 0, fmt.Errorf("%w: %s", errStagedUpdatedAborted, err.Error()) } waitTime, err = r.executeUpdatingStage(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, maxConcurrency) // The execution has not finished yet. @@ -232,9 +224,7 @@ func (r *Reconciler) executeUpdatingStage( } } markClusterUpdatingStarted(clusterStatus, updateRun.GetGeneration()) - if finishedClusterCount == 0 { - markStageUpdatingStarted(updatingStageStatus, updateRun.GetGeneration()) - } + markStageUpdatingProgressStarted(updatingStageStatus, updateRun.GetGeneration()) // Need to continue as we need to process at most maxConcurrency number of clusters in parallel. 
continue } @@ -338,7 +328,7 @@ func (r *Reconciler) executeDeleteStage( existingDeleteStageClusterMap[existingDeleteStageStatus.Clusters[i].ClusterName] = &existingDeleteStageStatus.Clusters[i] } // Mark the delete stage as started in case it's not. - markStageUpdatingStarted(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration()) + markStageUpdatingProgressStarted(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration()) for _, binding := range toBeDeletedBindings { bindingSpec := binding.GetBindingSpec() curCluster, exist := existingDeleteStageClusterMap[bindingSpec.TargetCluster] @@ -564,7 +554,7 @@ func calculateMaxConcurrencyValue(status *placementv1beta1.UpdateRunStatus, stag func aggregateUpdateRunStatus(updateRun placementv1beta1.UpdateRunObj, stageName string, stuckClusterNames []string) { if len(stuckClusterNames) > 0 { markUpdateRunStuck(updateRun, stageName, strings.Join(stuckClusterNames, ", ")) - } else { + } else if updateRun.GetUpdateRunSpec().State == placementv1beta1.StateRun { // If there is no stuck cluster but some progress has been made, mark the update run as progressing. markUpdateRunProgressing(updateRun) } @@ -672,7 +662,7 @@ func markUpdateRunProgressing(updateRun placementv1beta1.UpdateRunObj) { }) } -// markUpdateRunProgressingIfNotWaitingOrStuck marks the update run as proegressing in memory if it's not marked as waiting or stuck already. +// markUpdateRunProgressingIfNotWaitingOrStuck marks the update run as progressing in memory if it's not marked as waiting or stuck already. func markUpdateRunProgressingIfNotWaitingOrStuck(updateRun placementv1beta1.UpdateRunObj) { updateRunStatus := updateRun.GetUpdateRunStatus() progressingCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionProgressing)) @@ -708,8 +698,8 @@ func markUpdateRunWaiting(updateRun placementv1beta1.UpdateRunObj, message strin }) } -// markStageUpdatingStarted marks the stage updating status as started in memory. -func markStageUpdatingStarted(stageUpdatingStatus *placementv1beta1.StageUpdatingStatus, generation int64) { +// markStageUpdatingProgressStarted marks the stage updating status as started in memory. 
+func markStageUpdatingProgressStarted(stageUpdatingStatus *placementv1beta1.StageUpdatingStatus, generation int64) { if stageUpdatingStatus.StartTime == nil { stageUpdatingStatus.StartTime = &metav1.Time{Time: time.Now()} } diff --git a/pkg/controllers/updaterun/execution_integration_test.go b/pkg/controllers/updaterun/execution_integration_test.go index 481c8b58c..0c9ab9697 100644 --- a/pkg/controllers/updaterun/execution_integration_test.go +++ b/pkg/controllers/updaterun/execution_integration_test.go @@ -728,7 +728,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { updateRun = generateTestClusterStagedUpdateRun() crp = generateTestClusterResourcePlacement() - resourceBindings, targetClusters, _ = generateSmallTestClusterResourceBindingsAndClusters(1) + resourceBindings, targetClusters, _ = generateSmallTestClusterResourceBindingsAndClusters(1, 0) policySnapshot = generateTestClusterSchedulingPolicySnapshot(1, len(targetClusters)) resourceSnapshot = generateTestClusterResourceSnapshot() resourceSnapshot = generateTestClusterResourceSnapshot() @@ -810,7 +810,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and the execution started") - initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) wantStatus = generateExecutionStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") @@ -904,7 +904,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and the execution started") - initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) wantStatus = generateExecutionStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") @@ -1013,7 +1013,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and the execution started") - initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) wantStatus = generateExecutionStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") @@ -1145,7 +1145,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and the execution started") - initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) 
wantStatus = generateExecutionStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") @@ -1237,7 +1237,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and the execution started") - initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) wantStatus = generateExecutionStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") @@ -1300,7 +1300,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and the execution has not started") - initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) wantStatus = generateExecutionNotStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") @@ -1534,7 +1534,7 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization succeeded and but not execution started") - wantStatus = generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy) + wantStatus = generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 0) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") By("Checking update run status metrics are emitted") @@ -1560,8 +1560,9 @@ var _ = Describe("UpdateRun execution tests - single stage", func() { It("Should start execution after changing the state to Run", func() { By("Updating the updateRun state to Run") - updateRun.Spec.State = placementv1beta1.StateRun - Expect(k8sClient.Update(ctx, updateRun)).Should(Succeed(), "failed to update the updateRun state") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateRun) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) By("Validating the execution has started") wantStatus = generateExecutionStartedStatus(updateRun, wantStatus) diff --git a/pkg/controllers/updaterun/initialization.go b/pkg/controllers/updaterun/initialization.go index 223029c3a..9ef3f3376 100644 --- a/pkg/controllers/updaterun/initialization.go +++ b/pkg/controllers/updaterun/initialization.go @@ -201,7 +201,7 @@ func (r *Reconciler) collectScheduledClusters( updateRunRef := klog.KObj(updateRun) policySnapshotRef := klog.KObj(latestPolicySnapshot) - bindingObjs, err := controller.ListBindingsFromKey(ctx, r.Client, placementKey) + bindingObjs, err := controller.ListBindingsFromKey(ctx, r.Client, placementKey, true) if err != nil { klog.ErrorS(err, "Failed to list bindings", "placement", placementKey, "policySnapshot", policySnapshotRef, "updateRun", updateRunRef) // list err can be retried. 
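For context on the updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) calls added to the integration tests above: below is a minimal, hypothetical sketch of the generation-stamping traversal that TestUpdateAllStatusConditionsGeneration exercises, written against the placementv1beta1 status types shown in this diff. The real helper lives in controller.go (only a fragment of it appears in this patch), so the Sketch-suffixed names and the exact field coverage here are illustrative assumptions, not the repository's implementation.

package updaterun

import (
	placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
)

// updateAllStatusConditionsGenerationSketch stamps the given generation onto every
// condition in the update run status: top-level conditions, per-stage conditions,
// before/after-stage task conditions, per-cluster conditions, and the deletion stage.
func updateAllStatusConditionsGenerationSketch(status *placementv1beta1.UpdateRunStatus, generation int64) {
	for i := range status.Conditions {
		status.Conditions[i].ObservedGeneration = generation
	}
	for i := range status.StagesStatus {
		updateStageConditionsGenerationSketch(&status.StagesStatus[i], generation)
	}
	if status.DeletionStageStatus != nil {
		updateStageConditionsGenerationSketch(status.DeletionStageStatus, generation)
	}
}

// updateStageConditionsGenerationSketch stamps the generation onto all conditions
// nested under a single stage status, matching the fragment visible earlier in this diff.
func updateStageConditionsGenerationSketch(stageStatus *placementv1beta1.StageUpdatingStatus, generation int64) {
	for i := range stageStatus.Conditions {
		stageStatus.Conditions[i].ObservedGeneration = generation
	}
	for i := range stageStatus.BeforeStageTaskStatus {
		for j := range stageStatus.BeforeStageTaskStatus[i].Conditions {
			stageStatus.BeforeStageTaskStatus[i].Conditions[j].ObservedGeneration = generation
		}
	}
	for i := range stageStatus.AfterStageTaskStatus {
		for j := range stageStatus.AfterStageTaskStatus[i].Conditions {
			stageStatus.AfterStageTaskStatus[i].Conditions[j].ObservedGeneration = generation
		}
	}
	for j := range stageStatus.Clusters {
		for k := range stageStatus.Clusters[j].Conditions {
			stageStatus.Clusters[j].Conditions[k].ObservedGeneration = generation
		}
	}
}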
diff --git a/pkg/controllers/updaterun/initialization_integration_test.go b/pkg/controllers/updaterun/initialization_integration_test.go index 5948ea6fd..03614ac72 100644 --- a/pkg/controllers/updaterun/initialization_integration_test.go +++ b/pkg/controllers/updaterun/initialization_integration_test.go @@ -1039,6 +1039,7 @@ func generateSucceededInitializationStatusForSmallClusters( resourceSnapshotIndex string, policySnapshot *placementv1beta1.ClusterSchedulingPolicySnapshot, updateStrategy *placementv1beta1.ClusterStagedUpdateStrategy, + numUnscheduledClusters int, ) *placementv1beta1.UpdateRunStatus { status := &placementv1beta1.UpdateRunStatus{ PolicySnapshotIndexUsed: policySnapshot.Labels[placementv1beta1.PolicyIndexLabel], @@ -1065,6 +1066,10 @@ func generateSucceededInitializationStatusForSmallClusters( generateTrueCondition(updateRun, placementv1beta1.StagedUpdateRunConditionInitialized), }, } + for i := range numUnscheduledClusters { + status.DeletionStageStatus.Clusters = append(status.DeletionStageStatus.Clusters, + placementv1beta1.ClusterUpdatingStatus{ClusterName: fmt.Sprintf("unscheduled-cluster-%d", i)}) + } for i := range status.StagesStatus { var beforeTasks []placementv1beta1.StageTaskStatus for _, task := range updateStrategy.Spec.Stages[i].BeforeStageTasks { diff --git a/pkg/controllers/updaterun/stop.go b/pkg/controllers/updaterun/stop.go new file mode 100644 index 000000000..f36604db6 --- /dev/null +++ b/pkg/controllers/updaterun/stop.go @@ -0,0 +1,251 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package updaterun + +import ( + "errors" + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/klog/v2" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils/condition" + "go.goms.io/fleet/pkg/utils/controller" +) + +// stop handles stopping the update run. +func (r *Reconciler) stop( + updateRun placementv1beta1.UpdateRunObj, + updatingStageIndex int, + toBeUpdatedBindings, toBeDeletedBindings []placementv1beta1.BindingObj, +) (finished bool, waitTime time.Duration, stopErr error) { + updateRunStatus := updateRun.GetUpdateRunStatus() + var updatingStageStatus *placementv1beta1.StageUpdatingStatus + + // Set up defer function to handle errStagedUpdatedAborted. + defer func() { + checkIfErrorStagedUpdateAborted(stopErr, updateRun, updatingStageStatus) + }() + + markUpdateRunStopping(updateRun) + + if updatingStageIndex < len(updateRunStatus.StagesStatus) { + return r.stopUpdatingStage(updateRun, updatingStageIndex, toBeUpdatedBindings) + } + // All the stages have finished, stop the delete stage. + finished, stopErr = r.stopDeleteStage(updateRun, toBeDeletedBindings) + return finished, clusterUpdatingWaitTime, stopErr +} + +// stopUpdatingStage stops the updating stage by letting the updating bindings finish and not starting new updates. 
+func (r *Reconciler) stopUpdatingStage( + updateRun placementv1beta1.UpdateRunObj, + updatingStageIndex int, + toBeUpdatedBindings []placementv1beta1.BindingObj, +) (bool, time.Duration, error) { + updateRunStatus := updateRun.GetUpdateRunStatus() + updatingStageStatus := &updateRunStatus.StagesStatus[updatingStageIndex] + updateRunRef := klog.KObj(updateRun) + // Create the map of the toBeUpdatedBindings. + toBeUpdatedBindingsMap := make(map[string]placementv1beta1.BindingObj, len(toBeUpdatedBindings)) + for _, binding := range toBeUpdatedBindings { + bindingSpec := binding.GetBindingSpec() + toBeUpdatedBindingsMap[bindingSpec.TargetCluster] = binding + } + // Mark the stage as stopping in case it's not. + markStageUpdatingStopping(updatingStageStatus, updateRun.GetGeneration()) + clusterUpdatingCount := 0 + var stuckClusterNames []string + var clusterUpdateErrors []error + // Go through each cluster in the stage and check if it's updating/succeeded/failed/not started. + for i := 0; i < len(updatingStageStatus.Clusters); i++ { + clusterStatus := &updatingStageStatus.Clusters[i] + clusterStartedCond := meta.FindStatusCondition(clusterStatus.Conditions, string(placementv1beta1.ClusterUpdatingConditionStarted)) + if !condition.IsConditionStatusTrue(clusterStartedCond, updateRun.GetGeneration()) { + // Cluster has not started updating therefore no need to do anything. + continue + } + + clusterUpdateSucceededCond := meta.FindStatusCondition(clusterStatus.Conditions, string(placementv1beta1.ClusterUpdatingConditionSucceeded)) + if condition.IsConditionStatusFalse(clusterUpdateSucceededCond, updateRun.GetGeneration()) || condition.IsConditionStatusTrue(clusterUpdateSucceededCond, updateRun.GetGeneration()) { + // The cluster has already been updated or failed to update. + continue + } + + clusterUpdatingCount++ + + binding := toBeUpdatedBindingsMap[clusterStatus.ClusterName] + finished, updateErr := checkClusterUpdateResult(binding, clusterStatus, updatingStageStatus, updateRun) + if updateErr != nil { + clusterUpdateErrors = append(clusterUpdateErrors, updateErr) + } + if finished { + // The cluster has finished successfully, we can process another cluster in this round. + clusterUpdatingCount-- + } else { + // If cluster update has been running for more than "updateRunStuckThreshold", mark the update run as stuck. + timeElapsed := time.Since(clusterStartedCond.LastTransitionTime.Time) + if timeElapsed > updateRunStuckThreshold { + klog.V(2).InfoS("Time waiting for cluster update to finish passes threshold, mark the update run as stuck", "time elapsed", timeElapsed, "threshold", updateRunStuckThreshold, "cluster", clusterStatus.ClusterName, "stage", updatingStageStatus.StageName, "updateRun", updateRunRef) + stuckClusterNames = append(stuckClusterNames, clusterStatus.ClusterName) + } + } + } + + // If there are stuck clusters, aggregate them into an error. + aggregateUpdateRunStatus(updateRun, updatingStageStatus.StageName, stuckClusterNames) + + // Aggregate and return errors. + if len(clusterUpdateErrors) > 0 { + // Even though we aggregate errors, we can still check if one of the errors is a staged update aborted error by using errors.Is in the caller. + return false, 0, utilerrors.NewAggregate(clusterUpdateErrors) + } + + if clusterUpdatingCount == 0 { + // All the clusters in the stage have finished updating or not started. 
+		markStageUpdatingStopped(updatingStageStatus, updateRun.GetGeneration())
+		klog.InfoS("The stage has finished all clusters updating", "stage", updatingStageStatus.StageName, "updateRun", updateRunRef)
+		return true, 0, nil
+	}
+	// Some clusters are still updating.
+	klog.InfoS("The updating stage is waiting for updating clusters to finish before completely stopping", "numberOfUpdatingClusters", clusterUpdatingCount, "stage", updatingStageStatus.StageName, "updateRun", updateRunRef)
+	return false, clusterUpdatingWaitTime, nil
+}
+
+// stopDeleteStage stops the delete stage by letting the deleting bindings finish.
+func (r *Reconciler) stopDeleteStage(
+	updateRun placementv1beta1.UpdateRunObj,
+	toBeDeletedBindings []placementv1beta1.BindingObj,
+) (bool, error) {
+	updateRunRef := klog.KObj(updateRun)
+	updateRunStatus := updateRun.GetUpdateRunStatus()
+	existingDeleteStageStatus := updateRunStatus.DeletionStageStatus
+	existingDeleteStageClusterMap := make(map[string]*placementv1beta1.ClusterUpdatingStatus, len(existingDeleteStageStatus.Clusters))
+	for i := range existingDeleteStageStatus.Clusters {
+		existingDeleteStageClusterMap[existingDeleteStageStatus.Clusters[i].ClusterName] = &existingDeleteStageStatus.Clusters[i]
+	}
+	// Mark the delete stage as stopping in case it's not.
+	markStageUpdatingStopping(existingDeleteStageStatus, updateRun.GetGeneration())
+
+	for _, binding := range toBeDeletedBindings {
+		bindingSpec := binding.GetBindingSpec()
+		curCluster, exist := existingDeleteStageClusterMap[bindingSpec.TargetCluster]
+		if !exist {
+			// This is unexpected because we already checked in validation.
+			missingErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the to be deleted cluster `%s` is not in the deleting stage during stopping", bindingSpec.TargetCluster))
+			klog.ErrorS(missingErr, "The clusters in the deleting stage do not include all the to-be-deleted bindings", "updateRun", updateRunRef)
+			return false, fmt.Errorf("%w: %s", errStagedUpdatedAborted, missingErr.Error())
+		}
+		// In validation, we already check the binding must exist in the status.
+		delete(existingDeleteStageClusterMap, bindingSpec.TargetCluster)
+		// Make sure the cluster is not marked as deleted as the binding is still there.
+		if condition.IsConditionStatusTrue(meta.FindStatusCondition(curCluster.Conditions, string(placementv1beta1.ClusterUpdatingConditionSucceeded)), updateRun.GetGeneration()) {
+			// The cluster status is marked as deleted.
+			unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the deleted cluster `%s` in the deleting stage still has a binding", bindingSpec.TargetCluster))
+			klog.ErrorS(unexpectedErr, "The cluster in the deleting stage is not removed yet but marked as deleted", "cluster", curCluster.ClusterName, "updateRun", updateRunRef)
+			return false, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error())
+		}
+		if condition.IsConditionStatusTrue(meta.FindStatusCondition(curCluster.Conditions, string(placementv1beta1.ClusterUpdatingConditionStarted)), updateRun.GetGeneration()) {
+			// The cluster status is marked as being deleted.
+			if binding.GetDeletionTimestamp().IsZero() {
+				// The cluster is marked as deleting but the binding is not deleting.
+				unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the cluster `%s` in the deleting stage is marked as deleting but its corresponding binding is not deleting", curCluster.ClusterName))
+				klog.ErrorS(unexpectedErr, "The binding should be deleting before we mark a cluster deleting", "clusterStatus", curCluster, "updateRun", updateRunRef)
+				return false, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error())
+			}
+			continue
+		}
+	}
+
+	// The rest of the clusters in the stage are not in the toBeDeletedBindings so they should be marked as delete succeeded.
+	for _, clusterStatus := range existingDeleteStageClusterMap {
+		// Make sure the cluster is marked as deleted.
+		if !condition.IsConditionStatusTrue(meta.FindStatusCondition(clusterStatus.Conditions, string(placementv1beta1.ClusterUpdatingConditionStarted)), updateRun.GetGeneration()) {
+			markClusterUpdatingStarted(clusterStatus, updateRun.GetGeneration())
+		}
+		markClusterUpdatingSucceeded(clusterStatus, updateRun.GetGeneration())
+	}
+
+	klog.V(2).InfoS("The delete stage is stopping", "numberOfDeletingClusters", len(toBeDeletedBindings), "updateRun", updateRunRef)
+	allDeletingClustersDeleted := true
+	for _, clusterStatus := range updateRunStatus.DeletionStageStatus.Clusters {
+		if condition.IsConditionStatusTrue(meta.FindStatusCondition(clusterStatus.Conditions,
+			string(placementv1beta1.ClusterUpdatingConditionStarted)), updateRun.GetGeneration()) && !condition.IsConditionStatusTrue(
+			meta.FindStatusCondition(clusterStatus.Conditions, string(placementv1beta1.ClusterUpdatingConditionSucceeded)),
+			updateRun.GetGeneration()) {
+			allDeletingClustersDeleted = false
+			break
+		}
+	}
+
+	if allDeletingClustersDeleted {
+		markStageUpdatingStopped(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration())
+	}
+	return len(toBeDeletedBindings) == 0, nil
+}
+
+// markUpdateRunStopping marks the update run as stopping in memory.
+func markUpdateRunStopping(updateRun placementv1beta1.UpdateRunObj) {
+	klog.V(2).InfoS("Marking the update run as stopping", "updateRun", klog.KObj(updateRun))
+	updateRunStatus := updateRun.GetUpdateRunStatus()
+	meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{
+		Type:               string(placementv1beta1.StagedUpdateRunConditionProgressing),
+		Status:             metav1.ConditionUnknown,
+		ObservedGeneration: updateRun.GetGeneration(),
+		Reason:             condition.UpdateRunStoppingReason,
+		Message:            "The update run is in the process of stopping, waiting for all the updating/deleting clusters to finish before completing the stop process",
+	})
+}
+
+// markStageUpdatingStopping marks the stage updating status as stopping in memory.
+func markStageUpdatingStopping(stageUpdatingStatus *placementv1beta1.StageUpdatingStatus, generation int64) {
+	meta.SetStatusCondition(&stageUpdatingStatus.Conditions, metav1.Condition{
+		Type:               string(placementv1beta1.StageUpdatingConditionProgressing),
+		Status:             metav1.ConditionUnknown,
+		ObservedGeneration: generation,
+		Reason:             condition.StageUpdatingStoppingReason,
+		Message:            "Waiting for all the updating clusters to finish updating before completing the stop process",
+	})
+}
+
+// markStageUpdatingStopped marks the stage updating status as stopped in memory.
+func markStageUpdatingStopped(stageUpdatingStatus *placementv1beta1.StageUpdatingStatus, generation int64) { + meta.SetStatusCondition(&stageUpdatingStatus.Conditions, metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: condition.StageUpdatingStoppedReason, + Message: "All the updating clusters have finished updating, the stage is now stopped, waiting to be resumed", + }) +} + +func checkIfErrorStagedUpdateAborted(err error, updateRun placementv1beta1.UpdateRunObj, updatingStageStatus *placementv1beta1.StageUpdatingStatus) { + if errors.Is(err, errStagedUpdatedAborted) { + if updatingStageStatus != nil { + klog.InfoS("The update run is aborted due to unrecoverable behavior in updating stage, marking the stage as failed", "stage", updatingStageStatus.StageName, "updateRun", klog.KObj(updateRun)) + markStageUpdatingFailed(updatingStageStatus, updateRun.GetGeneration(), err.Error()) + } else { + // Handle deletion stage case. + updateRunStatus := updateRun.GetUpdateRunStatus() + markStageUpdatingFailed(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration(), err.Error()) + } + } +} diff --git a/pkg/controllers/updaterun/stop_integration_test.go b/pkg/controllers/updaterun/stop_integration_test.go new file mode 100644 index 000000000..bcd45365e --- /dev/null +++ b/pkg/controllers/updaterun/stop_integration_test.go @@ -0,0 +1,644 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package updaterun + +import ( + "context" + "fmt" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + promclient "github.com/prometheus/client_model/go" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/pkg/utils/condition" +) + +var _ = Describe("UpdateRun stop tests", func() { + var updateRun *placementv1beta1.ClusterStagedUpdateRun + var crp *placementv1beta1.ClusterResourcePlacement + var policySnapshot *placementv1beta1.ClusterSchedulingPolicySnapshot + var updateStrategy *placementv1beta1.ClusterStagedUpdateStrategy + var resourceBindings []*placementv1beta1.ClusterResourceBinding + var targetClusters []*clusterv1beta1.MemberCluster + var unscheduledClusters []*clusterv1beta1.MemberCluster + var resourceSnapshot *placementv1beta1.ClusterResourceSnapshot + var wantStatus *placementv1beta1.UpdateRunStatus + var numTargetClusters int + var numUnscheduledClusters int + + BeforeEach(OncePerOrdered, func() { + testUpdateRunName = "updaterun-" + utils.RandStr() + testCRPName = "crp-" + utils.RandStr() + testResourceSnapshotName = testCRPName + "-" + testResourceSnapshotIndex + "-snapshot" + testUpdateStrategyName = "updatestrategy-" + utils.RandStr() + testCROName = "cro-" + utils.RandStr() + updateRunNamespacedName = types.NamespacedName{Name: testUpdateRunName} + + updateRun = generateTestClusterStagedUpdateRun() + crp = generateTestClusterResourcePlacement() + // 1 BeforeStageTask: Approval + // 2 AfterStageTasks: Approval + TimedWait + updateStrategy = generateTestClusterStagedUpdateStrategyWithSingleStage([]placementv1beta1.StageTask{ + { + Type: placementv1beta1.StageTaskTypeApproval, + }, + }, []placementv1beta1.StageTask{ + { + Type: placementv1beta1.StageTaskTypeApproval, + }, + { + Type: placementv1beta1.StageTaskTypeTimedWait, + WaitTime: &metav1.Duration{ + Duration: time.Second * 4, + }, + }, + }) + resourceBindings, targetClusters, unscheduledClusters = generateSmallTestClusterResourceBindingsAndClusters(1, 3) + policySnapshot = generateTestClusterSchedulingPolicySnapshot(1, len(targetClusters)) + resourceSnapshot = generateTestClusterResourceSnapshot() + numTargetClusters, numUnscheduledClusters = len(targetClusters), len(unscheduledClusters) + + // Set smaller wait time for testing + stageUpdatingWaitTime = time.Second * 3 + clusterUpdatingWaitTime = time.Second * 2 + + By("Creating a new clusterResourcePlacement") + Expect(k8sClient.Create(ctx, crp)).To(Succeed()) + + By("Creating scheduling policy snapshot") + Expect(k8sClient.Create(ctx, policySnapshot)).To(Succeed()) + + By("Setting the latest policy snapshot condition as fully scheduled") + meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{ + Type: string(placementv1beta1.PolicySnapshotScheduled), + Status: metav1.ConditionTrue, + ObservedGeneration: policySnapshot.Generation, + Reason: "scheduled", + }) + Expect(k8sClient.Status().Update(ctx, policySnapshot)).Should(Succeed(), "failed to update the policy snapshot condition") + + By("Creating the member clusters") + for _, cluster := range targetClusters { + Expect(k8sClient.Create(ctx, cluster)).To(Succeed()) + } + for _, cluster := range unscheduledClusters { + Expect(k8sClient.Create(ctx, cluster)).To(Succeed()) + } + + By("Creating a bunch of ClusterResourceBindings") + for _, binding := range resourceBindings { + 
Expect(k8sClient.Create(ctx, binding)).To(Succeed()) + } + + By("Creating a clusterStagedUpdateStrategy") + Expect(k8sClient.Create(ctx, updateStrategy)).To(Succeed()) + + By("Creating a new resource snapshot") + Expect(k8sClient.Create(ctx, resourceSnapshot)).To(Succeed()) + }) + + AfterEach(OncePerOrdered, func() { + By("Deleting the clusterStagedUpdateRun") + Expect(k8sClient.Delete(ctx, updateRun)).Should(Succeed()) + updateRun = nil + + By("Deleting the clusterResourcePlacement") + Expect(k8sClient.Delete(ctx, crp)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + crp = nil + + By("Deleting the clusterSchedulingPolicySnapshot") + Expect(k8sClient.Delete(ctx, policySnapshot)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + policySnapshot = nil + + By("Deleting the clusterResourceBindings") + for _, binding := range resourceBindings { + Expect(k8sClient.Delete(ctx, binding)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + } + resourceBindings = nil + + By("Deleting the member clusters") + for _, cluster := range targetClusters { + Expect(k8sClient.Delete(ctx, cluster)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + } + for _, cluster := range unscheduledClusters { + Expect(k8sClient.Delete(ctx, cluster)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + } + targetClusters, unscheduledClusters = nil, nil + + By("Deleting the clusterStagedUpdateStrategy") + Expect(k8sClient.Delete(ctx, updateStrategy)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + updateStrategy = nil + + By("Deleting the clusterResourceSnapshot") + Expect(k8sClient.Delete(ctx, resourceSnapshot)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) + resourceSnapshot = nil + + By("Checking update run status metrics are removed") + // No metrics are emitted as all are removed after updateRun is deleted. + validateUpdateRunMetricsEmitted() + resetUpdateRunMetrics() + }) + + Context("Cluster staged update run should have stopped when state Stop", Ordered, func() { + var wantApprovalRequest *placementv1beta1.ClusterApprovalRequest + var wantMetrics []*promclient.Metric + BeforeAll(func() { + // Add finalizer to one of the bindings for unscheduled cluster to test deletion stage later. 
+ binding := resourceBindings[numTargetClusters] // first unscheduled cluster + binding.Finalizers = append(binding.Finalizers, "block-deletion-for-test") + Expect(k8sClient.Update(ctx, binding)).Should(Succeed(), "failed to add finalizer to binding for deletion stage test") + + By("Creating a new clusterStagedUpdateRun") + updateRun.Spec.State = placementv1beta1.StateRun + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the initialization succeeded and the execution has not started") + initialized := generateSucceededInitializationStatusForSmallClusters(crp, updateRun, testResourceSnapshotIndex, policySnapshot, updateStrategy, 3) + wantStatus = generateExecutionNotStartedStatus(updateRun, initialized) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Validating the first beforeStage approvalRequest has been created") + wantApprovalRequest = &placementv1beta1.ClusterApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: updateRun.Status.StagesStatus[0].BeforeStageTaskStatus[0].ApprovalRequestName, + Labels: map[string]string{ + placementv1beta1.TargetUpdatingStageNameLabel: updateRun.Status.StagesStatus[0].StageName, + placementv1beta1.TargetUpdateRunLabel: updateRun.Name, + placementv1beta1.TaskTypeLabel: placementv1beta1.BeforeStageTaskLabelValue, + placementv1beta1.IsLatestUpdateRunApprovalLabel: "true", + }, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: updateRun.Name, + TargetStage: updateRun.Status.StagesStatus[0].StageName, + }, + } + validateApprovalRequestCreated(wantApprovalRequest) + + By("Checking update run status metrics are emitted") + wantMetrics = []*promclient.Metric{generateWaitingMetric(updateRun)} + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should stop the update run in BeforeStageTask for 1st stage when state is Stop", func() { + By("Updating updateRun state to Stop") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateStop) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the update run is stopped") + // Mark stage progressing condition as stopped. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateFalseConditionWithReason(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingStoppedReason)) + // Mark update run stopped. + meta.SetStatusCondition(&wantStatus.Conditions, generateFalseConditionWithReason(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunStoppedReason)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should accept the approval request and not rollout 1st stage while in Stop state", func() { + By("Approving the approvalRequest") + approveClusterApprovalRequest(ctx, wantApprovalRequest.Name) + + By("Validating update run is still stopped") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + validateUpdateRunMetricsEmitted(wantMetrics...) 
+ }) + + It("Should start executing stage 1 of the update run when state is Run", func() { + By("Updating updateRun state to Run") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateRun) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the approvalRequest has ApprovalAccepted status") + validateApprovalRequestAccepted(ctx, wantApprovalRequest.Name) + + By("Validating update run is running") + wantStatus = generateExecutionStartedStatus(updateRun, wantStatus) + // Approval task has been approved. + wantStatus.StagesStatus[0].BeforeStageTaskStatus[0].Conditions = append(wantStatus.StagesStatus[0].BeforeStageTaskStatus[0].Conditions, + generateTrueCondition(updateRun, placementv1beta1.StageTaskConditionApprovalRequestApproved)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should mark the 1st cluster in the 1st stage as succeeded after marking the binding available", func() { + By("Validating the 1st clusterResourceBinding is updated to Bound") + binding := resourceBindings[0] // cluster-0 + validateBindingState(ctx, binding, resourceSnapshot.Name, updateRun, 0) + + By("Updating the 1st clusterResourceBinding to Available") + meta.SetStatusCondition(&binding.Status.Conditions, generateTrueCondition(binding, placementv1beta1.ResourceBindingAvailable)) + Expect(k8sClient.Status().Update(ctx, binding)).Should(Succeed(), "failed to update the binding status") + + By("Validating the 1st cluster has succeeded and 2nd cluster has started") + wantStatus.StagesStatus[0].Clusters[0].Conditions = append(wantStatus.StagesStatus[0].Clusters[0].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)) + wantStatus.StagesStatus[0].Clusters[1].Conditions = append(wantStatus.StagesStatus[0].Clusters[1].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Validating the 1st stage has startTime set") + Expect(updateRun.Status.StagesStatus[0].StartTime).ShouldNot(BeNil()) + + By("Checking update run status metrics are emitted") + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should be stopping in the middle of cluster updating when update run state is Stop", func() { + By("Updating updateRun state to Stop") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateStop) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the update run is stopping") + // 2nd cluster has started condition but no succeeded condition. + // Mark stage progressing condition as unknown with stopping reason. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateProgressingUnknownConditionWithReason(updateRun, condition.StageUpdatingStoppingReason)) + // Mark updateRun progressing condition as unknown with stopping reason. 
+ meta.SetStatusCondition(&wantStatus.Conditions, generateProgressingUnknownConditionWithReason(updateRun, condition.UpdateRunStoppingReason)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateStoppingMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should wait for cluster to finish updating so update run should still be stopping", func() { + By("Validating the 2nd cluster has NOT succeeded and the update run is still stopping") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should have completely stopped after the in-progress cluster has finished updating", func() { + By("Validating the 2nd clusterResourceBinding is updated to Bound") + binding := resourceBindings[1] // cluster-1 + validateBindingState(ctx, binding, resourceSnapshot.Name, updateRun, 0) + + By("Updating the 2nd clusterResourceBinding to Available") + meta.SetStatusCondition(&binding.Status.Conditions, generateTrueCondition(binding, placementv1beta1.ResourceBindingAvailable)) + Expect(k8sClient.Status().Update(ctx, binding)).Should(Succeed(), "failed to update the binding status") + + By("Validating the 2nd cluster has succeeded and the update run has completely stopped") + // Mark 2nd cluster succeeded. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Clusters[1].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)) + // Mark stage progressing condition as false with stopped reason. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingStoppedReason)) + // Mark updateRun progressing condition as false with stopped reason. + meta.SetStatusCondition(&wantStatus.Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunStoppedReason)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + + By("Validating update run is in stopped state") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") + + By("Validating 3rd clusterResourceBinding is NOT updated to Bound") + binding = resourceBindings[2] // cluster-2 + validateNotBoundBindingState(ctx, binding) + }) + + It("Should continue executing stage 1 of the update run when state is Run", func() { + By("Updating updateRun state to Run") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateRun) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating update run is running") + // Mark 3rd cluster started. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Clusters[2].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted)) + // Mark stage progressing condition as true with progressing reason. 
+ meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)) + // Mark updateRun progressing condition as true with progressing reason. + meta.SetStatusCondition(&wantStatus.Conditions, generateTrueCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should mark the 3rd cluster in the 1st stage as succeeded after marking the binding available", func() { + By("Validating the 3rd clusterResourceBinding is updated to Bound") + binding := resourceBindings[2] // cluster-2 + validateBindingState(ctx, binding, resourceSnapshot.Name, updateRun, 0) + + By("Updating the 3rd clusterResourceBinding to Available") + meta.SetStatusCondition(&binding.Status.Conditions, generateTrueCondition(binding, placementv1beta1.ResourceBindingAvailable)) + Expect(k8sClient.Status().Update(ctx, binding)).Should(Succeed(), "failed to update the binding status") + + By("Validating the 3rd cluster has succeeded") + wantStatus.StagesStatus[0].Clusters[2].Conditions = append(wantStatus.StagesStatus[0].Clusters[2].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)) + + // Approval request for AfterStageTasks is created. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].AfterStageTaskStatus[0].Conditions, generateTrueCondition(updateRun, placementv1beta1.StageTaskConditionApprovalRequestCreated)) + // Stage is waiting for AfterStageTasks to complete. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateFalseCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)) + meta.SetStatusCondition(&wantStatus.Conditions, generateFalseCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateWaitingMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should have approval request created for 1st stage AfterStageTask", func() { + By("Validating the approvalRequest has been created") + wantApprovalRequest = &placementv1beta1.ClusterApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: updateRun.Status.StagesStatus[0].AfterStageTaskStatus[0].ApprovalRequestName, + Labels: map[string]string{ + placementv1beta1.TargetUpdatingStageNameLabel: updateRun.Status.StagesStatus[0].StageName, + placementv1beta1.TargetUpdateRunLabel: updateRun.Name, + placementv1beta1.TaskTypeLabel: placementv1beta1.AfterStageTaskLabelValue, + placementv1beta1.IsLatestUpdateRunApprovalLabel: "true", + }, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: updateRun.Name, + TargetStage: updateRun.Status.StagesStatus[0].StageName, + }, + } + validateApprovalRequestCreated(wantApprovalRequest) + + By("Checking update run status metrics are emitted") + validateUpdateRunMetricsEmitted(wantMetrics...) 
+ }) + + It("Should stop the update run in AfterStageTask for 1st stage when state is Stop", func() { + By("Updating updateRun state to Stop") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateStop) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the update run is stopped") + // Mark stage progressing condition as stopped. + meta.SetStatusCondition(&wantStatus.StagesStatus[0].Conditions, generateFalseConditionWithReason(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingStoppedReason)) + // Mark update run stopped. + meta.SetStatusCondition(&wantStatus.Conditions, generateFalseConditionWithReason(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunStoppedReason)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should not continue to delete stage after approval when still stopped", func() { + By("Approving the approvalRequest") + approveClusterApprovalRequest(ctx, wantApprovalRequest.Name) + + By("Validating the to-be-deleted bindings are NOT deleted") + Consistently(func() error { + for i := numTargetClusters; i < numTargetClusters+numUnscheduledClusters; i++ { + binding := placementv1beta1.ClusterResourceBinding{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceBindings[i].Name}, &binding); err != nil { + return fmt.Errorf("get binding %s returned a not-found error or another error: %w", binding.Name, err) + } + + if !binding.DeletionTimestamp.IsZero() { + return fmt.Errorf("binding %s is being deleted when it should not be", binding.Name) + } + } + return nil + }, duration, interval).Should(Succeed(), "failed to validate the to-be-deleted bindings still exist") + + By("Validating update run is stopped") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should complete the 1st stage once it starts running again when wait time passed and approval request approved then move on to the Delete stage", func() { + By("Updating updateRun state to Run") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateRun) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the approvalRequest has ApprovalAccepted status") + validateApprovalRequestAccepted(ctx, wantApprovalRequest.Name) + + By("Validating both after stage tasks have completed and Deletion has started") + // Approval AfterStageTask completed. + wantStatus.StagesStatus[0].AfterStageTaskStatus[0].Conditions = append(wantStatus.StagesStatus[0].AfterStageTaskStatus[0].Conditions, + generateTrueCondition(updateRun, placementv1beta1.StageTaskConditionApprovalRequestApproved)) + // Timedwait AfterStageTask completed. 
+ wantStatus.StagesStatus[0].AfterStageTaskStatus[1].Conditions = append(wantStatus.StagesStatus[0].AfterStageTaskStatus[1].Conditions, + generateTrueCondition(updateRun, placementv1beta1.StageTaskConditionWaitTimeElapsed)) + // 1st stage completed, mark progressing condition reason as succeeded and add succeeded condition. + wantStatus.StagesStatus[0].Conditions[0] = generateFalseProgressingCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingSucceededReason) + wantStatus.StagesStatus[0].Conditions = append(wantStatus.StagesStatus[0].Conditions, generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionSucceeded)) + // Deletion stage started. Mark deletion stage progressing condition as true with progressing reason. + meta.SetStatusCondition(&wantStatus.DeletionStageStatus.Conditions, generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)) + // Mark 1 cluster started and the other clusters as succeeded in deletion stage. + for i := range wantStatus.DeletionStageStatus.Clusters { + wantStatus.DeletionStageStatus.Clusters[i].Conditions = append(wantStatus.DeletionStageStatus.Clusters[i].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted)) + if i != 0 { // first unscheduled cluster is still deleting + wantStatus.DeletionStageStatus.Clusters[i].Conditions = append(wantStatus.DeletionStageStatus.Clusters[i].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)) + } + } + // Mark updateRun progressing condition as true with progressing reason. + meta.SetStatusCondition(&wantStatus.Conditions, generateTrueCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Validating the 1st stage has endTime set") + Expect(updateRun.Status.StagesStatus[0].EndTime).ShouldNot(BeNil()) + + By("Validating the waitTime after stage task only completes after the wait time") + waitStartTime := meta.FindStatusCondition(updateRun.Status.StagesStatus[0].Conditions, string(placementv1beta1.StageUpdatingConditionProgressing)).LastTransitionTime.Time + waitEndTime := meta.FindStatusCondition(updateRun.Status.StagesStatus[0].AfterStageTaskStatus[1].Conditions, string(placementv1beta1.StageTaskConditionWaitTimeElapsed)).LastTransitionTime.Time + Expect(waitStartTime.Add(updateStrategy.Spec.Stages[0].AfterStageTasks[1].WaitTime.Duration).After(waitEndTime)).Should(BeFalse(), + fmt.Sprintf("waitEndTime %v did not pass waitStartTime %v long enough, want at least %v", waitEndTime, waitStartTime, updateStrategy.Spec.Stages[0].AfterStageTasks[1].WaitTime.Duration)) + + By("Validating the creation time of the approval request is before the complete time of the timedwait task") + approvalCreateTime := meta.FindStatusCondition(updateRun.Status.StagesStatus[0].AfterStageTaskStatus[0].Conditions, string(placementv1beta1.StageTaskConditionApprovalRequestCreated)).LastTransitionTime.Time + Expect(approvalCreateTime.Before(waitEndTime)).Should(BeTrue()) + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateProgressingMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) 
+ }) + + It("Should stop the update run in deletion stage when state is Stop", func() { + By("Updating updateRun state to Stop") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateStop) + // Update the test's want status to match the new generation. + updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the update run is stopping") + // Mark stage progressing condition as stopping. + meta.SetStatusCondition(&wantStatus.DeletionStageStatus.Conditions, generateProgressingUnknownConditionWithReason(updateRun, condition.StageUpdatingStoppingReason)) + // Mark update run stopped. + meta.SetStatusCondition(&wantStatus.Conditions, generateProgressingUnknownConditionWithReason(updateRun, condition.UpdateRunStoppingReason)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateStoppingMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should not complete deletion stage when in progress clusters still deleting while stopped", func() { + By("Validating the first unscheduled cluster resource binding has started deleting but is NOT deleted") + Consistently(func() error { + binding := &placementv1beta1.ClusterResourceBinding{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceBindings[numTargetClusters].Name}, binding); err != nil { + return fmt.Errorf("get binding %s returned a not-found error or another error: %w", binding.Name, err) + } + if binding.DeletionTimestamp.IsZero() { + return fmt.Errorf("binding %s is not marked for deletion yet", binding.Name) + } + return nil + }, duration, interval).Should(Succeed(), "failed to validate the to-be-deleted bindings for unscheduled-cluster-0 still exist") + + By("Validating update run is stopping") + validateClusterStagedUpdateRunStatusConsistently(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + validateUpdateRunMetricsEmitted(wantMetrics...) 
+ }) + + It("Should stop completely after in-progress deletion is done when state is Stop", func() { + By("Removing the finalizer on the in-progress deletion binding to allow deletion to complete") + Eventually(func() error { + binding := &placementv1beta1.ClusterResourceBinding{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceBindings[numTargetClusters].Name}, binding); err != nil { + return fmt.Errorf("get binding %s returned a not-found error or another error: %w", binding.Name, err) + } + if len(binding.Finalizers) == 0 { + return nil + } + binding.Finalizers = []string{} + if err := k8sClient.Update(ctx, binding); err != nil { + return fmt.Errorf("failed to remove finalizer from binding %s: %w", binding.Name, err) + } + return nil + }, timeout, interval).Should(Succeed(), "failed to remove finalizer from binding for deletion stage test") + + By("Validating the binding is deleted") + Eventually(func() error { + binding := &placementv1beta1.ClusterResourceBinding{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceBindings[numTargetClusters].Name}, binding) + if err == nil { + return fmt.Errorf("binding %s is not deleted", binding.Name) + } + if !apierrors.IsNotFound(err) { + return fmt.Errorf("get binding %s does not return a not-found error: %w", binding.Name, err) + } + return nil + }, timeout, interval).Should(Succeed(), "failed to validate the to-be-deleted binding for unscheduled-cluster-0 is deleted") + + By("Validating the update run is completely stopped") + // Mark the first unscheduled cluster succeeded. + meta.SetStatusCondition(&wantStatus.DeletionStageStatus.Clusters[0].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)) + // Mark deletion stage progressing condition as stopped. + meta.SetStatusCondition(&wantStatus.DeletionStageStatus.Conditions, generateFalseConditionWithReason(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingStoppedReason)) + // Mark update run stopped. + meta.SetStatusCondition(&wantStatus.Conditions, generateFalseConditionWithReason(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunStoppedReason)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateStoppedMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) + }) + + It("Should complete delete stage and complete the update run when state is Run", func() { + By("Updating updateRun state to Run") + updateRun = updateClusterStagedUpdateRunState(updateRun.Name, placementv1beta1.StateRun) + // Update the test's want status to match the new generation. 
+ updateAllStatusConditionsGeneration(wantStatus, updateRun.Generation) + + By("Validating the to-be-deleted bindings are all deleted") + Eventually(func() error { + for i := numTargetClusters; i < numTargetClusters+numUnscheduledClusters; i++ { + binding := &placementv1beta1.ClusterResourceBinding{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceBindings[i].Name}, binding) + if err == nil { + return fmt.Errorf("binding %s is not deleted", binding.Name) + } + if !apierrors.IsNotFound(err) { + return fmt.Errorf("get binding %s does not return a not-found error: %w", binding.Name, err) + } + + if !binding.DeletionTimestamp.IsZero() { + return fmt.Errorf("binding %s is not deleted yet", binding.Name) + } + } + return nil + }, timeout, interval).Should(Succeed(), "failed to validate the deletion of the to-be-deleted bindings") + + By("Validating the delete stage and the clusterStagedUpdateRun has completed") + for i := range wantStatus.DeletionStageStatus.Clusters { + meta.SetStatusCondition(&wantStatus.DeletionStageStatus.Clusters[i].Conditions, generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)) + } + // Mark the stage progressing condition as false with succeeded reason and add succeeded condition. + wantStatus.DeletionStageStatus.Conditions[0] = generateFalseProgressingCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing, condition.StageUpdatingSucceededReason) + wantStatus.DeletionStageStatus.Conditions = append(wantStatus.DeletionStageStatus.Conditions, generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionSucceeded)) + // Mark updateRun progressing condition as false with succeeded reason and add succeeded condition. + meta.SetStatusCondition(&wantStatus.Conditions, generateFalseProgressingCondition(updateRun, placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunSucceededReason)) + wantStatus.Conditions = append(wantStatus.Conditions, generateTrueCondition(updateRun, placementv1beta1.StagedUpdateRunConditionSucceeded)) + validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") + + By("Checking update run status metrics are emitted") + wantMetrics = append(wantMetrics, generateSucceededMetric(updateRun)) + validateUpdateRunMetricsEmitted(wantMetrics...) 
+ }) + }) +}) + +func updateClusterStagedUpdateRunState(updateRunName string, state placementv1beta1.State) *placementv1beta1.ClusterStagedUpdateRun { + updateRun := &placementv1beta1.ClusterStagedUpdateRun{} + Eventually(func() error { + if err := k8sClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { + return fmt.Errorf("failed to get ClusterStagedUpdateRun %s", updateRunName) + } + + updateRun.Spec.State = state + if err := k8sClient.Update(ctx, updateRun); err != nil { + return fmt.Errorf("failed to update ClusterStagedUpdateRun %s", updateRunName) + } + return nil + }, timeout, interval).Should(Succeed(), "Failed to update ClusterStagedUpdateRun %s state to %s", updateRunName, state) + return updateRun +} + +func validateApprovalRequestAccepted(ctx context.Context, approvalRequestName string) { + Eventually(func() (bool, error) { + var approvalRequest placementv1beta1.ClusterApprovalRequest + if err := k8sClient.Get(ctx, types.NamespacedName{Name: approvalRequestName}, &approvalRequest); err != nil { + return false, err + } + return condition.IsConditionStatusTrue(meta.FindStatusCondition(approvalRequest.Status.Conditions, string(placementv1beta1.ApprovalRequestConditionApprovalAccepted)), approvalRequest.Generation), nil + }, timeout, interval).Should(BeTrue(), "failed to validate the approvalRequest %s is accepted", approvalRequestName) +} diff --git a/pkg/controllers/updaterun/stop_test.go b/pkg/controllers/updaterun/stop_test.go new file mode 100644 index 000000000..b992e7cdf --- /dev/null +++ b/pkg/controllers/updaterun/stop_test.go @@ -0,0 +1,484 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package updaterun + +import ( + "errors" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils/condition" +) + +func TestStopUpdatingStage(t *testing.T) { + tests := []struct { + name string + updateRun *placementv1beta1.ClusterStagedUpdateRun + bindings []placementv1beta1.BindingObj + wantErr error + wantFinished bool + wantWaitTime time.Duration + wantProgressCond metav1.Condition + }{ + { + name: "cluster update failed", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-run", + Generation: 1, + }, + Spec: placementv1beta1.UpdateRunSpec{ + PlacementName: "test-placement", + ResourceSnapshotIndex: "1", + }, + Status: placementv1beta1.UpdateRunStatus{ + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + StageName: "test-stage", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingStartedReason, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingFailedReason, + }, + }, + }, + }, + }, + }, + }, + }, + bindings: nil, + wantFinished: true, + wantErr: nil, + wantWaitTime: 0, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppedReason, + }, + }, + { + name: "binding synced, bound, rolloutStarted true, but binding has failed condition", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-run", + Generation: 1, + }, + Spec: placementv1beta1.UpdateRunSpec{ + PlacementName: "test-placement", + ResourceSnapshotIndex: "1", + }, + Status: placementv1beta1.UpdateRunStatus{ + ResourceSnapshotIndexUsed: "1", + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + StageName: "test-stage", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + }, + }, + bindings: []placementv1beta1.BindingObj{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "binding-1", + Generation: 1, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "cluster-1", + ResourceSnapshotName: "test-placement-1-snapshot", // Already synced. + State: placementv1beta1.BindingStateBound, // Already Bound. 
+ }, + Status: placementv1beta1.ResourceBindingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingRolloutStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.RolloutStartedReason, + }, + { + Type: string(placementv1beta1.ResourceBindingApplied), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.ApplyFailedReason, + }, + }, + }, + }, + }, + wantErr: errors.New("cluster updating encountered an error at stage"), + wantFinished: false, + wantWaitTime: 0, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppingReason, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := runtime.NewScheme() + _ = placementv1beta1.AddToScheme(scheme) + objs := make([]client.Object, len(tt.bindings)) + for i := range tt.bindings { + objs[i] = tt.bindings[i] + } + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() + r := &Reconciler{ + Client: fakeClient, + } + + // Stop the stage. + finished, waitTime, gotErr := r.stopUpdatingStage(tt.updateRun, 0, tt.bindings) + + // Verify error expectation. + if (tt.wantErr != nil) != (gotErr != nil) { + t.Fatalf("stopUpdatingStage() want error: %v, got error: %v", tt.wantErr, gotErr) + } + + // Verify error message contains expected substring. + if tt.wantErr != nil && gotErr != nil { + if !strings.Contains(gotErr.Error(), tt.wantErr.Error()) { + t.Fatalf("stopUpdatingStage() want error: %v, got error: %v", tt.wantErr, gotErr) + } + } + + // Verify finished result. + if finished != tt.wantFinished { + t.Fatalf("stopUpdatingStage() want finished: %v, got finished: %v", tt.wantFinished, finished) + } + + // Verify wait time. + if waitTime != tt.wantWaitTime { + t.Fatalf("stopUpdatingStage() want waitTime: %v, got waitTime: %v", tt.wantWaitTime, waitTime) + } + + // Verify progressing condition. 
+ progressingCond := meta.FindStatusCondition( + tt.updateRun.Status.StagesStatus[0].Conditions, + string(placementv1beta1.StageUpdatingConditionProgressing), + ) + if diff := cmp.Diff(tt.wantProgressCond, *progressingCond, cmpOptions...); diff != "" { + t.Errorf("stopUpdatingStage() status mismatch: (-want +got):\n%s", diff) + } + }) + } +} + +func TestStopDeleteStage(t *testing.T) { + now := metav1.Now() + deletionTime := metav1.NewTime(now.Add(-1 * time.Minute)) + + tests := []struct { + name string + updateRun *placementv1beta1.ClusterStagedUpdateRun + toBeDeletedBindings []placementv1beta1.BindingObj + wantFinished bool + wantError error + wantProgressCond metav1.Condition + }{ + { + name: "no bindings to delete - should finish and mark stage as stopped", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-updaterun", + Generation: 1, + }, + Status: placementv1beta1.UpdateRunStatus{ + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{}, + }, + }, + }, + toBeDeletedBindings: []placementv1beta1.BindingObj{}, + wantFinished: true, + wantError: nil, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppedReason, + }, + }, + { + name: "cluster being deleted with proper binding deletion timestamp - should not finish", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-updaterun", + Generation: 1, + }, + Status: placementv1beta1.UpdateRunStatus{ + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: now, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + }, + toBeDeletedBindings: []placementv1beta1.BindingObj{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &deletionTime, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "cluster-1", + }, + }, + }, + wantFinished: false, + wantError: nil, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppingReason, + }, + }, + { + name: "cluster marked as deleting but binding not deleting - should abort", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-updaterun", + Generation: 1, + }, + Status: placementv1beta1.UpdateRunStatus{ + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: now, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + }, + toBeDeletedBindings: []placementv1beta1.BindingObj{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + // No DeletionTimestamp set + }, + 
Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "cluster-1", + }, + }, + }, + wantFinished: false, + wantError: errors.New("the cluster `cluster-1` in the deleting stage is marked as deleting but its corresponding binding is not deleting"), + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppingReason, + }, + }, + { + name: "cluster not marked as deleting and binding not deleting", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-updaterun", + Generation: 1, + }, + Status: placementv1beta1.UpdateRunStatus{ + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + }, + }, + }, + }, + }, + toBeDeletedBindings: []placementv1beta1.BindingObj{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + // No DeletionTimestamp set + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "cluster-1", + }, + }, + }, + wantFinished: false, + wantError: nil, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppedReason, + }, + }, + { + name: "multiple clusters with mixed states", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-updaterun", + Generation: 1, + }, + Status: placementv1beta1.UpdateRunStatus{ + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: now, + Reason: condition.ClusterUpdatingStartedReason, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: now, + Reason: condition.ClusterUpdatingSucceededReason, + }, + }, + }, + { + ClusterName: "cluster-2", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: now, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + }, + toBeDeletedBindings: []placementv1beta1.BindingObj{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &deletionTime, + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "cluster-2", + }, + }, + }, + wantFinished: false, + wantError: nil, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppingReason, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := &Reconciler{} + + gotFinished, gotErr := r.stopDeleteStage(tt.updateRun, tt.toBeDeletedBindings) + + // Check finished result. + if gotFinished != tt.wantFinished { + t.Errorf("stopDeleteStage() finished = %v, want %v", gotFinished, tt.wantFinished) + } + + // Verify error expectation. 
+ if (tt.wantError != nil) != (gotErr != nil) { + t.Fatalf("stopDeleteStage() want error: %v, got error: %v", tt.wantError, gotErr) + } + + // Verify error message contains expected substring. + if tt.wantError != nil && gotErr != nil { + if !strings.Contains(gotErr.Error(), tt.wantError.Error()) { + t.Fatalf("stopDeleteStage() want error: %v, got error: %v", tt.wantError, gotErr) + } + } + + // Check stage status condition. + progressingCond := meta.FindStatusCondition( + tt.updateRun.Status.DeletionStageStatus.Conditions, + string(placementv1beta1.StageUpdatingConditionProgressing), + ) + if diff := cmp.Diff(tt.wantProgressCond, *progressingCond, cmpOptions...); diff != "" { + t.Errorf("stopDeleteStage() status mismatch: (-want +got):\n%s", diff) + } + }) + } +} diff --git a/pkg/controllers/updaterun/validation.go b/pkg/controllers/updaterun/validation.go index 1a2adb57e..fbdbd5cec 100644 --- a/pkg/controllers/updaterun/validation.go +++ b/pkg/controllers/updaterun/validation.go @@ -136,8 +136,9 @@ func (r *Reconciler) validateStagesStatus( func validateUpdateStagesStatus(existingStageStatus []placementv1beta1.StageUpdatingStatus, updateRun placementv1beta1.UpdateRunObj) (int, int, error) { updatingStageIndex := -1 lastFinishedStageIndex := -1 + updateRunStatus := updateRun.GetUpdateRunStatus() // Remember the newly computed stage status. - newStageStatus := updateRun.GetUpdateRunStatus().StagesStatus + newStageStatus := updateRunStatus.StagesStatus // Make sure the number of stages in the updateRun are still the same. if len(existingStageStatus) != len(newStageStatus) { mismatchErr := fmt.Errorf("the number of stages in the updateRun has changed, new: %d, existing: %d", len(newStageStatus), len(existingStageStatus)) @@ -164,9 +165,12 @@ func validateUpdateStagesStatus(existingStageStatus []placementv1beta1.StageUpda return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, mismatchErr.Error()) } } - - var err error - updatingStageIndex, lastFinishedStageIndex, err = validateClusterUpdatingStatus(curStage, updatingStageIndex, lastFinishedStageIndex, &existingStageStatus[curStage], updateRun) + // Calculate maxConcurrency for the current stage. + maxConcurrency, err := calculateMaxConcurrencyValue(updateRunStatus, curStage) + if err != nil { + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, err.Error()) + } + updatingStageIndex, lastFinishedStageIndex, err = validateClusterUpdatingStatus(curStage, updatingStageIndex, lastFinishedStageIndex, &existingStageStatus[curStage], maxConcurrency, updateRun) if err != nil { return -1, -1, err } @@ -181,6 +185,7 @@ func validateUpdateStagesStatus(existingStageStatus []placementv1beta1.StageUpda func validateClusterUpdatingStatus( curStage, updatingStageIndex, lastFinishedStageIndex int, stageStatus *placementv1beta1.StageUpdatingStatus, + maxConcurrency int, updateRun placementv1beta1.UpdateRunObj, ) (int, int, error) { stageSucceedCond := meta.FindStatusCondition(stageStatus.Conditions, string(placementv1beta1.StageUpdatingConditionSucceeded)) @@ -234,7 +239,21 @@ func validateClusterUpdatingStatus( return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) } updatingStageIndex = curStage - // TODO(arvindth): add validation to ensure updating cluster count should not exceed maxConcurrency. + // Collect the updating clusters.
+ updatingClusterCount := 0 + for j := range stageStatus.Clusters { + clusterStartedCond := meta.FindStatusCondition(stageStatus.Clusters[j].Conditions, string(placementv1beta1.ClusterUpdatingConditionStarted)) + clusterFinishedCond := meta.FindStatusCondition(stageStatus.Clusters[j].Conditions, string(placementv1beta1.ClusterUpdatingConditionSucceeded)) + // A cluster counts as updating if it has started but not yet succeeded; failed clusters are also counted as updating during execution. + if condition.IsConditionStatusTrue(clusterStartedCond, updateRun.GetGeneration()) && !(condition.IsConditionStatusTrue(clusterFinishedCond, updateRun.GetGeneration())) { + updatingClusterCount++ + } + } + if updatingClusterCount > maxConcurrency { + unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the number of updating clusters `%d` in the updating stage `%s` exceeds maxConcurrency `%d`", updatingClusterCount, stageStatus.StageName, maxConcurrency)) + klog.ErrorS(unexpectedErr, "The number of updating clusters in the updating stage exceeds maxConcurrency", "updateRun", klog.KObj(updateRun)) + return -1, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + } } return updatingStageIndex, lastFinishedStageIndex, nil } diff --git a/pkg/controllers/updaterun/validation_integration_test.go b/pkg/controllers/updaterun/validation_integration_test.go index 0d37e7ce2..c4864101a 100644 --- a/pkg/controllers/updaterun/validation_integration_test.go +++ b/pkg/controllers/updaterun/validation_integration_test.go @@ -178,9 +178,12 @@ var _ = Describe("UpdateRun validation tests", func() { }) It("Should fail to validate if CRP does not have external rollout strategy type", func() { - By("Updating CRP's rollout strategy type") + // Re-create the CRP with a different strategy type, as the strategy type cannot be updated directly from `External` to another type.
+ By("Re-creating the CRP with rolling update rollout strategy type") + Expect(k8sClient.Delete(ctx, crp)).To(Succeed()) + crp = generateTestClusterResourcePlacement() crp.Spec.Strategy.Type = placementv1beta1.RollingUpdateRolloutStrategyType - Expect(k8sClient.Update(ctx, crp)).To(Succeed()) + Expect(k8sClient.Create(ctx, crp)).To(Succeed()) By("Validating the validation failed") wantStatus = generateFailedValidationStatus(updateRun, wantStatus) diff --git a/pkg/controllers/updaterun/validation_test.go b/pkg/controllers/updaterun/validation_test.go index 8bea6db14..33a35387c 100644 --- a/pkg/controllers/updaterun/validation_test.go +++ b/pkg/controllers/updaterun/validation_test.go @@ -40,6 +40,7 @@ func TestValidateClusterUpdatingStatus(t *testing.T) { updatingStageIndex int lastFinishedStageIndex int stageStatus *placementv1beta1.StageUpdatingStatus + maxConcurrency int wantErr error wantUpdatingStageIndex int wantLastFinishedStageIndex int @@ -144,6 +145,28 @@ func TestValidateClusterUpdatingStatus(t *testing.T) { wantUpdatingStageIndex: -1, wantLastFinishedStageIndex: -1, }, + { + name: "determineUpdatignStage should not return error if there are multiple clusters in an updating stage with no condition set (execution not started)", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)}, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + }, + { + ClusterName: "cluster-2", + }, + }, + }, + maxConcurrency: 1, + wantErr: nil, + wantUpdatingStageIndex: 0, + wantLastFinishedStageIndex: -1, + }, { name: "determineUpdatignStage should not return error if there are multiple clusters updating in an updating stage", curStage: 0, @@ -159,14 +182,67 @@ func TestValidateClusterUpdatingStatus(t *testing.T) { }, { ClusterName: "cluster-2", - Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted)}, + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted), generateFalseCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)}, }, }, }, + maxConcurrency: 2, wantErr: nil, wantUpdatingStageIndex: 0, wantLastFinishedStageIndex: -1, }, + { + name: "determineUpdatignStage should not return error if multiple clusters have succeeded in an updating stage", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)}, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted), generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)}, + }, + { + ClusterName: "cluster-2", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted), generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)}, + }, + }, + }, + maxConcurrency: 1, + wantErr: nil, + wantUpdatingStageIndex: 0, + wantLastFinishedStageIndex: -1, + }, + { + name: "validateClusterUpdatingStatus should return error if 
number of updating clusters exceeds maxConcurrency", + curStage: 0, + updatingStageIndex: -1, + lastFinishedStageIndex: -1, + stageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "test-stage", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)}, + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted)}, + }, + { + ClusterName: "cluster-2", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted), generateFalseCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)}, + }, + { + ClusterName: "cluster-3", + Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.ClusterUpdatingConditionStarted), generateFalseCondition(updateRun, placementv1beta1.ClusterUpdatingConditionSucceeded)}, + }, + }, + }, + maxConcurrency: 1, + wantErr: wrapErr(true, fmt.Errorf("the number of updating clusters `3` in the updating stage `test-stage` exceeds maxConcurrency `1`")), + wantUpdatingStageIndex: -1, + wantLastFinishedStageIndex: -1, + }, { name: "validateClusterUpdatingStatus should return -1 as the updatingStageIndex if no stage is updating", curStage: 0, @@ -188,6 +264,7 @@ func TestValidateClusterUpdatingStatus(t *testing.T) { StageName: "test-stage", Conditions: []metav1.Condition{generateTrueCondition(updateRun, placementv1beta1.StageUpdatingConditionProgressing)}, }, + maxConcurrency: 1, wantErr: nil, wantUpdatingStageIndex: 2, wantLastFinishedStageIndex: 1, @@ -213,7 +290,7 @@ func TestValidateClusterUpdatingStatus(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { gotUpdatingStageIndex, gotLastFinishedStageIndex, err := - validateClusterUpdatingStatus(test.curStage, test.updatingStageIndex, test.lastFinishedStageIndex, test.stageStatus, updateRun) + validateClusterUpdatingStatus(test.curStage, test.updatingStageIndex, test.lastFinishedStageIndex, test.stageStatus, test.maxConcurrency, updateRun) if test.wantErr == nil { if err != nil { t.Fatalf("validateClusterUpdatingStatus() got error = %+v, want error = nil", err) diff --git a/pkg/controllers/workapplier/backoff_integration_test.go b/pkg/controllers/workapplier/backoff_integration_test.go index c9cb0c6a3..9361e7497 100644 --- a/pkg/controllers/workapplier/backoff_integration_test.go +++ b/pkg/controllers/workapplier/backoff_integration_test.go @@ -32,6 +32,7 @@ import ( fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/utils" "go.goms.io/fleet/pkg/utils/condition" + testutilsactuals "go.goms.io/fleet/test/utils/actuals" ) // Note (chenyu1): all test cases in this file use a separate test environment @@ -178,7 +179,7 @@ var _ = Describe("exponential backoff", func() { applyStrategy := &fleetv1beta1.ApplyStrategy{ WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeIfNoDiff, } - createWorkObject(workName, memberReservedNSName2, applyStrategy, regularNSJSON) + createWorkObject(workName, memberReservedNSName2, applyStrategy, nil, regularNSJSON) }) // For simplicity reasons, this test case will skip some of the regular apply op result verification @@ -295,10 +296,10 @@ var _ = Describe("exponential backoff", func() { deleteWorkObject(workName, memberReservedNSName2) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient2, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName2) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. @@ -395,7 +396,7 @@ var _ = Describe("exponential backoff", func() { WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeAlways, WhenToApply: fleetv1beta1.WhenToApplyTypeAlways, } - createWorkObject(workName, memberReservedNSName2, applyStrategy, regularNSJSON) + createWorkObject(workName, memberReservedNSName2, applyStrategy, nil, regularNSJSON) }) // For simplicity reasons, this test case will skip some of the regular apply op result verification @@ -512,10 +513,10 @@ var _ = Describe("exponential backoff", func() { deleteWorkObject(workName, memberReservedNSName2) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient2, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName2) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. diff --git a/pkg/controllers/workapplier/controller.go b/pkg/controllers/workapplier/controller.go index b1b0f8493..83cdd1397 100644 --- a/pkg/controllers/workapplier/controller.go +++ b/pkg/controllers/workapplier/controller.go @@ -505,6 +505,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu trackWorkAndManifestProcessingRequestMetrics(work) // Requeue the Work object with a delay based on the requeue rate limiter. + // + // Note (chenyu1): at this moment the work applier does not register changes on back-reported + // status as a trigger for resetting the rate limiter. 
requeueDelay := r.requeueRateLimiter.When(work, bundles) klog.V(2).InfoS("Requeue the Work object for re-processing", "work", workRef, "delaySeconds", requeueDelay.Seconds()) return ctrl.Result{RequeueAfter: requeueDelay}, nil diff --git a/pkg/controllers/workapplier/controller_integration_test.go b/pkg/controllers/workapplier/controller_integration_test.go index d29628ec6..d4d90d12f 100644 --- a/pkg/controllers/workapplier/controller_integration_test.go +++ b/pkg/controllers/workapplier/controller_integration_test.go @@ -19,6 +19,7 @@ package workapplier import ( "crypto/rand" "encoding/base64" + "encoding/json" "fmt" "time" @@ -32,7 +33,6 @@ import ( rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -41,6 +41,8 @@ import ( fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/utils" "go.goms.io/fleet/pkg/utils/condition" + testutilsactuals "go.goms.io/fleet/test/utils/actuals" + testutilsresource "go.goms.io/fleet/test/utils/resource" ) const ( @@ -61,6 +63,7 @@ var ( ignoreFieldConditionLTTMsg = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "Message") ignoreDriftDetailsObsTime = cmpopts.IgnoreFields(fleetv1beta1.DriftDetails{}, "ObservationTime", "FirstDriftedObservedTime") ignoreDiffDetailsObsTime = cmpopts.IgnoreFields(fleetv1beta1.DiffDetails{}, "ObservationTime", "FirstDiffedObservedTime") + ignoreBackReportedStatus = cmpopts.IgnoreFields(fleetv1beta1.ManifestCondition{}, "BackReportedStatus") lessFuncPatchDetail = func(a, b fleetv1beta1.PatchDetail) bool { return a.Path < b.Path @@ -76,29 +79,9 @@ var ( dummyLabelValue5 = "quux" ) -// createWorkObject creates a new Work object with the given name, manifests, and apply strategy. -func createWorkObject(workName, memberClusterReservedNSName string, applyStrategy *fleetv1beta1.ApplyStrategy, rawManifestJSON ...[]byte) { - manifests := make([]fleetv1beta1.Manifest, len(rawManifestJSON)) - for idx := range rawManifestJSON { - manifests[idx] = fleetv1beta1.Manifest{ - RawExtension: runtime.RawExtension{ - Raw: rawManifestJSON[idx], - }, - } - } - - work := &fleetv1beta1.Work{ - ObjectMeta: metav1.ObjectMeta{ - Name: workName, - Namespace: memberClusterReservedNSName, - }, - Spec: fleetv1beta1.WorkSpec{ - Workload: fleetv1beta1.WorkloadTemplate{ - Manifests: manifests, - }, - ApplyStrategy: applyStrategy, - }, - } +// createWorkObject creates a new Work object with the given work name/namespace, apply strategy, and raw manifest JSONs. +func createWorkObject(workName, memberClusterReservedNSName string, applyStrategy *fleetv1beta1.ApplyStrategy, reportBackStrategy *fleetv1beta1.ReportBackStrategy, rawManifestJSON ...[]byte) { + work := testutilsresource.WorkObjectForTest(workName, memberClusterReservedNSName, "", "", applyStrategy, reportBackStrategy, rawManifestJSON...) 
Expect(hubClient.Create(ctx, work)).To(Succeed()) } @@ -121,19 +104,16 @@ func updateWorkObject(workName string, applyStrategy *fleetv1beta1.ApplyStrategy } func marshalK8sObjJSON(obj runtime.Object) []byte { - unstructuredObjMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj) - Expect(err).To(BeNil(), "Failed to convert the object to an unstructured object") - unstructuredObj := &unstructured.Unstructured{Object: unstructuredObjMap} - json, err := unstructuredObj.MarshalJSON() - Expect(err).To(BeNil(), "Failed to marshal the unstructured object to JSON") + json, err := testutilsresource.MarshalRuntimeObjToJSONForTest(obj) + Expect(err).To(BeNil(), "Failed to marshal the k8s object to JSON") return json } -func workFinalizerAddedActual(workName string) func() error { +func workFinalizerAddedActual(workNS, workName string) func() error { return func() error { // Retrieve the Work object. work := &fleetv1beta1.Work{} - if err := hubClient.Get(ctx, client.ObjectKey{Name: workName, Namespace: memberReservedNSName1}, work); err != nil { + if err := hubClient.Get(ctx, client.ObjectKey{Name: workName, Namespace: workNS}, work); err != nil { return fmt.Errorf("failed to retrieve the Work object: %w", err) } @@ -145,11 +125,11 @@ func workFinalizerAddedActual(workName string) func() error { } } -func appliedWorkCreatedActual(workName string) func() error { +func appliedWorkCreatedActual(memberClient client.Client, workNS, workName string) func() error { return func() error { // Retrieve the AppliedWork object. appliedWork := &fleetv1beta1.AppliedWork{} - if err := memberClient1.Get(ctx, client.ObjectKey{Name: workName, Namespace: memberReservedNSName1}, appliedWork); err != nil { + if err := memberClient.Get(ctx, client.ObjectKey{Name: workName}, appliedWork); err != nil { return fmt.Errorf("failed to retrieve the AppliedWork object: %w", err) } @@ -159,7 +139,7 @@ func appliedWorkCreatedActual(workName string) func() error { }, Spec: fleetv1beta1.AppliedWorkSpec{ WorkName: workName, - WorkNamespace: memberReservedNSName1, + WorkNamespace: workNS, }, } if diff := cmp.Diff( @@ -173,10 +153,10 @@ func appliedWorkCreatedActual(workName string) func() error { } } -func prepareAppliedWorkOwnerRef(workName string) *metav1.OwnerReference { +func prepareAppliedWorkOwnerRef(memberClient client.Client, workNS, workName string) *metav1.OwnerReference { // Retrieve the AppliedWork object. appliedWork := &fleetv1beta1.AppliedWork{} - Expect(memberClient1.Get(ctx, client.ObjectKey{Name: workName, Namespace: memberReservedNSName1}, appliedWork)).To(Succeed(), "Failed to retrieve the AppliedWork object") + Expect(memberClient.Get(ctx, client.ObjectKey{Name: workName, Namespace: workNS}, appliedWork)).To(Succeed(), "Failed to retrieve the AppliedWork object") // Prepare the expected OwnerReference. return &metav1.OwnerReference{ @@ -188,11 +168,11 @@ func prepareAppliedWorkOwnerRef(workName string) *metav1.OwnerReference { } } -func regularNSObjectAppliedActual(nsName string, appliedWorkOwnerRef *metav1.OwnerReference) func() error { +func regularNSObjectAppliedActual(memberClient client.Client, nsName string, appliedWorkOwnerRef *metav1.OwnerReference) func() error { return func() error { // Retrieve the NS object. 
gotNS := &corev1.Namespace{} - if err := memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, gotNS); err != nil { + if err := memberClient.Get(ctx, client.ObjectKey{Name: nsName}, gotNS); err != nil { return fmt.Errorf("failed to retrieve the NS object: %w", err) } @@ -370,11 +350,11 @@ func regularClusterRoleObjectAppliedActual(clusterRoleName string, appliedWorkOw } } -func regularConfigMapObjectAppliedActual(nsName, configMapName string, appliedWorkOwnerRef *metav1.OwnerReference) func() error { +func regularConfigMapObjectAppliedActual(memberClient client.Client, nsName, configMapName string, appliedWorkOwnerRef *metav1.OwnerReference) func() error { return func() error { // Retrieve the ConfigMap object. gotConfigMap := &corev1.ConfigMap{} - if err := memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, gotConfigMap); err != nil { + if err := memberClient.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, gotConfigMap); err != nil { return fmt.Errorf("failed to retrieve the ConfigMap object: %w", err) } @@ -504,6 +484,9 @@ func workStatusUpdated( work.Status, wantWorkStatus, ignoreFieldConditionLTTMsg, ignoreDiffDetailsObsTime, ignoreDriftDetailsObsTime, + // Back-reported status must be checked separately, as the serialization/deserialization process + // does not guarantee key order in objects. + ignoreBackReportedStatus, cmpopts.SortSlices(lessFuncPatchDetail), ); diff != "" { return fmt.Errorf("work status diff (-got, +want):\n%s", diff) @@ -546,11 +529,11 @@ func workStatusUpdated( } } -func appliedWorkStatusUpdated(workName string, appliedResourceMeta []fleetv1beta1.AppliedResourceMeta) func() error { +func appliedWorkStatusUpdated(memberClient client.Client, workName string, appliedResourceMeta []fleetv1beta1.AppliedResourceMeta) func() error { return func() error { // Retrieve the AppliedWork object. appliedWork := &fleetv1beta1.AppliedWork{} - if err := memberClient1.Get(ctx, client.ObjectKey{Name: workName, Namespace: memberReservedNSName1}, appliedWork); err != nil { + if err := memberClient.Get(ctx, client.ObjectKey{Name: workName}, appliedWork); err != nil { return fmt.Errorf("failed to retrieve the AppliedWork object: %w", err) } @@ -565,21 +548,6 @@ func appliedWorkStatusUpdated(workName string, appliedResourceMeta []fleetv1beta } } -func workRemovedActual(workName string) func() error { - // Wait for the removal of the Work object. - return func() error { - work := &fleetv1beta1.Work{} - if err := hubClient.Get(ctx, client.ObjectKey{Name: workName, Namespace: memberReservedNSName1}, work); !errors.IsNotFound(err) && err != nil { - return fmt.Errorf("work object still exists or an unexpected error occurred: %w", err) - } - if controllerutil.ContainsFinalizer(work, fleetv1beta1.WorkFinalizer) { - // The Work object is being deleted, but the finalizer is still present. - return fmt.Errorf("work object is being deleted, but the finalizer is still present") - } - return nil - } -} - func deleteWorkObject(workName, memberClusterReservedNSName string) { // Retrieve the Work object. work := &fleetv1beta1.Work{ @@ -591,10 +559,10 @@ func deleteWorkObject(workName, memberClusterReservedNSName string) { Expect(hubClient.Delete(ctx, work)).To(Succeed(), "Failed to delete the Work object") } -func checkNSOwnerReferences(workName, nsName string) { +func checkNSOwnerReferences(memberClient client.Client, workName, nsName string) { // Retrieve the AppliedWork object. 
appliedWork := &fleetv1beta1.AppliedWork{} - Expect(memberClient1.Get(ctx, client.ObjectKey{Name: workName}, appliedWork)).To(Succeed(), "Failed to retrieve the AppliedWork object") + Expect(memberClient.Get(ctx, client.ObjectKey{Name: workName}, appliedWork)).To(Succeed(), "Failed to retrieve the AppliedWork object") // Check that the Namespace object has the AppliedWork as an owner reference. ns := &corev1.Namespace{ @@ -602,7 +570,7 @@ func checkNSOwnerReferences(workName, nsName string) { Name: nsName, }, } - Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, ns)).To(Succeed(), "Failed to retrieve the Namespace object") + Expect(memberClient.Get(ctx, client.ObjectKey{Name: nsName}, ns)).To(Succeed(), "Failed to retrieve the Namespace object") Expect(ns.OwnerReferences).To(ContainElement(metav1.OwnerReference{ APIVersion: fleetv1beta1.GroupVersion.String(), Kind: "AppliedWork", @@ -612,11 +580,11 @@ func checkNSOwnerReferences(workName, nsName string) { }), " AppliedWork OwnerReference not found in Namespace object") } -func appliedWorkRemovedActual(workName, nsName string) func() error { +func appliedWorkRemovedActual(memberClient client.Client, workName string) func() error { return func() error { // Retrieve the AppliedWork object. appliedWork := &fleetv1beta1.AppliedWork{} - if err := memberClient1.Get(ctx, client.ObjectKey{Name: workName}, appliedWork); err != nil { + if err := memberClient.Get(ctx, client.ObjectKey{Name: workName}, appliedWork); err != nil { if errors.IsNotFound(err) { // The AppliedWork object has been deleted, which is expected. return nil @@ -627,7 +595,7 @@ func appliedWorkRemovedActual(workName, nsName string) func() error { // The AppliedWork object is being deleted, but the finalizer is still present. Remove the finalizer as there // are no real built-in controllers in this test environment to handle garbage collection. controllerutil.RemoveFinalizer(appliedWork, metav1.FinalizerDeleteDependents) - Expect(memberClient1.Update(ctx, appliedWork)).To(Succeed(), "Failed to remove the finalizer from the AppliedWork object") + Expect(memberClient.Update(ctx, appliedWork)).To(Succeed(), "Failed to remove the finalizer from the AppliedWork object") } return fmt.Errorf("appliedWork object still exists") } @@ -672,7 +640,7 @@ func regularClusterRoleRemovedActual(clusterRoleName string) func() error { } } -func regularConfigMapRemovedActual(nsName, configMapName string) func() error { +func regularConfigMapRemovedActual(memberClient client.Client, nsName, configMapName string) func() error { return func() error { // Retrieve the ConfigMap object. configMap := &corev1.ConfigMap{ @@ -681,12 +649,12 @@ func regularConfigMapRemovedActual(nsName, configMapName string) func() error { Name: configMapName, }, } - if err := memberClient1.Delete(ctx, configMap); err != nil && !errors.IsNotFound(err) { + if err := memberClient.Delete(ctx, configMap); err != nil && !errors.IsNotFound(err) { return fmt.Errorf("failed to delete the ConfigMap object: %w", err) } // Check that the ConfigMap object has been deleted. 
- if err := memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, configMap); !errors.IsNotFound(err) { + if err := memberClient.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, configMap); !errors.IsNotFound(err) { return fmt.Errorf("configMap object still exists or an unexpected error occurred: %w", err) } return nil @@ -781,24 +749,24 @@ var _ = Describe("applying manifests", func() { regularDeployJSON := marshalK8sObjJSON(regularDeploy) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -912,7 +880,7 @@ var _ = Describe("applying manifests", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -926,13 +894,13 @@ var _ = Describe("applying manifests", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -963,24 +931,24 @@ var _ = Describe("applying manifests", func() { regularDeployJSON := marshalK8sObjJSON(regularDeploy) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -1094,7 +1062,7 @@ var _ = Describe("applying manifests", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -1175,7 +1143,7 @@ var _ = Describe("applying manifests", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -1185,13 +1153,13 @@ var _ = Describe("applying manifests", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. @@ -1229,19 +1197,19 @@ var _ = Describe("applying manifests", func() { regularDeployJSON := marshalK8sObjJSON(regularDeploy) // Create a new Work object with all the manifest JSONs. 
- createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply some of the manifests", func() { @@ -1372,7 +1340,7 @@ var _ = Describe("applying manifests", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -1382,13 +1350,13 @@ var _ = Describe("applying manifests", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -1429,30 +1397,30 @@ var _ = Describe("applying manifests", func() { regularConfigMapJSON := marshalK8sObjJSON(regularConfigMap) // Create a new Work object with all the manifest JSONs. 
- createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, decodingErredDeployJSON, regularConfigMapJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, decodingErredDeployJSON, regularConfigMapJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") // Ensure that the ConfigMap object has been applied as expected. - regularConfigMapObjectAppliedActual := regularConfigMapObjectAppliedActual(nsName, configMapName, appliedWorkOwnerRef) + regularConfigMapObjectAppliedActual := regularConfigMapObjectAppliedActual(memberClient1, nsName, configMapName, appliedWorkOwnerRef) Eventually(regularConfigMapObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the ConfigMap object") Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularConfigMap)).To(Succeed(), "Failed to retrieve the ConfigMap object") }) @@ -1564,7 +1532,7 @@ var _ = Describe("applying manifests", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -1573,18 +1541,18 @@ var _ = Describe("applying manifests", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure applied manifest has been removed. - regularConfigMapRemovedActual := regularConfigMapRemovedActual(nsName, configMapName) + regularConfigMapRemovedActual := regularConfigMapRemovedActual(memberClient1, nsName, configMapName) Eventually(regularConfigMapRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the ConfigMap object") // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). 
- checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -1617,19 +1585,19 @@ var _ = Describe("applying manifests", func() { malformedConfigMapJSON := marshalK8sObjJSON(malformedConfigMap) // Create a new Work object with all the manifest JSONs and proper apply strategy. - createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, malformedConfigMapJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, malformedConfigMapJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should not apply malformed manifest", func() { @@ -1645,7 +1613,7 @@ var _ = Describe("applying manifests", func() { It("should apply the other manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -1727,7 +1695,7 @@ var _ = Describe("applying manifests", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -1736,10 +1704,10 @@ var _ = Describe("applying manifests", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -1778,24 +1746,24 @@ var _ = Describe("work applier garbage collection", func() { regularDeployJSON := marshalK8sObjJSON(regularDeploy) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName1, &fleetv1beta1.ApplyStrategy{AllowCoOwnership: true}, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, &fleetv1beta1.ApplyStrategy{AllowCoOwnership: true}, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -1909,7 +1877,7 @@ var _ = Describe("work applier garbage collection", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -1993,13 +1961,13 @@ var _ = Describe("work applier garbage collection", func() { AfterAll(func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, 2*time.Minute, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // Ensure that the Deployment object still exists. @@ -2043,24 +2011,24 @@ var _ = Describe("work applier garbage collection", func() { regularClusterRoleJSON := marshalK8sObjJSON(regularClusterRole) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName1, &fleetv1beta1.ApplyStrategy{AllowCoOwnership: true}, regularNSJSON, regularDeployJSON, regularClusterRoleJSON) + createWorkObject(workName, memberReservedNSName1, &fleetv1beta1.ApplyStrategy{AllowCoOwnership: true}, nil, regularNSJSON, regularDeployJSON, regularClusterRoleJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -2215,7 +2183,7 @@ var _ = Describe("work applier garbage collection", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -2309,17 +2277,17 @@ var _ = Describe("work applier garbage collection", func() { AfterAll(func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure applied manifest has been removed. 
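// The Work-removal check on the hub is now delegated to a shared helper,
// testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1),
// which these hunks poll with Eventually(...).Should(Succeed(), ...). Its implementation is not
// part of this diff; the function below is only a sketch of what such a shared actual is assumed
// to look like, with a hypothetical name and an assumed Work type from the fleet v1beta1 API.
func workObjectRemovedActualSketch(ctx context.Context, hubClient client.Client, workName, reservedNSName string) func() error {
	return func() error {
		work := &fleetv1beta1.Work{}
		err := hubClient.Get(ctx, client.ObjectKey{Namespace: reservedNSName, Name: workName}, work)
		switch {
		case errors.IsNotFound(err):
			// The Work object is gone from the member's reserved namespace on the hub.
			return nil
		case err != nil:
			return fmt.Errorf("failed to get the Work object: %w", err)
		default:
			return fmt.Errorf("work object %s/%s still exists", reservedNSName, workName)
		}
	}
}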
regularDeployRemovedActual := regularDeployRemovedActual(nsName, deployName) Eventually(regularDeployRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the deployment object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, 2*time.Minute, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // Ensure that the ClusterRole object still exists. @@ -2362,24 +2330,24 @@ var _ = Describe("work applier garbage collection", func() { regularClusterRoleJSON := marshalK8sObjJSON(regularClusterRole) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName1, &fleetv1beta1.ApplyStrategy{AllowCoOwnership: true}, regularNSJSON, regularDeployJSON, regularClusterRoleJSON) + createWorkObject(workName, memberReservedNSName1, &fleetv1beta1.ApplyStrategy{AllowCoOwnership: true}, nil, regularNSJSON, regularDeployJSON, regularClusterRoleJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -2534,7 +2502,7 @@ var _ = Describe("work applier garbage collection", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -2626,17 +2594,17 @@ var _ = Describe("work applier garbage collection", func() { AfterAll(func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure applied manifest has been removed. regularClusterRoleRemovedActual := regularClusterRoleRemovedActual(clusterRoleName) Eventually(regularClusterRoleRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the ClusterRole object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, 2*time.Minute, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // Ensure that the Deployment object still exists. 
@@ -2685,24 +2653,24 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeIfNoDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -2814,7 +2782,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -2828,13 +2796,13 @@ var _ = Describe("drift detection and takeover", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
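// The drift-detection and takeover specs above and below construct Work objects with different
// ApplyStrategy combinations. The field names and enum values below are taken verbatim from the
// createWorkObject call sites in these hunks; the variable names are illustrative only.
var ifNoDiffPartialComparison = &fleetv1beta1.ApplyStrategy{
	ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison,
	WhenToTakeOver:   fleetv1beta1.WhenToTakeOverTypeIfNoDiff,
}

var ifNotDriftedFullComparison = &fleetv1beta1.ApplyStrategy{
	ComparisonOption: fleetv1beta1.ComparisonOptionTypeFullComparison,
	WhenToApply:      fleetv1beta1.WhenToApplyTypeIfNotDrifted,
}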
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -2885,19 +2853,19 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeIfNoDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply some manifests (while preserving diffs in unmanaged fields)", func() { @@ -3084,7 +3052,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -3097,10 +3065,10 @@ var _ = Describe("drift detection and takeover", func() { Consistently(regularDeployNotRemovedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to remove the deployment object") // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -3150,16 +3118,16 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypeFullComparison, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeIfNoDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") }) @@ -3362,7 +3330,7 @@ var _ = Describe("drift detection and takeover", func() { It("should update the AppliedWork object status", func() { // No object can be applied, hence no resource are bookkept in the AppliedWork object status. - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, nil) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, nil) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -3375,10 +3343,10 @@ var _ = Describe("drift detection and takeover", func() { Consistently(regularDeployNotRemovedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to remove the deployment object") // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -3413,24 +3381,24 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, WhenToApply: fleetv1beta1.WhenToApplyTypeIfNotDrifted, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -3544,7 +3512,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -3776,7 +3744,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -3790,13 +3758,13 @@ var _ = Describe("drift detection and takeover", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -3840,19 +3808,19 @@ var _ = Describe("drift detection and takeover", func() { WhenToApply: fleetv1beta1.WhenToApplyTypeIfNotDrifted, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeAlways, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularJobJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularJobJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should update the Work object status", func() { @@ -3927,7 +3895,7 @@ var _ = Describe("drift detection and takeover", func() { It("should apply all manifests", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -3967,7 +3935,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4177,7 +4145,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4190,10 +4158,10 @@ var _ = Describe("drift detection and takeover", func() { Consistently(jobNotRemovedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to remove the job object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -4223,24 +4191,24 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypeFullComparison, WhenToApply: fleetv1beta1.WhenToApplyTypeIfNotDrifted, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that 
the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -4307,7 +4275,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4419,7 +4387,7 @@ var _ = Describe("drift detection and takeover", func() { It("should update the AppliedWork object status", func() { // No object can be applied, hence no resource are bookkept in the AppliedWork object status. - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, nil) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, nil) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4428,10 +4396,10 @@ var _ = Describe("drift detection and takeover", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -4463,24 +4431,24 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, WhenToApply: fleetv1beta1.WhenToApplyTypeAlways, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, 
memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -4547,7 +4515,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4673,7 +4641,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4683,13 +4651,13 @@ var _ = Describe("drift detection and takeover", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -4720,24 +4688,24 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, WhenToApply: fleetv1beta1.WhenToApplyTypeIfNotDrifted, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, marshalK8sObjJSON(regularNS)) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, marshalK8sObjJSON(regularNS)) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -4804,7 +4772,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -4917,7 +4885,7 @@ var _ = Describe("drift detection and takeover", func() { It("should update the AppliedWork object status", func() { // No object can be applied, hence no resource are bookkept in the AppliedWork object status. 
- appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, nil) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, nil) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -5034,7 +5002,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -5044,13 +5012,13 @@ var _ = Describe("drift detection and takeover", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -5081,24 +5049,24 @@ var _ = Describe("drift detection and takeover", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, WhenToApply: fleetv1beta1.WhenToApplyTypeIfNotDrifted, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, marshalK8sObjJSON(regularNS)) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, marshalK8sObjJSON(regularNS)) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -5165,7 +5133,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -5322,13 +5290,13 @@ var _ = Describe("drift detection and takeover", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -5366,19 +5334,19 @@ var _ = Describe("drift detection and takeover", func() { applyStrategy := &fleetv1beta1.ApplyStrategy{ WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeNever, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, marshalK8sObjJSON(regularDeploy)) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, marshalK8sObjJSON(regularDeploy)) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests that haven not been created yet", func() { @@ -5494,7 +5462,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, 
appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -5507,10 +5475,10 @@ var _ = Describe("drift detection and takeover", func() { Eventually(regularDeployRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the deployment object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -5552,30 +5520,30 @@ var _ = Describe("drift detection and takeover", func() { WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeNever, ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularCMJSON, regularSecretJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularCMJSON, regularSecretJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") // Ensure that the ConfigMap object has been applied as expected. 
- regularCMObjectAppliedActual := regularConfigMapObjectAppliedActual(nsName, configMapName, appliedWorkOwnerRef) + regularCMObjectAppliedActual := regularConfigMapObjectAppliedActual(memberClient1, nsName, configMapName, appliedWorkOwnerRef) Eventually(regularCMObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the configMap object") Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularCM)).To(Succeed(), "Failed to retrieve the ConfigMap object") @@ -5722,7 +5690,7 @@ var _ = Describe("drift detection and takeover", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -6082,7 +6050,7 @@ var _ = Describe("drift detection and takeover", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that the ConfigMap object has been removed. - regularCMRemovedActual := regularConfigMapRemovedActual(nsName, configMapName) + regularCMRemovedActual := regularConfigMapRemovedActual(memberClient1, nsName, configMapName) Eventually(regularCMRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the ConfigMap object") // Ensure that the secret object has been removed. @@ -6091,13 +6059,13 @@ var _ = Describe("drift detection and takeover", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -6126,19 +6094,19 @@ var _ = Describe("report diff", func() { applyStrategy := &fleetv1beta1.ApplyStrategy{ Type: fleetv1beta1.ApplyStrategyTypeReportDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should not apply the manifests", func() { @@ -6191,7 +6159,7 @@ var _ = Describe("report diff", func() { It("should update the AppliedWork object status", func() { // Prepare the status information. - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, nil) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, nil) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -6200,10 +6168,10 @@ var _ = Describe("report diff", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -6245,19 +6213,19 @@ var _ = Describe("report diff", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, Type: fleetv1beta1.ApplyStrategyTypeReportDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should own the objects, but not apply any manifests", func() { @@ -6425,7 +6393,7 @@ var _ = Describe("report diff", func() { // Prepare the status information. var appliedResourceMeta []fleetv1beta1.AppliedResourceMeta - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -6515,7 +6483,7 @@ var _ = Describe("report diff", func() { // Prepare the status information. var appliedResourceMeta []fleetv1beta1.AppliedResourceMeta - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -6528,10 +6496,10 @@ var _ = Describe("report diff", func() { Consistently(regularDeployNotRemovedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to remove the deployment object") // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -6573,16 +6541,16 @@ var _ = Describe("report diff", func() { Type: fleetv1beta1.ApplyStrategyTypeReportDiff, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeNever, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") }) @@ -6730,7 +6698,7 @@ var _ = Describe("report diff", func() { // Prepare the status information. var appliedResourceMeta []fleetv1beta1.AppliedResourceMeta - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -6743,10 +6711,10 @@ var _ = Describe("report diff", func() { Eventually(regularDeployRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the deployment object") // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -6782,19 +6750,19 @@ var _ = Describe("report diff", func() { applyStrategy := &fleetv1beta1.ApplyStrategy{ Type: fleetv1beta1.ApplyStrategyTypeReportDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, malformedConfigMapJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, malformedConfigMapJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should not apply any manifest", func() { @@ -6874,7 +6842,7 @@ var _ = Describe("report diff", func() { It("should update the AppliedWork object status", func() { // Prepare the status information. - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, nil) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, nil) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -6883,10 +6851,10 @@ var _ = Describe("report diff", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -6935,19 +6903,19 @@ var _ = Describe("report diff", func() { Type: fleetv1beta1.ApplyStrategyTypeReportDiff, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeNever, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, updatedJSONJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, updatedJSONJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should update the Work object status", func() { @@ -7136,7 +7104,7 @@ var _ = Describe("report diff", func() { // Prepare the status information. var appliedResourceMeta []fleetv1beta1.AppliedResourceMeta - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -7149,10 +7117,10 @@ var _ = Describe("report diff", func() { Consistently(jobNotRemovedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to remove the job object") // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -7201,16 +7169,16 @@ var _ = Describe("report diff", func() { WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeNever, ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularCMJSON, regularSecretJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularCMJSON, regularSecretJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") }) @@ -7320,7 +7288,7 @@ var _ = Describe("report diff", func() { It("should update the AppliedWork object status", func() { // Prepare the status information. - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, nil) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, nil) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -7598,7 +7566,7 @@ var _ = Describe("report diff", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that the ConfigMap object has been removed. - regularCMRemovedActual := regularConfigMapRemovedActual(nsName, configMapName) + regularCMRemovedActual := regularConfigMapRemovedActual(memberClient1, nsName, configMapName) Eventually(regularCMRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the ConfigMap object") // Ensure that the secret object has been removed. @@ -7606,10 +7574,10 @@ var _ = Describe("report diff", func() { Eventually(regularSecretRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Secret object") // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -7653,19 +7621,19 @@ var _ = Describe("handling different apply strategies", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, Type: fleetv1beta1.ApplyStrategyTypeReportDiff, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should own the objects, but not apply any manifests", func() { @@ -7833,7 +7801,7 @@ var _ = Describe("handling different apply strategies", func() { // Prepare the status information. var appliedResourceMeta []fleetv1beta1.AppliedResourceMeta - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -7955,7 +7923,7 @@ var _ = Describe("handling different apply strategies", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -7969,13 +7937,13 @@ var _ = Describe("handling different apply strategies", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. 
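// [Editor's note, illustrative only; not part of the patch] Two mechanical substitutions repeat through
// these hunks: createWorkObject gains a fourth argument for the report-back strategy (nil keeps the
// previous behaviour; the new "status back-reporting" block at the end of the file passes a non-nil
// strategy in that position), and the suite-local workRemovedActual is replaced by the shared
// testutilsactuals.WorkObjectRemovedActual, invoked against the hub client and the member's reserved
// namespace. Judging from the failure messages, such a shared actual confirms the Work object is gone
// from the hub; a hedged sketch (hypothetical name, assumes k8s.io/apimachinery/pkg/api/errors as apierrors):
func workObjectRemovedSketch(hubClient client.Client, nsName, workName string) func() error {
	return func() error {
		work := &fleetv1beta1.Work{}
		err := hubClient.Get(ctx, client.ObjectKey{Namespace: nsName, Name: workName}, work)
		switch {
		case err == nil:
			return fmt.Errorf("work object %s/%s still exists", nsName, workName)
		case apierrors.IsNotFound(err):
			// The cleanup finalizer has been removed and the object is fully deleted.
			return nil
		default:
			return fmt.Errorf("failed to check the Work object: %w", err)
		}
	}
}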
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -8010,24 +7978,24 @@ var _ = Describe("handling different apply strategies", func() { ComparisonOption: fleetv1beta1.ComparisonOptionTypePartialComparison, Type: fleetv1beta1.ApplyStrategyTypeServerSideApply, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -8067,7 +8035,7 @@ var _ = Describe("handling different apply strategies", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -8216,7 +8184,7 @@ var _ = Describe("handling different apply strategies", func() { // Prepare the status information. 
var appliedResourceMeta []fleetv1beta1.AppliedResourceMeta - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -8230,13 +8198,13 @@ var _ = Describe("handling different apply strategies", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -8279,19 +8247,19 @@ var _ = Describe("handling different apply strategies", func() { Type: fleetv1beta1.ApplyStrategyTypeClientSideApply, WhenToTakeOver: fleetv1beta1.WhenToTakeOverTypeNever, } - createWorkObject(workName, memberReservedNSName1, applyStrategy, regularNSJSON, regularDeployJSON) + createWorkObject(workName, memberReservedNSName1, applyStrategy, nil, regularNSJSON, regularDeployJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should not take over some objects", func() { @@ -8451,7 +8419,7 @@ var _ = Describe("handling different apply strategies", func() { It("should take over some objects", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -8601,7 +8569,7 @@ var _ = Describe("handling different apply strategies", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") }) @@ -8615,13 +8583,13 @@ var _ = Describe("handling different apply strategies", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -8678,24 +8646,24 @@ var _ = Describe("handling different apply strategies", func() { oversizedCMJSON := marshalK8sObjJSON(oversizedCM) // Create a new Work object with all the manifest JSONs and proper apply strategy. - createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, oversizedCMJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, oversizedCMJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply the manifests", func() { // Ensure that the NS object has been applied as expected. 
- regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -8826,7 +8794,7 @@ var _ = Describe("handling different apply strategies", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure that all applied manifests have been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") Eventually(func() error { @@ -8886,30 +8854,30 @@ var _ = Describe("negative cases", func() { malformedConfigMapJSON := marshalK8sObjJSON(malformedConfigMap) // Create a Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, malformedConfigMapJSON, regularConfigMapJson) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, malformedConfigMapJSON, regularConfigMapJson) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply some manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") // Ensure that the ConfigMap object has been applied as expected. 
- regularConfigMapAppliedActual := regularConfigMapObjectAppliedActual(nsName, configMapName, appliedWorkOwnerRef) + regularConfigMapAppliedActual := regularConfigMapObjectAppliedActual(memberClient1, nsName, configMapName, appliedWorkOwnerRef) Eventually(regularConfigMapAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the ConfigMap object") Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularConfigMap)).To(Succeed(), "Failed to retrieve the ConfigMap object") @@ -9026,7 +8994,7 @@ var _ = Describe("negative cases", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") Consistently(appliedWorkStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "AppliedWork status changed unexpectedly") }) @@ -9036,18 +9004,18 @@ var _ = Describe("negative cases", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure applied manifest has been removed. - regularConfigMapRemovedActual := regularConfigMapRemovedActual(nsName, configMapName) + regularConfigMapRemovedActual := regularConfigMapRemovedActual(memberClient1, nsName, configMapName) Eventually(regularConfigMapRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the configMap object") // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -9079,24 +9047,24 @@ var _ = Describe("negative cases", func() { regularConfigMapJSON := marshalK8sObjJSON(regularConfigMap) // Create a Work object with all the manifest JSONs. 
- createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, regularConfigMapJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, regularConfigMapJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply some manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") @@ -9177,7 +9145,7 @@ var _ = Describe("negative cases", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") Consistently(appliedWorkStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "AppliedWork status changed unexpectedly") }) @@ -9188,13 +9156,13 @@ var _ = Describe("negative cases", func() { // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -9230,30 +9198,30 @@ var _ = Describe("negative cases", func() { duplicatedConfigMap.Data[dummyLabelKey] = dummyLabelValue2 // Create a Work object with all the manifest JSONs. 
- createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, regularConfigMapJSON, duplicatedConfigMapJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, regularConfigMapJSON, duplicatedConfigMapJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply some manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") // Ensure that the ConfigMap object has been applied as expected. - regularConfigMapAppliedActual := regularConfigMapObjectAppliedActual(nsName, configMapName, appliedWorkOwnerRef) + regularConfigMapAppliedActual := regularConfigMapObjectAppliedActual(memberClient1, nsName, configMapName, appliedWorkOwnerRef) Eventually(regularConfigMapAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the ConfigMap object") Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularConfigMap)).To(Succeed(), "Failed to retrieve the ConfigMap object") @@ -9371,7 +9339,7 @@ var _ = Describe("negative cases", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") Consistently(appliedWorkStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "AppliedWork status changed unexpectedly") }) @@ -9425,30 +9393,30 @@ var _ = Describe("negative cases", func() { duplicatedConfigMapJSON := marshalK8sObjJSON(duplicatedConfigMap) // Create a Work object with all the manifest JSONs. 
- createWorkObject(workName, memberReservedNSName1, nil, regularNSJSON, regularConfigMapJSON, malformedConfigMapJSON, configMapWithGenerateNameJSON, duplicatedConfigMapJSON) + createWorkObject(workName, memberReservedNSName1, nil, nil, regularNSJSON, regularConfigMapJSON, malformedConfigMapJSON, configMapWithGenerateNameJSON, duplicatedConfigMapJSON) }) It("should add cleanup finalizer to the Work object", func() { - finalizerAddedActual := workFinalizerAddedActual(workName) + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") }) It("should prepare an AppliedWork object", func() { - appliedWorkCreatedActual := appliedWorkCreatedActual(workName) + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") - appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(workName) + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) }) It("should apply some manifests", func() { // Ensure that the NS object has been applied as expected. - regularNSObjectAppliedActual := regularNSObjectAppliedActual(nsName, appliedWorkOwnerRef) + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") // Ensure that the ConfigMap object has been applied as expected. - regularConfigMapAppliedActual := regularConfigMapObjectAppliedActual(nsName, configMapName, appliedWorkOwnerRef) + regularConfigMapAppliedActual := regularConfigMapObjectAppliedActual(memberClient1, nsName, configMapName, appliedWorkOwnerRef) Eventually(regularConfigMapAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the ConfigMap object") Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularConfigMap)).To(Succeed(), "Failed to retrieve the ConfigMap object") @@ -9602,7 +9570,7 @@ var _ = Describe("negative cases", func() { }, } - appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(workName, appliedResourceMeta) + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") Consistently(appliedWorkStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "AppliedWork status changed unexpectedly") }) @@ -9612,18 +9580,18 @@ var _ = Describe("negative cases", func() { deleteWorkObject(workName, memberReservedNSName1) // Ensure applied manifest has been removed. 
- regularConfigMapRemovedActual := regularConfigMapRemovedActual(nsName, configMapName) + regularConfigMapRemovedActual := regularConfigMapRemovedActual(memberClient1, nsName, configMapName) Eventually(regularConfigMapRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the configMap object") // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). - checkNSOwnerReferences(workName, nsName) + checkNSOwnerReferences(memberClient1, workName, nsName) // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace @@ -9631,3 +9599,378 @@ var _ = Describe("negative cases", func() { }) }) }) + +var _ = Describe("status back-reporting", func() { + deploymentKind := "Deployment" + deployStatusBackReportedActual := func(workName, nsName, deployName string, beforeTimestamp metav1.Time) func() error { + return func() error { + workObj := &fleetv1beta1.Work{} + if err := hubClient.Get(ctx, client.ObjectKey{Namespace: memberReservedNSName1, Name: workName}, workObj); err != nil { + return fmt.Errorf("failed to retrieve the Work object: %w", err) + } + + var backReportedDeployStatusWrapper []byte + var backReportedDeployStatusObservedTime metav1.Time + for idx := range workObj.Status.ManifestConditions { + manifestCond := &workObj.Status.ManifestConditions[idx] + + if manifestCond.Identifier.Kind == deploymentKind && manifestCond.Identifier.Name == deployName && manifestCond.Identifier.Namespace == nsName { + backReportedDeployStatusWrapper = manifestCond.BackReportedStatus.ObservedStatus.Raw + backReportedDeployStatusObservedTime = manifestCond.BackReportedStatus.ObservationTime + break + } + } + + if len(backReportedDeployStatusWrapper) == 0 { + return fmt.Errorf("no status back-reported for deployment") + } + if backReportedDeployStatusObservedTime.Before(&beforeTimestamp) { + return fmt.Errorf("back-reported deployment status observation time, want after %v, got %v", beforeTimestamp, backReportedDeployStatusObservedTime) + } + + deployWithBackReportedStatus := &appsv1.Deployment{} + if err := json.Unmarshal(backReportedDeployStatusWrapper, deployWithBackReportedStatus); err != nil { + return fmt.Errorf("failed to unmarshal wrapped back-reported deployment status: %w", err) + } + currentDeployWithStatus := &appsv1.Deployment{} + if err := memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: deployName}, currentDeployWithStatus); err != nil { + return fmt.Errorf("failed to retrieve Deployment object from member cluster side: %w", err) + } + + if diff := cmp.Diff(deployWithBackReportedStatus.Status, currentDeployWithStatus.Status); diff != "" { + return fmt.Errorf("back-reported deployment status mismatch (-got, +want):\n%s", diff) + } + 
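// Reaching this point means a back-reported status for the Deployment is present on the Work object,
+ // its observation time is not before the supplied timestamp, and the wrapped status matches the live
+ // Deployment status on the member cluster.
+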
return nil + } + } + + Context("can handle both object with status and object with no status", Ordered, func() { + workName := fmt.Sprintf(workNameTemplate, utils.RandStr()) + // The environment prepared by the envtest package does not support namespace + // deletion; each test case would use a new namespace. + nsName := fmt.Sprintf(nsNameTemplate, utils.RandStr()) + + var appliedWorkOwnerRef *metav1.OwnerReference + // Note: namespaces and deployments have status subresources; config maps do not. + var regularNS *corev1.Namespace + var regularDeploy *appsv1.Deployment + var regularCM *corev1.ConfigMap + + beforeTimestamp := metav1.Now() + + BeforeAll(func() { + // Prepare a NS object. + regularNS = ns.DeepCopy() + regularNS.Name = nsName + regularNSJSON := marshalK8sObjJSON(regularNS) + + // Prepare a Deployment object. + regularDeploy = deploy.DeepCopy() + regularDeploy.Namespace = nsName + regularDeploy.Name = deployName + regularDeployJSON := marshalK8sObjJSON(regularDeploy) + + // Prepare a ConfigMap object. + regularCM = configMap.DeepCopy() + regularCM.Namespace = nsName + regularCMJSON := marshalK8sObjJSON(regularCM) + + // Create a new Work object with all the manifest JSONs. + reportBackStrategy := &fleetv1beta1.ReportBackStrategy{ + Type: fleetv1beta1.ReportBackStrategyTypeMirror, + Destination: ptr.To(fleetv1beta1.ReportBackDestinationWorkAPI), + } + createWorkObject(workName, memberReservedNSName1, nil, reportBackStrategy, regularNSJSON, regularDeployJSON, regularCMJSON) + }) + + It("should add cleanup finalizer to the Work object", func() { + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName1, workName) + Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") + }) + + It("should prepare an AppliedWork object", func() { + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient1, memberReservedNSName1, workName) + Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") + + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient1, memberReservedNSName1, workName) + }) + + It("can mark the deployment as available", func() { + markDeploymentAsAvailable(nsName, deployName) + }) + + It("should update the Work object status", func() { + // Prepare the status information. 
+ workConds := []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: condition.WorkAllManifestsAppliedReason, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: condition.WorkAllManifestsAvailableReason, + }, + } + manifestConds := []fleetv1beta1.ManifestCondition{ + { + Identifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 0, + Group: "", + Version: "v1", + Kind: "Namespace", + Resource: "namespaces", + Name: nsName, + }, + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: string(ApplyOrReportDiffResTypeApplied), + ObservedGeneration: 0, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: string(AvailabilityResultTypeAvailable), + ObservedGeneration: 0, + }, + }, + }, + { + Identifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 1, + Group: "apps", + Version: "v1", + Kind: "Deployment", + Resource: "deployments", + Name: deployName, + Namespace: nsName, + }, + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: string(ApplyOrReportDiffResTypeApplied), + ObservedGeneration: 1, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: string(AvailabilityResultTypeAvailable), + ObservedGeneration: 1, + }, + }, + }, + { + Identifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 2, + Group: "", + Version: "v1", + Kind: "ConfigMap", + Resource: "configmaps", + Name: configMapName, + Namespace: nsName, + }, + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: string(ApplyOrReportDiffResTypeApplied), + ObservedGeneration: 0, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: string(AvailabilityResultTypeAvailable), + ObservedGeneration: 0, + }, + }, + }, + } + + workStatusUpdatedActual := workStatusUpdated(memberReservedNSName1, workName, workConds, manifestConds, nil, nil) + Eventually(workStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update work status") + }) + + It("should apply the manifests", func() { + // Ensure that the NS object has been applied as expected. + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient1, nsName, appliedWorkOwnerRef) + Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") + + Expect(memberClient1.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") + + // Ensure that the Deployment object has been applied as expected. + regularDeploymentObjectAppliedActual := regularDeploymentObjectAppliedActual(nsName, deployName, appliedWorkOwnerRef) + Eventually(regularDeploymentObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the deployment object") + + Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: deployName}, regularDeploy)).To(Succeed(), "Failed to retrieve the Deployment object") + + // Ensure that the ConfigMap object has been applied as expected. 
+ regularCMObjectAppliedActual := regularConfigMapObjectAppliedActual(memberClient1, nsName, configMapName, appliedWorkOwnerRef) + Eventually(regularCMObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the config map object") + + Expect(memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularCM)).To(Succeed(), "Failed to retrieve the ConfigMap object") + }) + + It("should back-report deployment status to the Work object", func() { + Eventually(deployStatusBackReportedActual(workName, nsName, deployName, beforeTimestamp), eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to back-report deployment status to the Work object") + }) + + It("should handle objects with no status gracefully", func() { + Eventually(func() error { + workObj := &fleetv1beta1.Work{} + if err := hubClient.Get(ctx, client.ObjectKey{Namespace: memberReservedNSName1, Name: workName}, workObj); err != nil { + return fmt.Errorf("failed to retrieve the Work object: %w", err) + } + + for idx := range workObj.Status.ManifestConditions { + manifestCond := &workObj.Status.ManifestConditions[idx] + + if manifestCond.Identifier.Kind == "ConfigMap" && manifestCond.Identifier.Name == configMapName && manifestCond.Identifier.Namespace == nsName { + if manifestCond.BackReportedStatus != nil { + return fmt.Errorf("back-reported status for configMap object, want empty, got %s", string(manifestCond.BackReportedStatus.ObservedStatus.Raw)) + } + return nil + } + } + return fmt.Errorf("configMap object not found") + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to handle objects with no status gracefully") + }) + + It("can refresh deployment status", func() { + // Retrieve the Deployment object and update its replica count. + // + // Use an Eventually block to reduce flakiness. + Eventually(func() error { + deploy := &appsv1.Deployment{} + if err := memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: deployName}, deploy); err != nil { + return fmt.Errorf("failed to retrieve the Deployment object: %w", err) + } + + deploy.Spec.Replicas = ptr.To(int32(10)) + if err := memberClient1.Update(ctx, deploy); err != nil { + return fmt.Errorf("failed to update the Deployment object: %w", err) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to retrieve and update the Deployment object") + + // Refresh the status of the Deployment. + // + // Note that the Deployment object now becomes unavailable. 
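+ // The status fields must be written by hand here: the envtest environment runs only the API server, so no Deployment controller is present to populate them.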
+ Eventually(func() error { + deploy := &appsv1.Deployment{} + if err := memberClient1.Get(ctx, client.ObjectKey{Namespace: nsName, Name: deployName}, deploy); err != nil { + return fmt.Errorf("failed to retrieve the Deployment object: %w", err) + } + + now := metav1.Now() + deploy.Status = appsv1.DeploymentStatus{ + ObservedGeneration: deploy.Generation, + Replicas: 10, + UpdatedReplicas: 2, + ReadyReplicas: 8, + AvailableReplicas: 8, + UnavailableReplicas: 2, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: corev1.ConditionFalse, + Reason: "MarkedAsUnavailable", + Message: "Deployment has been marked as unavailable", + LastUpdateTime: now, + LastTransitionTime: now, + }, + { + Type: appsv1.DeploymentProgressing, + Status: corev1.ConditionTrue, + Reason: "MarkedAsProgressing", + Message: "Deployment has been marked as progressing", + LastUpdateTime: now, + LastTransitionTime: now, + }, + }, + } + if err := memberClient1.Status().Update(ctx, deploy); err != nil { + return fmt.Errorf("failed to update the Deployment status: %w", err) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to refresh the status of the Deployment") + }) + + It("should back-report refreshed deployment status to the Work object", func() { + Eventually(deployStatusBackReportedActual(workName, nsName, deployName, beforeTimestamp), eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to back-report deployment status to the Work object") + }) + + It("should update the AppliedWork object status", func() { + // Prepare the status information. + appliedResourceMeta := []fleetv1beta1.AppliedResourceMeta{ + { + WorkResourceIdentifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 0, + Group: "", + Version: "v1", + Kind: "Namespace", + Resource: "namespaces", + Name: nsName, + }, + UID: regularNS.UID, + }, + { + WorkResourceIdentifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 1, + Group: "apps", + Version: "v1", + Kind: "Deployment", + Resource: "deployments", + Name: deployName, + Namespace: nsName, + }, + UID: regularDeploy.UID, + }, + { + WorkResourceIdentifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 2, + Group: "", + Version: "v1", + Kind: "ConfigMap", + Resource: "configmaps", + Name: configMapName, + Namespace: nsName, + }, + UID: regularCM.UID, + }, + } + + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient1, workName, appliedResourceMeta) + Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") + }) + + AfterAll(func() { + // Delete the Work object and related resources. + deleteWorkObject(workName, memberReservedNSName1) + + // Ensure applied manifest has been removed. + regularDeployRemovedActual := regularDeployRemovedActual(nsName, deployName) + Eventually(regularDeployRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the deployment object") + + regularCMRemovedActual := regularConfigMapRemovedActual(memberClient1, nsName, configMapName) + Eventually(regularCMRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the configMap object") + + // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object + // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). 
+ checkNSOwnerReferences(memberClient1, workName, nsName) + + // Ensure that the AppliedWork object has been removed. + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient1, workName) + Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") + + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName1) + Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") + + // The environment prepared by the envtest package does not support namespace + // deletion; consequently this test suite would not attempt to verify its deletion. + }) + }) +}) diff --git a/pkg/controllers/workapplier/preprocess.go b/pkg/controllers/workapplier/preprocess.go index 589c8093f..863cd8795 100644 --- a/pkg/controllers/workapplier/preprocess.go +++ b/pkg/controllers/workapplier/preprocess.go @@ -489,7 +489,7 @@ func (r *Reconciler) removeOneLeftOverManifest( return nil case err != nil: // Failed to retrieve the object from the member cluster. - wrappedErr := controller.NewAPIServerError(true, err) + wrappedErr := controller.NewAPIServerError(false, err) // false as dynamic client is non-caching. return fmt.Errorf("failed to retrieve the object from the member cluster (gvr=%+v, manifestObj=%+v): %w", gvr, klog.KRef(manifestNamespace, manifestName), wrappedErr) case inMemberClusterObj.GetDeletionTimestamp() != nil: // The object has been marked for deletion; no further action is needed. diff --git a/pkg/controllers/workapplier/process.go b/pkg/controllers/workapplier/process.go index 00eb9234a..fededa20c 100644 --- a/pkg/controllers/workapplier/process.go +++ b/pkg/controllers/workapplier/process.go @@ -229,7 +229,7 @@ func (r *Reconciler) findInMemberClusterObjectFor( return false default: // An unexpected error has occurred. - wrappedErr := controller.NewAPIServerError(true, err) + wrappedErr := controller.NewAPIServerError(false, err) // false as dynamic client is non-caching. bundle.applyOrReportDiffErr = fmt.Errorf("failed to find the corresponding object for the manifest object in the member cluster: %w", wrappedErr) bundle.applyOrReportDiffResTyp = ApplyOrReportDiffResTypeFailedToFindObjInMemberCluster klog.ErrorS(wrappedErr, diff --git a/pkg/controllers/workapplier/status.go b/pkg/controllers/workapplier/status.go index a309dbea5..49511dd11 100644 --- a/pkg/controllers/workapplier/status.go +++ b/pkg/controllers/workapplier/status.go @@ -18,10 +18,14 @@ package workapplier import ( "context" + "encoding/json" "fmt" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" "k8s.io/utils/ptr" @@ -39,11 +43,13 @@ const ( // refreshWorkStatus refreshes the status of a Work object based on the processing results of its manifests. // // TO-DO (chenyu1): refactor this method a bit to reduce its complexity and enable parallelization. 
-func (r *Reconciler) refreshWorkStatus( +func (r *Reconciler) refreshWorkStatus( //nolint:gocyclo ctx context.Context, work *fleetv1beta1.Work, bundles []*manifestProcessingBundle, ) error { + originalStatus := work.Status.DeepCopy() + // Note (chenyu1): this method can run in parallel; however, for simplicity reasons, // considering that in most of the time the count of manifests would be low, currently // Fleet still does the status refresh sequentially. @@ -94,7 +100,10 @@ func (r *Reconciler) refreshWorkStatus( } } + // Set the two flags here as they are per-work-object settings. isReportDiffModeOn := work.Spec.ApplyStrategy != nil && work.Spec.ApplyStrategy.Type == fleetv1beta1.ApplyStrategyTypeReportDiff + isStatusBackReportingOn := work.Spec.ReportBackStrategy != nil && work.Spec.ReportBackStrategy.Type == fleetv1beta1.ReportBackStrategyTypeMirror + isDriftedOrDiffed := false for idx := range bundles { bundle := bundles[idx] @@ -123,6 +132,8 @@ func (r *Reconciler) refreshWorkStatus( // Reset the drift details (such details need no port-back). manifestCond.DriftDetails = nil if len(bundle.drifts) > 0 { + isDriftedOrDiffed = true + // Populate drift details if there are drifts found. var observedInMemberClusterGen int64 if bundle.inMemberClusterObj != nil { @@ -145,6 +156,8 @@ func (r *Reconciler) refreshWorkStatus( // Reset the diff details (such details need no port-back). manifestCond.DiffDetails = nil if len(bundle.diffs) > 0 { + isDriftedOrDiffed = true + // Populate diff details if there are diffs found. var observedInMemberClusterGen *int64 if bundle.inMemberClusterObj != nil { @@ -159,9 +172,18 @@ func (r *Reconciler) refreshWorkStatus( } } - // Tally the stats. + // Tally the stats, and perform status back-reporting if applicable. if isManifestObjectApplied(bundle.applyOrReportDiffResTyp) { appliedManifestsCount++ + + if isStatusBackReportingOn { + // Back-report the status from the member cluster side, if applicable. + // + // Back-reporting is only performed when: + // a) the ReportBackStrategy is of the type Mirror; and + // b) the manifest object has been applied successfully. + backReportStatus(bundle.inMemberClusterObj, manifestCond, now, klog.KObj(work)) + } } if isAppliedObjectAvailable(bundle.availabilityResTyp) { availableAppliedObjectsCount++ @@ -234,8 +256,14 @@ func (r *Reconciler) refreshWorkStatus( setWorkStatusTrimmedCondition(work, sizeDeltaBytes, resource.DefaultObjSizeLimitWithPaddingBytes) // Update the Work object status. - if err := r.hubClient.Status().Update(ctx, work); err != nil { - return controller.NewAPIServerError(false, err) + if shouldSkipStatusUpdate(isDriftedOrDiffed, isStatusBackReportingOn, originalStatus, &work.Status) { + // No status change found; skip the update. 
+ klog.V(2).InfoS("No status change found for Work object; skip the status update", "work", klog.KObj(work)) + } else { + klog.V(2).InfoS("Refreshing work object status", "work", klog.KObj(work), "isDriftedOrDiffed", isDriftedOrDiffed, "isStatusBackReportingOn", isStatusBackReportingOn) + if err := r.hubClient.Status().Update(ctx, work); err != nil { + return controller.NewAPIServerError(false, err) + } } return nil } @@ -246,6 +274,8 @@ func (r *Reconciler) refreshAppliedWorkStatus( appliedWork *fleetv1beta1.AppliedWork, bundles []*manifestProcessingBundle, ) error { + originalStatus := appliedWork.Status.DeepCopy() + // Note (chenyu1): this method can run in parallel; however, for simplicity reasons, // considering that in most of the time the count of manifests would be low, currently // Fleet still does the status refresh sequentially. @@ -270,12 +300,18 @@ func (r *Reconciler) refreshAppliedWorkStatus( // Update the AppliedWork object status. appliedWork.Status.AppliedResources = appliedResources - if err := r.spokeClient.Status().Update(ctx, appliedWork); err != nil { - klog.ErrorS(err, "Failed to update AppliedWork status", - "appliedWork", klog.KObj(appliedWork)) - return controller.NewAPIServerError(false, err) + + // Skip the status update if no change found. + if equality.Semantic.DeepEqual(originalStatus, &appliedWork.Status) { + klog.V(2).InfoS("No status change found for AppliedWork object; skip the status update", "appliedWork", klog.KObj(appliedWork)) + } else { + klog.V(2).InfoS("Refreshing AppliedWork object status", "appliedWork", klog.KObj(appliedWork)) + if err := r.spokeClient.Status().Update(ctx, appliedWork); err != nil { + klog.ErrorS(err, "Failed to update AppliedWork status", + "appliedWork", klog.KObj(appliedWork)) + return controller.NewAPIServerError(false, err) + } } - klog.V(2).InfoS("Refreshed AppliedWork object status", "appliedWork", klog.KObj(appliedWork)) return nil } @@ -693,6 +729,57 @@ func prepareRebuiltManifestCondQIdx(bundles []*manifestProcessingBundle) map[str return rebuiltManifestCondQIdx } +// backReportStatus writes the status field of an object applied on the member cluster side in +// the status of the Work object. +func backReportStatus( + inMemberClusterObj *unstructured.Unstructured, + manifestCond *fleetv1beta1.ManifestCondition, + now metav1.Time, + workRef klog.ObjectRef, +) { + if inMemberClusterObj == nil || inMemberClusterObj.Object == nil { + // Do a sanity check; normally this will never occur (as status back-reporting + // only applies to objects that have been successfully applied). + // + // Should this unexpected situation occurs, the work applier does not register + // it as an error; the object shall be ignored for the status back-reporting + // part of the reconciliation loop. + wrapperErr := fmt.Errorf("attempted to back-report status for a manifest that has not been applied yet or cannot be found on the member cluster side") + _ = controller.NewUnexpectedBehaviorError(wrapperErr) + klog.ErrorS(wrapperErr, "Failed to back-report status", "work", workRef, "resourceIdentifier", manifestCond.Identifier) + return + } + if _, ok := inMemberClusterObj.Object["status"]; !ok { + // The object from the member cluster side does not have a status subresource; this + // is not considered as an error. 
+ klog.V(2).InfoS("cannot back-report status as the applied resource on the member cluster side does not have a status subresource", "work", workRef, "resourceIdentifier", manifestCond.Identifier) + return + } + + statusBackReportingWrapper := make(map[string]interface{}) + // The TypeMeta fields must be added in the wrapper, otherwise the client libraries would + // have trouble serializing/deserializing the wrapper object when it's written/read to/from + // the API server. + statusBackReportingWrapper["apiVersion"] = inMemberClusterObj.GetAPIVersion() + statusBackReportingWrapper["kind"] = inMemberClusterObj.GetKind() + statusBackReportingWrapper["status"] = inMemberClusterObj.Object["status"] + statusData, err := json.Marshal(statusBackReportingWrapper) + if err != nil { + // This normally should never occur. + wrappedErr := fmt.Errorf("failed to marshal wrapped back-reported status: %w", err) + _ = controller.NewUnexpectedBehaviorError(wrappedErr) + klog.ErrorS(wrappedErr, "Failed to prepare status wrapper", "work", workRef, "resourceIdentifier", manifestCond.Identifier) + return + } + + manifestCond.BackReportedStatus = &fleetv1beta1.BackReportedStatus{ + ObservedStatus: runtime.RawExtension{ + Raw: statusData, + }, + ObservationTime: now, + } +} + // trimWorkStatusDataWhenOversized trims some data from the Work object status when the object // reaches its size limit. func trimWorkStatusDataWhenOversized(work *fleetv1beta1.Work) { @@ -767,3 +854,15 @@ func setWorkStatusTrimmedCondition(work *fleetv1beta1.Work, sizeDeltaBytes, size ObservedGeneration: work.Generation, }) } + +func shouldSkipStatusUpdate(isDriftedOrDiffed, isStatusBackReportingOn bool, originalStatus, currentStatus *fleetv1beta1.WorkStatus) bool { + if isDriftedOrDiffed || isStatusBackReportingOn { + // Always proceed with status update if there are drifts/diffs detected or if status back-reporting is on. + // This is necessary as the drift/diff details and back-reported status data are timestamped and the timestamps are + // always refreshed per reconciliation loop. + return false + } + + // Skip status update if there is no change in the status. + return equality.Semantic.DeepEqual(originalStatus, currentStatus) +} diff --git a/pkg/controllers/workapplier/status_integration_test.go b/pkg/controllers/workapplier/status_integration_test.go new file mode 100644 index 000000000..bb2fb7d1f --- /dev/null +++ b/pkg/controllers/workapplier/status_integration_test.go @@ -0,0 +1,301 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package workapplier + +import ( + "context" + "fmt" + "sync" + "time" + + crossplanetest "github.com/crossplane/crossplane-runtime/v2/pkg/test" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/pkg/utils/condition" + testutilsactuals "go.goms.io/fleet/test/utils/actuals" +) + +// Note (chenyu1): all test cases in this file use a separate test environment +// (same hub cluster, different fleet member reserved namespace, different +// work applier instance) from the other integration tests. This is needed +// as a client wrapper is used to verify the work applier behavior. + +type clientWrapperWithStatusUpdateCounter struct { + *crossplanetest.MockClient + + mu sync.Mutex + statusUpdateCount map[string]int +} + +func (c *clientWrapperWithStatusUpdateCounter) GetStatusUpdateCount(workNS, workName string) int { + c.mu.Lock() + defer c.mu.Unlock() + + return c.statusUpdateCount[fmt.Sprintf("%s/%s", workNS, workName)] +} + +func NewClientWrapperWithStatusUpdateCounter(realClient client.Client) client.Client { + wrapper := &clientWrapperWithStatusUpdateCounter{ + statusUpdateCount: make(map[string]int), + } + + wrapper.MockClient = &crossplanetest.MockClient{ + MockGet: func(ctx context.Context, key client.ObjectKey, obj client.Object) error { + return realClient.Get(ctx, key, obj) + }, + MockList: func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + return realClient.List(ctx, list, opts...) + }, + MockCreate: func(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + return realClient.Create(ctx, obj, opts...) + }, + MockDelete: func(ctx context.Context, obj client.Object, opts ...client.DeleteOption) error { + return realClient.Delete(ctx, obj, opts...) + }, + MockDeleteAllOf: func(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption) error { + return realClient.DeleteAllOf(ctx, obj, opts...) + }, + MockUpdate: func(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return realClient.Update(ctx, obj, opts...) + }, + MockPatch: func(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + return realClient.Patch(ctx, obj, patch, opts...) + }, + MockApply: func(ctx context.Context, config runtime.ApplyConfiguration, opts ...client.ApplyOption) error { + return realClient.Apply(ctx, config, opts...) + }, + MockStatusUpdate: func(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error { + wrapper.mu.Lock() + defer wrapper.mu.Unlock() + + objNS := obj.GetNamespace() + objName := obj.GetName() + key := fmt.Sprintf("%s/%s", objNS, objName) + wrapper.statusUpdateCount[key]++ + return realClient.Status().Update(ctx, obj, opts...) + }, + MockStatusPatch: func(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.SubResourcePatchOption) error { + return realClient.Status().Patch(ctx, obj, patch, opts...) + }, + } + + return wrapper +} + +var _ = Describe("skipping status update", func() { + Context("apply new manifests", Ordered, func() { + workName := fmt.Sprintf(workNameTemplate, utils.RandStr()) + // The environment prepared by the envtest package does not support namespace + // deletion; each test case would use a new namespace. 
+ nsName := fmt.Sprintf(nsNameTemplate, utils.RandStr()) + + var appliedWorkOwnerRef *metav1.OwnerReference + var regularNS *corev1.Namespace + var regularCM *corev1.ConfigMap + + BeforeAll(func() { + // Prepare a NS object. + regularNS = ns.DeepCopy() + regularNS.Name = nsName + regularNSJSON := marshalK8sObjJSON(regularNS) + + // Prepare a ConfigMap object. + regularCM = configMap.DeepCopy() + regularCM.Namespace = nsName + regularCM.Name = configMapName + regularCMJSON := marshalK8sObjJSON(regularCM) + + // Create a new Work object with all the manifest JSONs. + createWorkObject(workName, memberReservedNSName4, nil, nil, regularNSJSON, regularCMJSON) + }) + + It("should add cleanup finalizer to the Work object", func() { + finalizerAddedActual := workFinalizerAddedActual(memberReservedNSName4, workName) + Eventually(finalizerAddedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to add cleanup finalizer to the Work object") + }) + + It("should prepare an AppliedWork object", func() { + appliedWorkCreatedActual := appliedWorkCreatedActual(memberClient4, memberReservedNSName4, workName) + Eventually(appliedWorkCreatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to prepare an AppliedWork object") + + appliedWorkOwnerRef = prepareAppliedWorkOwnerRef(memberClient4, memberReservedNSName4, workName) + }) + + It("should apply the manifests", func() { + // Ensure that the NS object has been applied as expected. + regularNSObjectAppliedActual := regularNSObjectAppliedActual(memberClient4, nsName, appliedWorkOwnerRef) + Eventually(regularNSObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") + + Expect(memberClient4.Get(ctx, client.ObjectKey{Name: nsName}, regularNS)).To(Succeed(), "Failed to retrieve the NS object") + + // Ensure that the ConfigMap object has been applied as expected. + regularConfigMapObjectAppliedActual := regularConfigMapObjectAppliedActual(memberClient4, nsName, configMapName, appliedWorkOwnerRef) + Eventually(regularConfigMapObjectAppliedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the ConfigMap object") + + Expect(memberClient4.Get(ctx, client.ObjectKey{Namespace: nsName, Name: configMapName}, regularCM)).To(Succeed(), "Failed to retrieve the ConfigMap object") + }) + + It("should update the AppliedWork object status", func() { + // Prepare the status information. + appliedResourceMeta := []fleetv1beta1.AppliedResourceMeta{ + { + WorkResourceIdentifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 0, + Group: "", + Version: "v1", + Kind: "Namespace", + Resource: "namespaces", + Name: nsName, + }, + UID: regularNS.UID, + }, + { + WorkResourceIdentifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 1, + Group: "", + Version: "v1", + Kind: "ConfigMap", + Resource: "configmaps", + Name: configMapName, + Namespace: nsName, + }, + UID: regularCM.UID, + }, + } + + appliedWorkStatusUpdatedActual := appliedWorkStatusUpdated(memberClient4, workName, appliedResourceMeta) + Eventually(appliedWorkStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update appliedWork status") + }) + + It("should update the Work object status", func() { + // Prepare the status information. 
+ workConds := []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: condition.WorkAllManifestsAppliedReason, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: condition.WorkAllManifestsAvailableReason, + }, + } + manifestConds := []fleetv1beta1.ManifestCondition{ + { + Identifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 0, + Group: "", + Version: "v1", + Kind: "Namespace", + Resource: "namespaces", + Name: nsName, + }, + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: string(ApplyOrReportDiffResTypeApplied), + ObservedGeneration: 0, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: string(AvailabilityResultTypeAvailable), + ObservedGeneration: 0, + }, + }, + }, + { + Identifier: fleetv1beta1.WorkResourceIdentifier{ + Ordinal: 1, + Group: "", + Version: "v1", + Kind: "ConfigMap", + Resource: "configmaps", + Name: configMapName, + Namespace: nsName, + }, + Conditions: []metav1.Condition{ + { + Type: fleetv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionTrue, + Reason: string(ApplyOrReportDiffResTypeApplied), + ObservedGeneration: 0, + }, + { + Type: fleetv1beta1.WorkConditionTypeAvailable, + Status: metav1.ConditionTrue, + Reason: string(AvailabilityResultTypeAvailable), + ObservedGeneration: 0, + }, + }, + }, + } + + workStatusUpdatedActual := workStatusUpdated(memberReservedNSName4, workName, workConds, manifestConds, nil, nil) + Eventually(workStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update work status") + // With the default backoff setup, this Consistently duration should allow 10-15 reconciliations. + Consistently(workStatusUpdatedActual, time.Minute*2, time.Second*10).Should(Succeed(), "Work status was updated unexpectedly") + }) + + It("should have skipped most status updates", func() { + memberClientStatusUpdateCount := memberClient4Wrapper.GetStatusUpdateCount("", workName) + // There should be 1 status update on the member cluster side in total: + // 1) one status update for populating the initial appliedWork status after all manifests have been applied. + wantMemberClientStatusUpdateCount := 1 + Expect(memberClientStatusUpdateCount).To(Equal(wantMemberClientStatusUpdateCount), "Unexpected number of status updates") + + hubClientStatusUpdateCount := hubClientWrapperForWorkApplier4.GetStatusUpdateCount(memberReservedNSName4, workName) + // There should be 2 status updates on the hub cluster side in total: + // 1) one status update for writing ahead the manifests to be applied; + // 2) one status update for populating the initial work status after all manifests have been applied. + wantHubClientStatusUpdateCount := 2 + Expect(hubClientStatusUpdateCount).To(Equal(wantHubClientStatusUpdateCount), "Unexpected number of status updates") + }) + + AfterAll(func() { + // Delete the Work object and related resources. + deleteWorkObject(workName, memberReservedNSName4) + + // Ensure applied manifest has been removed. 
+ regularCMRemovedActual := regularConfigMapRemovedActual(memberClient4, nsName, configMapName) + Eventually(regularCMRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the ConfigMap object") + + // Kubebuilder suggests that in a testing environment like this, to check for the existence of the AppliedWork object + // OwnerReference in the Namespace object (https://book.kubebuilder.io/reference/envtest.html#testing-considerations). + checkNSOwnerReferences(memberClient4, workName, nsName) + + // Ensure that the AppliedWork object has been removed. + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient4, workName) + Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") + + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName4) + Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") + + // The environment prepared by the envtest package does not support namespace + // deletion; consequently this test suite would not attempt to verify its deletion. + }) + }) +}) diff --git a/pkg/controllers/workapplier/status_test.go b/pkg/controllers/workapplier/status_test.go index 77744675e..294b02cd3 100644 --- a/pkg/controllers/workapplier/status_test.go +++ b/pkg/controllers/workapplier/status_test.go @@ -18,18 +18,23 @@ package workapplier import ( "context" + "encoding/json" "fmt" "testing" "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" @@ -2032,6 +2037,169 @@ func TestSetWorkDiffReportedCondition(t *testing.T) { } } +// TestBackReportStatus tests the backReportStatus method. 
+func TestBackReportStatus(t *testing.T) { + workRef := klog.ObjectRef{ + Name: workName, + Namespace: memberReservedNSName1, + } + now := metav1.Now() + + deployWithStatus := deploy.DeepCopy() + deployWithStatus.Status = appsv1.DeploymentStatus{ + ObservedGeneration: 2, + Replicas: 5, + UpdatedReplicas: 5, + ReadyReplicas: 5, + AvailableReplicas: 5, + UnavailableReplicas: 0, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: corev1.ConditionTrue, + }, + }, + } + deployStatusWrapperMap := map[string]interface{}{ + "apiVersion": "apps/v1", + "kind": "Deployment", + "status": deployWithStatus.Status, + } + deployStatusWrapperBytes, _ := json.Marshal(deployStatusWrapperMap) + + deployWithStatusBfr := deploy.DeepCopy() + deployWithStatusBfr.Status = appsv1.DeploymentStatus{ + ObservedGeneration: 1, + Replicas: 4, + UpdatedReplicas: 1, + ReadyReplicas: 1, + AvailableReplicas: 1, + UnavailableReplicas: 3, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: corev1.ConditionFalse, + }, + { + Type: appsv1.DeploymentProgressing, + Status: corev1.ConditionTrue, + }, + }, + } + deployStatusWrapperMapBfr := map[string]interface{}{ + "apiVersion": "apps/v1", + "kind": "Deployment", + "status": deployWithStatusBfr.Status, + } + deployStatusWrapperBytesBfr, _ := json.Marshal(deployStatusWrapperMapBfr) + + testCases := []struct { + name string + manifestCond *fleetv1beta1.ManifestCondition + inMemberClusterObj *unstructured.Unstructured + // The placeholder is added here to help verify the integrity of backported + // status by unmarshalling the data into its original data structure (e.g., + // a Kubernetes Deployment). + objPlaceholder client.Object + wantManifestCond *fleetv1beta1.ManifestCondition + wantIgnored bool + }{ + { + name: "object with status", + manifestCond: &fleetv1beta1.ManifestCondition{}, + inMemberClusterObj: toUnstructured(t, deployWithStatus), + objPlaceholder: deploy.DeepCopy(), + wantManifestCond: &fleetv1beta1.ManifestCondition{ + BackReportedStatus: &fleetv1beta1.BackReportedStatus{ + ObservedStatus: runtime.RawExtension{ + Raw: deployStatusWrapperBytes, + }, + ObservationTime: now, + }, + }, + }, + { + name: "object with status, overwriting previous back-reported status", + manifestCond: &fleetv1beta1.ManifestCondition{ + BackReportedStatus: &fleetv1beta1.BackReportedStatus{ + ObservedStatus: runtime.RawExtension{ + Raw: deployStatusWrapperBytesBfr, + }, + ObservationTime: metav1.Time{ + Time: now.Add(-1 * time.Minute), + }, + }, + }, + inMemberClusterObj: toUnstructured(t, deployWithStatus), + objPlaceholder: deploy.DeepCopy(), + wantManifestCond: &fleetv1beta1.ManifestCondition{ + BackReportedStatus: &fleetv1beta1.BackReportedStatus{ + ObservedStatus: runtime.RawExtension{ + Raw: deployStatusWrapperBytes, + }, + ObservationTime: now, + }, + }, + }, + { + name: "object with no status", + manifestCond: &fleetv1beta1.ManifestCondition{}, + inMemberClusterObj: toUnstructured(t, configMap.DeepCopy()), + wantManifestCond: &fleetv1beta1.ManifestCondition{}, + wantIgnored: true, + }, + // Normally this case will never occur. + { + name: "no object found on the member cluster side", + manifestCond: &fleetv1beta1.ManifestCondition{}, + wantManifestCond: &fleetv1beta1.ManifestCondition{}, + wantIgnored: true, + }, + // Normally this case will never occur. 
+ { + name: "object found on the member cluster side but has no data", + manifestCond: &fleetv1beta1.ManifestCondition{}, + inMemberClusterObj: &unstructured.Unstructured{}, + wantManifestCond: &fleetv1beta1.ManifestCondition{}, + wantIgnored: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + backReportStatus(tc.inMemberClusterObj, tc.manifestCond, now, workRef) + + if tc.wantIgnored { + if tc.manifestCond.BackReportedStatus != nil { + t.Fatalf("backReportStatus() reported status data unexpectedly") + return + } + return + } + + // The test spec here attempts to re-build the status instead of directly + // comparing the Raw bytes, as the JSON marshalling ops are not guaranteed + // to produce deterministic results (e.g., the order of object keys might vary + // on different unmarshalling attempts, even though the data remains the same). + backReportedStatusBytes := tc.manifestCond.BackReportedStatus.ObservedStatus.Raw + if err := json.Unmarshal(backReportedStatusBytes, tc.objPlaceholder); err != nil { + t.Fatalf("back reported data unmarshalling err: %v", err) + } + backReportedStatusUnstructured := toUnstructured(t, tc.objPlaceholder) + // The test spec here does not verify the API version and Kind info as they + // are tracked just for structural integrity reasons; the information is not + // actually in use. + if diff := cmp.Diff(backReportedStatusUnstructured.Object["status"], tc.inMemberClusterObj.Object["status"]); diff != "" { + t.Errorf("backReportStatus() manifestCond diffs (-got, +want):\n%s", diff) + } + + if !cmp.Equal(tc.manifestCond.BackReportedStatus.ObservationTime, now) { + t.Errorf("backReportStatus() observed timestamp not equal, got %v, want %v", tc.manifestCond.BackReportedStatus.ObservationTime, now) + } + }) + } +} + // TestTrimWorkStatusDataWhenOversized tests the trimWorkStatusDataWhenOversized function. func TestTrimWorkStatusDataWhenOversized(t *testing.T) { now := metav1.Now() diff --git a/pkg/controllers/workapplier/suite_test.go b/pkg/controllers/workapplier/suite_test.go index 89b0e47bc..22a1f5020 100644 --- a/pkg/controllers/workapplier/suite_test.go +++ b/pkg/controllers/workapplier/suite_test.go @@ -80,6 +80,15 @@ var ( memberDynamicClient3 dynamic.Interface workApplier3 *Reconciler + memberCfg4 *rest.Config + memberEnv4 *envtest.Environment + hubMgr4 manager.Manager + memberClient4 client.Client + memberClient4Wrapper *clientWrapperWithStatusUpdateCounter + memberDynamicClient4 dynamic.Interface + workApplier4 *Reconciler + hubClientWrapperForWorkApplier4 *clientWrapperWithStatusUpdateCounter + ctx context.Context cancel context.CancelFunc wg sync.WaitGroup @@ -94,6 +103,7 @@ const ( memberReservedNSName1 = "fleet-member-experimental-1" memberReservedNSName2 = "fleet-member-experimental-2" memberReservedNSName3 = "fleet-member-experimental-3" + memberReservedNSName4 = "fleet-member-experimental-4" parallelizerFixedDelay = time.Second * 5 ) @@ -147,6 +157,13 @@ func setupResources() { }, } Expect(hubClient.Create(ctx, ns3)).To(Succeed()) + + ns4 := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: memberReservedNSName4, + }, + } + Expect(hubClient.Create(ctx, ns4)).To(Succeed()) } var _ = BeforeSuite(func() { @@ -193,6 +210,14 @@ var _ = BeforeSuite(func() { filepath.Join("../../../", "test", "manifests"), }, } + // memberEnv4 is the test environment for verifying that work applier can skip status updates as + // expected. 
+ memberEnv4 = &envtest.Environment{ + CRDDirectoryPaths: []string{ + filepath.Join("../../../", "config", "crd", "bases"), + filepath.Join("../../../", "test", "manifests"), + }, + } var err error hubCfg, err = hubEnv.Start() @@ -211,9 +236,9 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) Expect(memberCfg3).ToNot(BeNil()) - memberCfg2, err = memberEnv2.Start() + memberCfg4, err = memberEnv4.Start() Expect(err).ToNot(HaveOccurred()) - Expect(memberCfg2).ToNot(BeNil()) + Expect(memberCfg4).ToNot(BeNil()) err = batchv1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) @@ -239,6 +264,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) Expect(memberClient3).ToNot(BeNil()) + memberClient4, err = client.New(memberCfg4, client.Options{Scheme: scheme.Scheme}) + Expect(err).ToNot(HaveOccurred()) + Expect(memberClient4).ToNot(BeNil()) + // This setup also requires a client-go dynamic client for the member cluster. memberDynamicClient1, err = dynamic.NewForConfig(memberCfg1) Expect(err).ToNot(HaveOccurred()) @@ -249,6 +278,9 @@ var _ = BeforeSuite(func() { memberDynamicClient3, err = dynamic.NewForConfig(memberCfg3) Expect(err).ToNot(HaveOccurred()) + memberDynamicClient4, err = dynamic.NewForConfig(memberCfg4) + Expect(err).ToNot(HaveOccurred()) + By("Setting up the resources") setupResources() @@ -331,7 +363,7 @@ var _ = BeforeSuite(func() { superLongExponentialBackoffRateLimiter, ) // Due to name conflicts, the second work applier must be set up manually. - err = ctrl.NewControllerManagedBy(hubMgr2).Named("work-applier-controller-duplicate"). + err = ctrl.NewControllerManagedBy(hubMgr2).Named("work-applier-controller-exponential-backoff"). WithOptions(ctrloption.Options{ MaxConcurrentReconciles: workApplier2.concurrentReconciles, }). @@ -383,8 +415,50 @@ var _ = BeforeSuite(func() { Complete(workApplier3) Expect(err).NotTo(HaveOccurred()) + By("Setting up the controller and the controller manager for member cluster 4") + hubMgr4, err = ctrl.NewManager(hubCfg, ctrl.Options{ + Scheme: scheme.Scheme, + Metrics: server.Options{ + BindAddress: "0", + }, + Cache: cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + memberReservedNSName4: {}, + }, + }, + Logger: textlogger.NewLogger(textlogger.NewConfig(textlogger.Verbosity(4))), + }) + Expect(err).ToNot(HaveOccurred()) + + wrappedHubClient := NewClientWrapperWithStatusUpdateCounter(hubClient) + hubClientWrapperForWorkApplier4 = wrappedHubClient.(*clientWrapperWithStatusUpdateCounter) + wrappedMemberClient4 := NewClientWrapperWithStatusUpdateCounter(memberClient4) + memberClient4Wrapper = wrappedMemberClient4.(*clientWrapperWithStatusUpdateCounter) + workApplier4 = NewReconciler( + wrappedHubClient, + memberReservedNSName4, + memberDynamicClient4, + wrappedMemberClient4, + memberClient4.RESTMapper(), + hubMgr4.GetEventRecorderFor("work-applier"), + maxConcurrentReconciles, + parallelizer.NewParallelizer(workerCount), + 30*time.Second, + true, + 60, + nil, // Use the default backoff rate limiter. + ) + // Due to name conflicts, the fourth work applier must be set up manually. + err = ctrl.NewControllerManagedBy(hubMgr4).Named("work-applier-controller-skipping-status-update"). + WithOptions(ctrloption.Options{ + MaxConcurrentReconciles: workApplier4.concurrentReconciles, + }). + For(&fleetv1beta1.Work{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
+ Complete(workApplier4) + Expect(err).NotTo(HaveOccurred()) + wg = sync.WaitGroup{} - wg.Add(3) + wg.Add(4) go func() { defer GinkgoRecover() defer wg.Done() @@ -405,6 +479,13 @@ var _ = BeforeSuite(func() { Expect(workApplier3.Join(ctx)).To(Succeed()) Expect(hubMgr3.Start(ctx)).To(Succeed()) }() + + go func() { + defer GinkgoRecover() + defer wg.Done() + Expect(workApplier4.Join(ctx)).To(Succeed()) + Expect(hubMgr4.Start(ctx)).To(Succeed()) + }() }) var _ = AfterSuite(func() { @@ -417,4 +498,5 @@ var _ = AfterSuite(func() { Expect(memberEnv1.Stop()).To(Succeed()) Expect(memberEnv2.Stop()).To(Succeed()) Expect(memberEnv3.Stop()).To(Succeed()) + Expect(memberEnv4.Stop()).To(Succeed()) }) diff --git a/pkg/controllers/workapplier/waves_integration_test.go b/pkg/controllers/workapplier/waves_integration_test.go index fabb401cb..d983fabbf 100644 --- a/pkg/controllers/workapplier/waves_integration_test.go +++ b/pkg/controllers/workapplier/waves_integration_test.go @@ -47,6 +47,7 @@ import ( fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/utils" "go.goms.io/fleet/pkg/utils/condition" + testutilsactuals "go.goms.io/fleet/test/utils/actuals" ) // Note (chenyu1): all test cases in this file use a separate test environment @@ -86,7 +87,7 @@ var _ = Describe("parallel processing with waves", func() { regularPCJSON := marshalK8sObjJSON(regularPC) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName3, nil, regularNSJSON, regularPCJSON) + createWorkObject(workName, memberReservedNSName3, nil, nil, regularNSJSON, regularPCJSON) }) // For simplicity reasons, this test case will skip some of the regular apply op result verification @@ -196,10 +197,10 @@ var _ = Describe("parallel processing with waves", func() { }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the PriorityClass object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient3, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName3) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. @@ -224,7 +225,7 @@ var _ = Describe("parallel processing with waves", func() { regularCMJSON := marshalK8sObjJSON(regularCM) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName3, nil, regularNSJSON, regularCMJSON) + createWorkObject(workName, memberReservedNSName3, nil, nil, regularNSJSON, regularCMJSON) }) // For simplicity reasons, this test case will skip some of the regular apply op result verification @@ -325,14 +326,14 @@ var _ = Describe("parallel processing with waves", func() { deleteWorkObject(workName, memberReservedNSName3) // Remove the ConfigMap object if it still exists. 
- cmRemovedActual := regularConfigMapRemovedActual(nsName, configMapName) + cmRemovedActual := regularConfigMapRemovedActual(memberClient3, nsName, configMapName) Eventually(cmRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the ConfigMap object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient3, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName3) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. @@ -368,7 +369,7 @@ var _ = Describe("parallel processing with waves", func() { regularRoleJSON := marshalK8sObjJSON(regularRole) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName3, nil, regularNSJSON, regularRoleJSON) + createWorkObject(workName, memberReservedNSName3, nil, nil, regularNSJSON, regularRoleJSON) }) // For simplicity reasons, this test case will skip some of the regular apply op result verification @@ -488,10 +489,10 @@ var _ = Describe("parallel processing with waves", func() { }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Role object") // Ensure that the AppliedWork object has been removed. - appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient3, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName3) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. @@ -1114,7 +1115,7 @@ var _ = Describe("parallel processing with waves", func() { }) // Create a new Work object with all the manifest JSONs. - createWorkObject(workName, memberReservedNSName3, nil, allManifestJSONByteArrs...) + createWorkObject(workName, memberReservedNSName3, nil, nil, allManifestJSONByteArrs...) }) // For simplicity reasons, this test case will skip some of the regular apply op result verification @@ -1257,10 +1258,10 @@ var _ = Describe("parallel processing with waves", func() { } // Ensure that the AppliedWork object has been removed. 
- appliedWorkRemovedActual := appliedWorkRemovedActual(workName, nsName) + appliedWorkRemovedActual := appliedWorkRemovedActual(memberClient3, workName) Eventually(appliedWorkRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the AppliedWork object") - workRemovedActual := workRemovedActual(workName) + workRemovedActual := testutilsactuals.WorkObjectRemovedActual(ctx, hubClient, workName, memberReservedNSName3) Eventually(workRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove the Work object") // The environment prepared by the envtest package does not support namespace // deletion; consequently this test suite would not attempt to verify its deletion. diff --git a/pkg/controllers/workgenerator/controller.go b/pkg/controllers/workgenerator/controller.go index 63063e20b..13167d3c7 100644 --- a/pkg/controllers/workgenerator/controller.go +++ b/pkg/controllers/workgenerator/controller.go @@ -673,6 +673,13 @@ func (r *Reconciler) syncApplyStrategy( // areAllWorkSynced checks if all the works are synced with the resource binding. func areAllWorkSynced(existingWorks map[string]*fleetv1beta1.Work, resourceBinding fleetv1beta1.BindingObj, _, _ string) bool { + // If there is no existing work, they are not synced. + // Even for the case where the resource snapshot has no selected resources, + // there should be one work created for the empty resource list. + if len(existingWorks) == 0 { + return false + } + // TODO: check resourceOverrideSnapshotHash and clusterResourceOverrideSnapshotHash after all the work has the ParentResourceOverrideSnapshotHashAnnotation and ParentClusterResourceOverrideSnapshotHashAnnotation resourceSnapshotName := resourceBinding.GetBindingSpec().ResourceSnapshotName for _, work := range existingWorks { diff --git a/pkg/controllers/workgenerator/controller_test.go b/pkg/controllers/workgenerator/controller_test.go index b6e896810..7b4310448 100644 --- a/pkg/controllers/workgenerator/controller_test.go +++ b/pkg/controllers/workgenerator/controller_test.go @@ -3751,6 +3751,176 @@ func TestSyncApplyStrategy(t *testing.T) { } } +func TestAreAllWorkSynced(t *testing.T) { + tests := map[string]struct { + existingWorks map[string]*fleetv1beta1.Work + resourceBinding fleetv1beta1.BindingObj + want bool + }{ + "returns false when no existing works": { + existingWorks: map[string]*fleetv1beta1.Work{}, + resourceBinding: &fleetv1beta1.ClusterResourceBinding{ + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "snapshot-1", + }, + }, + want: false, + }, + "returns true when all works are synced with annotation": { + existingWorks: map[string]*fleetv1beta1.Work{ + "work1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work1", + Annotations: map[string]string{ + fleetv1beta1.ParentResourceSnapshotNameAnnotation: "snapshot-1", + }, + }, + }, + "work2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work2", + Annotations: map[string]string{ + fleetv1beta1.ParentResourceSnapshotNameAnnotation: "snapshot-1", + }, + }, + }, + }, + resourceBinding: &fleetv1beta1.ClusterResourceBinding{ + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "snapshot-1", + }, + }, + want: true, + }, + "returns false when works have different snapshot names": { + existingWorks: map[string]*fleetv1beta1.Work{ + "work1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work1", + Annotations: map[string]string{ + fleetv1beta1.ParentResourceSnapshotNameAnnotation: "snapshot-1", + }, + }, + }, + "work2": { + ObjectMeta: 
metav1.ObjectMeta{ + Name: "work2", + Annotations: map[string]string{ + fleetv1beta1.ParentResourceSnapshotNameAnnotation: "snapshot-2", + }, + }, + }, + }, + resourceBinding: &fleetv1beta1.ClusterResourceBinding{ + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "snapshot-1", + }, + }, + want: false, + }, + "returns true when works are synced via label construction (fallback)": { + existingWorks: map[string]*fleetv1beta1.Work{ + "work1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work1", + Labels: map[string]string{ + fleetv1beta1.ParentResourceSnapshotIndexLabel: "1", + }, + }, + }, + }, + resourceBinding: &fleetv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + fleetv1beta1.PlacementTrackingLabel: "test-placement", + }, + }, + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "test-placement-1-snapshot", + }, + }, + want: true, + }, + "returns false when label construction fallback fails": { + existingWorks: map[string]*fleetv1beta1.Work{ + "work1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work1", + Labels: map[string]string{ + fleetv1beta1.ParentResourceSnapshotIndexLabel: "2", + }, + }, + }, + }, + resourceBinding: &fleetv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + fleetv1beta1.PlacementTrackingLabel: "test-placement", + }, + }, + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "test-placement-1-snapshot", + }, + }, + want: false, + }, + "returns false when works have no annotation and invalid label": { + existingWorks: map[string]*fleetv1beta1.Work{ + "work1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work1", + Labels: map[string]string{ + fleetv1beta1.ParentResourceSnapshotIndexLabel: "invalid", + }, + }, + }, + }, + resourceBinding: &fleetv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + fleetv1beta1.PlacementTrackingLabel: "test-placement", + }, + }, + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "test-placement-1-snapshot", + }, + }, + want: false, + }, + "returns true for ResourceBinding (namespaced) with annotation": { + existingWorks: map[string]*fleetv1beta1.Work{ + "work1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "work1", + Annotations: map[string]string{ + fleetv1beta1.ParentResourceSnapshotNameAnnotation: "test-snapshot-1", + }, + }, + }, + }, + resourceBinding: &fleetv1beta1.ResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Namespace: "test-namespace", + }, + Spec: fleetv1beta1.ResourceBindingSpec{ + ResourceSnapshotName: "test-snapshot-1", + }, + }, + want: true, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + got := areAllWorkSynced(tt.existingWorks, tt.resourceBinding, "", "") + if got != tt.want { + t.Errorf("areAllWorkSynced() = %v, want %v", got, tt.want) + } + }) + } +} + func TestShouldIgnoreWork(t *testing.T) { tests := map[string]struct { enqueueCRP bool diff --git a/pkg/propertyprovider/azure/provider.go b/pkg/propertyprovider/azure/provider.go index 347088219..cd58188de 100644 --- a/pkg/propertyprovider/azure/provider.go +++ b/pkg/propertyprovider/azure/provider.go @@ -274,9 +274,13 @@ func (p *PropertyProvider) Start(ctx context.Context, config *rest.Config) error } p.clusterCertificateAuthority = cadata p.clusterCertificateAuthorityObservedTime = time.Now() + klog.V(2).Info("Cached cluster certificate authority data from file") + } else if len(config.CAData) > 0 { + 
p.clusterCertificateAuthority = config.CAData + p.clusterCertificateAuthorityObservedTime = time.Now() klog.V(2).Info("Cached cluster certificate authority data") } else { - err := fmt.Errorf("rest.Config CAFile empty: %s", config.CAFile) + err := fmt.Errorf("rest.Config has empty CAFile and CAData") klog.ErrorS(err, "No certificate authority data available in rest.Config") } diff --git a/pkg/resourcewatcher/change_dector.go b/pkg/resourcewatcher/change_dector.go index cb381d72c..01f314944 100644 --- a/pkg/resourcewatcher/change_dector.go +++ b/pkg/resourcewatcher/change_dector.go @@ -23,7 +23,6 @@ import ( "golang.org/x/sync/errgroup" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/discovery" "k8s.io/client-go/tools/cache" @@ -45,7 +44,7 @@ var ( // ChangeDetector is a resource watcher which watches all types of resources in the cluster and reconcile the events. type ChangeDetector struct { // DiscoveryClient is used to do resource discovery. - DiscoveryClient *discovery.DiscoveryClient + DiscoveryClient discovery.DiscoveryInterface // RESTMapper is used to convert between GVK and GVR RESTMapper meta.RESTMapper @@ -137,43 +136,20 @@ func (d *ChangeDetector) discoverAPIResourcesLoop(ctx context.Context, period ti }, period) } -// discoverResources goes through all the api resources in the cluster and create informers on selected types +// discoverResources goes through all the api resources in the cluster and adds event handlers to informers func (d *ChangeDetector) discoverResources(dynamicResourceEventHandler cache.ResourceEventHandler) { - newResources, err := d.getWatchableResources() - var dynamicResources []informer.APIResourceMeta - if err != nil { - klog.ErrorS(err, "Failed to get all the api resources from the cluster") - } - for _, res := range newResources { - // all the static resources are disabled by default - if d.shouldWatchResource(res.GroupVersionResource) { - dynamicResources = append(dynamicResources, res) - } + resourcesToWatch := discoverWatchableResources(d.DiscoveryClient, d.RESTMapper, d.ResourceConfig) + + // On the leader, add event handlers to informers that were already created by InformerPopulator + // The informers exist on all pods, but only the leader adds handlers and processes events + for _, res := range resourcesToWatch { + d.InformerManager.AddEventHandlerToInformer(res.GroupVersionResource, dynamicResourceEventHandler) } - d.InformerManager.AddDynamicResources(dynamicResources, dynamicResourceEventHandler, err == nil) + // this will start the newly added informers if there is any d.InformerManager.Start() -} - -// gvrDisabled returns whether GroupVersionResource is disabled. -func (d *ChangeDetector) shouldWatchResource(gvr schema.GroupVersionResource) bool { - // By default, all of the APIs are allowed. 
- if d.ResourceConfig == nil { - return true - } - gvks, err := d.RESTMapper.KindsFor(gvr) - if err != nil { - klog.ErrorS(err, "gvr transform failed", "gvr", gvr.String()) - return false - } - for _, gvk := range gvks { - if d.ResourceConfig.IsResourceDisabled(gvk) { - klog.V(4).InfoS("Skip watch resource", "group version kind", gvk.String()) - return false - } - } - return true + klog.V(2).InfoS("Change detector: discovered resources", "count", len(resourcesToWatch)) } // dynamicResourceFilter filters out resources that we don't want to watch diff --git a/pkg/resourcewatcher/change_detector_test.go b/pkg/resourcewatcher/change_detector_test.go new file mode 100644 index 000000000..c478a4966 --- /dev/null +++ b/pkg/resourcewatcher/change_detector_test.go @@ -0,0 +1,166 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcewatcher + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/restmapper" + "k8s.io/client-go/tools/cache" + + "go.goms.io/fleet/pkg/utils" + testinformer "go.goms.io/fleet/test/utils/informer" + testresource "go.goms.io/fleet/test/utils/resource" +) + +func TestChangeDetector_discoverResources(t *testing.T) { + tests := []struct { + name string + discoveryResources []*metav1.APIResourceList + resourceConfig *utils.ResourceConfig + }{ + { + name: "discovers and adds handlers for watchable resources", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceConfigMap(), + testresource.APIResourceSecret(), + }, + }, + }, + resourceConfig: nil, // Allow all resources + }, + { + name: "skips resources without list/watch verbs", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceWithVerbs("configmaps", "ConfigMap", true, []string{"get", "delete"}), // Missing list/watch + }, + }, + }, + resourceConfig: nil, + }, + { + name: "respects resource config filtering", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceConfigMap(), + testresource.APIResourceSecret(), + }, + }, + }, + resourceConfig: func() *utils.ResourceConfig { + rc := utils.NewResourceConfig(false) // Skip mode + _ = rc.Parse("v1/Secret") // Skip secrets + return rc + }(), + }, + { + name: "discovers apps group resources", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceDeployment(), + testresource.APIResourceStatefulSet(), + }, + }, + }, + resourceConfig: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create fake discovery client + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := 
fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + fakeDiscovery.Resources = tt.discoveryResources + + // Create REST mapper + groupResources := []*restmapper.APIGroupResources{} + for _, resourceList := range tt.discoveryResources { + gv, err := schema.ParseGroupVersion(resourceList.GroupVersion) + if err != nil { + t.Fatalf("Failed to parse group version: %v", err) + } + + groupResources = append(groupResources, &restmapper.APIGroupResources{ + Group: metav1.APIGroup{ + Name: gv.Group, + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: resourceList.GroupVersion, Version: gv.Version}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: resourceList.GroupVersion, + Version: gv.Version, + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + gv.Version: resourceList.APIResources, + }, + }) + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create fake informer manager + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + } + + // Track handler additions + testHandler := cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) {}, + } + + // Create ChangeDetector with the interface type + detector := &ChangeDetector{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: tt.resourceConfig, + } + + // Test discoverResources which discovers resources and adds handlers + detector.discoverResources(testHandler) + + // The main goal is to verify no panics occur during discovery and handler addition + }) + } +} + +func TestChangeDetector_NeedLeaderElection(t *testing.T) { + detector := &ChangeDetector{} + + // ChangeDetector SHOULD need leader election so only the leader processes events + if !detector.NeedLeaderElection() { + t.Error("ChangeDetector should need leader election") + } +} diff --git a/pkg/resourcewatcher/informer_populator.go b/pkg/resourcewatcher/informer_populator.go new file mode 100644 index 000000000..1faef543b --- /dev/null +++ b/pkg/resourcewatcher/informer_populator.go @@ -0,0 +1,102 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package resourcewatcher + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/discovery" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/manager" + + "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/pkg/utils/informer" +) + +const ( + // informerPopulatorDiscoveryPeriod is how often the InformerPopulator rediscovers API resources + informerPopulatorDiscoveryPeriod = 30 * time.Second +) + +// make sure that our InformerPopulator implements controller runtime interfaces +var ( + _ manager.Runnable = &InformerPopulator{} + _ manager.LeaderElectionRunnable = &InformerPopulator{} +) + +// InformerPopulator discovers API resources and creates informers for them WITHOUT adding event handlers. +// This allows follower pods to have synced informer caches for webhook validation while the leader's +// ChangeDetector adds event handlers and runs controllers. +type InformerPopulator struct { + // DiscoveryClient is used to do resource discovery. + DiscoveryClient discovery.DiscoveryInterface + + // RESTMapper is used to convert between GVK and GVR + RESTMapper meta.RESTMapper + + // InformerManager manages all the dynamic informers created by the discovery client + InformerManager informer.Manager + + // ResourceConfig contains all the API resources that we won't select based on the allowed or skipped propagating APIs option. + ResourceConfig *utils.ResourceConfig +} + +// Start runs the informer populator, discovering resources and creating informers. +// This runs on ALL pods (leader and followers) to ensure all have synced caches. +func (p *InformerPopulator) Start(ctx context.Context) error { + klog.InfoS("Starting the informer populator") + defer klog.InfoS("The informer populator is stopped") + + // Run initial discovery to create informers + p.discoverAndCreateInformers() + + // Wait for initial cache sync + p.InformerManager.WaitForCacheSync() + klog.InfoS("Informer populator: initial cache sync complete") + + // Continue discovering resources periodically to handle CRD installations + wait.UntilWithContext(ctx, func(ctx context.Context) { + p.discoverAndCreateInformers() + }, informerPopulatorDiscoveryPeriod) + + return nil +} + +// discoverAndCreateInformers discovers API resources and creates informers WITHOUT adding event handlers +func (p *InformerPopulator) discoverAndCreateInformers() { + resourcesToWatch := discoverWatchableResources(p.DiscoveryClient, p.RESTMapper, p.ResourceConfig) + + // Create informers directly without adding event handlers. + // This avoids adding any event handlers on follower pods + for _, res := range resourcesToWatch { + p.InformerManager.CreateInformerForResource(res) + } + + // Start any newly created informers + p.InformerManager.Start() + + klog.V(2).InfoS("Informer populator: discovered resources", "count", len(resourcesToWatch)) +} + +// NeedLeaderElection implements LeaderElectionRunnable interface. +// Returns false so this runs on ALL pods (leader and followers). +func (p *InformerPopulator) NeedLeaderElection() bool { + return false +} diff --git a/pkg/resourcewatcher/informer_populator_test.go b/pkg/resourcewatcher/informer_populator_test.go new file mode 100644 index 000000000..92a162760 --- /dev/null +++ b/pkg/resourcewatcher/informer_populator_test.go @@ -0,0 +1,350 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcewatcher + +import ( + "context" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/restmapper" + + "go.goms.io/fleet/pkg/utils" + testinformer "go.goms.io/fleet/test/utils/informer" + testresource "go.goms.io/fleet/test/utils/resource" +) + +const ( + // testTimeout is the timeout for test operations + testTimeout = 200 * time.Millisecond + // testSleep is how long to sleep to allow periodic operations + testSleep = 150 * time.Millisecond +) + +func TestInformerPopulator_NeedLeaderElection(t *testing.T) { + populator := &InformerPopulator{} + + // InformerPopulator should NOT need leader election so it runs on all pods + if populator.NeedLeaderElection() { + t.Error("InformerPopulator should not need leader election") + } +} + +func TestInformerPopulator_discoverAndCreateInformers(t *testing.T) { + tests := []struct { + name string + discoveryResources []*metav1.APIResourceList + resourceConfig *utils.ResourceConfig + expectedInformerCreated bool + expectedResourceCount int + }{ + { + name: "creates informers for watchable resources", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceConfigMap(), + }, + }, + }, + resourceConfig: nil, // Allow all resources + expectedInformerCreated: true, + expectedResourceCount: 1, + }, + { + name: "skips resources without list/watch verbs", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceWithVerbs("configmaps", "ConfigMap", true, []string{"get", "delete"}), // Missing list/watch + }, + }, + }, + resourceConfig: nil, + expectedInformerCreated: false, + expectedResourceCount: 0, + }, + { + name: "respects resource config filtering", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceSecret(), + }, + }, + }, + resourceConfig: func() *utils.ResourceConfig { + rc := utils.NewResourceConfig(false) // Skip mode + _ = rc.Parse("v1/Secret") // Skip secrets + return rc + }(), + expectedInformerCreated: false, + expectedResourceCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create fake discovery client + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + fakeDiscovery.Resources = tt.discoveryResources + + // Create REST mapper + groupResources := []*restmapper.APIGroupResources{} + for _, resourceList := range tt.discoveryResources { + gv, err := schema.ParseGroupVersion(resourceList.GroupVersion) + if err != nil { + t.Fatalf("Failed to parse group version: %v", err) + } + + apiResources := []metav1.APIResource{} + apiResources = append(apiResources, resourceList.APIResources...) 
+ + groupResources = append(groupResources, &restmapper.APIGroupResources{ + Group: metav1.APIGroup{ + Name: gv.Group, + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: resourceList.GroupVersion, Version: gv.Version}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: resourceList.GroupVersion, + Version: gv.Version, + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + gv.Version: apiResources, + }, + }) + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create fake informer manager + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + } + + // Track calls to CreateInformerForResource + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: tt.resourceConfig, + } + + // Run discovery + populator.discoverAndCreateInformers() + + // Note: FakeManager doesn't track calls, so we verify no panics occurred + }) + } +} + +func TestInformerPopulator_Start(t *testing.T) { + // Create fake discovery client with some resources + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + + fakeDiscovery.Resources = []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceConfigMap(), + }, + }, + } + + // Create REST mapper + gv := schema.GroupVersion{Group: "", Version: "v1"} + groupResources := []*restmapper.APIGroupResources{ + testresource.APIGroupResourcesV1(testresource.APIResourceConfigMap()), + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create fake informer manager + fakeInformerManager := &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{ + gv.WithKind("ConfigMap"): true, + }, + IsClusterScopedResource: false, + } + + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: nil, + } + + // Create a context that will cancel after a short time + // Use half of testTimeout to ensure we have time to verify after cancellation + ctx, cancel := context.WithTimeout(context.Background(), testTimeout/2) + defer cancel() + + // Start the populator in a goroutine + done := make(chan error, 1) + go func() { + done <- populator.Start(ctx) + }() + + // Wait for context to cancel or error + select { + case err := <-done: + // Should return nil when context is canceled + if err != nil { + t.Errorf("Start should not return error on context cancellation: %v", err) + } + case <-time.After(testTimeout): + t.Fatal("Start did not exit after context cancellation") + } +} + +func TestInformerPopulator_Integration(t *testing.T) { + // This test verifies the integration between InformerPopulator and the informer manager + + // Create fake discovery with multiple resource types + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + + fakeDiscovery.Resources = []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceConfigMap(), + testresource.APIResourceSecret(), + }, + }, + { + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceDeployment(), + }, + 
}, + } + + // Create REST mapper + groupResources := []*restmapper.APIGroupResources{ + { + Group: testresource.APIGroupV1(), + VersionedResources: map[string][]metav1.APIResource{ + "v1": fakeDiscovery.Resources[0].APIResources, + }, + }, + { + Group: testresource.APIGroupAppsV1(), + VersionedResources: map[string][]metav1.APIResource{ + "v1": fakeDiscovery.Resources[1].APIResources, + }, + }, + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create resource config that skips secrets + resourceConfig := utils.NewResourceConfig(false) + err := resourceConfig.Parse("v1/Secret") + if err != nil { + t.Fatalf("Failed to parse resource config: %v", err) + } + + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + IsClusterScopedResource: false, + } + + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: resourceConfig, + } + + // Run discovery + populator.discoverAndCreateInformers() + + // Note: FakeManager doesn't track calls, so we just verify no panics +} + +func TestInformerPopulator_PeriodicDiscovery(t *testing.T) { + // This test verifies that the populator continues to discover resources periodically + + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + + fakeDiscovery.Resources = []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + testresource.APIResourceConfigMap(), + }, + }, + } + + groupResources := []*restmapper.APIGroupResources{ + { + Group: testresource.APIGroupV1(), + VersionedResources: map[string][]metav1.APIResource{ + "v1": fakeDiscovery.Resources[0].APIResources, + }, + }, + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + IsClusterScopedResource: false, + } + + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: nil, + } + + // Override the discovery period for testing + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + defer cancel() + + // Start the populator + go func() { + _ = populator.Start(ctx) + }() + + // Wait a bit to allow multiple discovery cycles + time.Sleep(testSleep) + + // Note: FakeManager doesn't track calls, so we just verify successful execution +} diff --git a/pkg/resourcewatcher/resource_collector.go b/pkg/resourcewatcher/resource_collector.go index a0d92fd35..cebb7f5be 100644 --- a/pkg/resourcewatcher/resource_collector.go +++ b/pkg/resourcewatcher/resource_collector.go @@ -17,12 +17,14 @@ limitations under the License. package resourcewatcher import ( + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/discovery" "k8s.io/klog/v2" metricsV1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1" + "go.goms.io/fleet/pkg/utils" "go.goms.io/fleet/pkg/utils/informer" ) @@ -30,10 +32,11 @@ import ( // More specifically, all api resources which support the 'list', and 'watch' verbs. // All discovery errors are considered temporary. Upon encountering any error, // getWatchableResources will log and return any discovered resources it was able to process (which may be none). 
-func (d *ChangeDetector) getWatchableResources() ([]informer.APIResourceMeta, error) { +// This is a standalone function that can be used by both ChangeDetector and InformerPopulator. +func getWatchableResources(discoveryClient discovery.ServerResourcesInterface) ([]informer.APIResourceMeta, error) { // Get all the resources this cluster has. We only need to care about the preferred version as the informers watch // the preferred version will get watch event for resources on the other versions since there is only one version in etcd. - allResources, discoverError := d.DiscoveryClient.ServerPreferredResources() + allResources, discoverError := discoveryClient.ServerPreferredResources() allErr := make([]error, 0) if discoverError != nil { if discovery.IsGroupDiscoveryFailedError(discoverError) { @@ -82,3 +85,22 @@ func (d *ChangeDetector) getWatchableResources() ([]informer.APIResourceMeta, er return watchableGroupVersionResources, errors.NewAggregate(allErr) } + +// discoverWatchableResources discovers all API resources in the cluster and filters them +// based on the resource configuration. This is a shared helper used by both InformerPopulator +// and ChangeDetector to ensure consistent resource discovery logic. +func discoverWatchableResources(discoveryClient discovery.DiscoveryInterface, restMapper meta.RESTMapper, resourceConfig *utils.ResourceConfig) []informer.APIResourceMeta { + newResources, err := getWatchableResources(discoveryClient) + if err != nil { + klog.ErrorS(err, "Failed to get all the api resources from the cluster") + } + + var resourcesToWatch []informer.APIResourceMeta + for _, res := range newResources { + if utils.ShouldProcessResource(res.GroupVersionResource, restMapper, resourceConfig) { + resourcesToWatch = append(resourcesToWatch, res) + } + } + + return resourcesToWatch +} diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 48530cf8e..d0018214e 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -289,7 +289,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, placementKey queu // overloading). In the long run we might still want to resort to a cached situation. // // TO-DO (chenyu1): explore the possibilities of using a mutation cache for better performance. - bindings, err := controller.ListBindingsFromKey(ctx, f.uncachedReader, types.NamespacedName{Namespace: namespace, Name: name}) + bindings, err := controller.ListBindingsFromKey(ctx, f.uncachedReader, types.NamespacedName{Namespace: namespace, Name: name}, false) if err != nil { klog.ErrorS(err, "Failed to collect bindings", "policySnapshot", policyRef) return ctrl.Result{}, err @@ -607,7 +607,7 @@ type filteredClusterWithStatus struct { status *Status } -// helper type to pretty print a list of filteredClusterWithStatus +// filteredClusterWithStatusList is a list of filteredClusterWithStatus. type filteredClusterWithStatusList []*filteredClusterWithStatus func (cs filteredClusterWithStatusList) String() string { @@ -621,6 +621,15 @@ func (cs filteredClusterWithStatusList) String() string { return fmt.Sprintf("filteredClusters[%s]", strings.Join(filteredClusters, ", ")) } +// Implement sort.Interface for filteredClusterWithStatusList. 
+func (f filteredClusterWithStatusList) Len() int { return len(f) } +func (f filteredClusterWithStatusList) Less(i, j int) bool { + return f[i].cluster.Name < f[j].cluster.Name +} +func (f filteredClusterWithStatusList) Swap(i, j int) { + f[i], f[j] = f[j], f[i] +} + // runFilterPlugins runs filter plugins on clusters in parallel. func (f *framework) runFilterPlugins(ctx context.Context, state *CycleState, policy placementv1beta1.PolicySnapshotObj, clusters []clusterv1beta1.MemberCluster) (passed []*clusterv1beta1.MemberCluster, filtered filteredClusterWithStatusList, err error) { // Create a child context. @@ -787,6 +796,14 @@ func (f *framework) updatePolicySnapshotStatusFromBindings( return controller.NewUnexpectedBehaviorError(err) } + // Sort all filtered clusters. + // + // This step is needed to produce deterministic decision outputs. If there are enough slots, + // the scheduler will try to explain why some clusters are filtered out in the decision list; to ensure + // that the list will not change across scheduling cycles without actual scheduling policy + // refreshes, the filtered clusters need to be sorted. + sort.Sort(filteredClusterWithStatusList(filtered)) + // Prepare new scheduling decisions. newDecisions := newSchedulingDecisionsFromBindings(f.maxUnselectedClusterDecisionCount, notPicked, filtered, existing...) // Prepare new scheduling condition. diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index 83973e854..066c9eeba 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -23,9 +23,11 @@ import ( "log" "os" "strings" + "sync/atomic" "testing" "time" + crossplanetest "github.com/crossplane/crossplane-runtime/v2/pkg/test" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -2734,7 +2736,7 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { { cluster: &clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ - Name: altClusterName, + Name: anotherClusterName, }, }, status: filteredStatus, @@ -2742,7 +2744,7 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { { cluster: &clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ - Name: anotherClusterName, + Name: altClusterName, }, }, status: filteredStatus, @@ -2760,7 +2762,7 @@ func TestUpdatePolicySnapshotStatusFromBindings(t *testing.T) { Reason: fmt.Sprintf(resourceScheduleSucceededWithScoreMessageFormat, clusterName, affinityScore1, topologySpreadScore1), }, { - ClusterName: altClusterName, + ClusterName: anotherClusterName, Selected: false, Reason: filteredStatus.String(), }, @@ -6536,3 +6538,175 @@ func TestUpdatePolicySnapshotStatusForPickFixedPlacementType(t *testing.T) { }) } } + +// TestRunSchedulingCycleForPickAllPlacementType_StableStatusOutputInLargeFleet tests the +// runSchedulingCycleForPickAllPlacementType method, specifically to ensure that the status output +// remains consistent when running the scheduling cycle in a large fleet (i.e., the scheduler +// will not constantly refresh the status across scheduling cycles). +func TestRunSchedulingCycleForPickAllPlacementType_StableStatusOutputInLargeFleet(t *testing.T) { + ctx := context.Background() + + // Set up the scheduler profile with a label-based dummy filter plugin. 
+ profile := NewProfile("TestOnly") + + dummyLabelBasedFilterPluginName := fmt.Sprintf(dummyAllPurposePluginNameFormat, 0) + wantLabelKey := "pre-selected" + wantLabelValue := "true" + wantLabels := map[string]string{ + wantLabelKey: wantLabelValue, + } + dummyLabelBasedFilterPlugin := &DummyAllPurposePlugin{ + name: dummyLabelBasedFilterPluginName, + filterRunner: func(ctx context.Context, state CycleStatePluginReadWriter, policy placementv1beta1.PolicySnapshotObj, cluster *clusterv1beta1.MemberCluster) (status *Status) { + memberClusterLabels := cluster.GetLabels() + for wk, wv := range wantLabels { + if v, ok := memberClusterLabels[wk]; !ok || v != wv { + return NewNonErrorStatus(ClusterUnschedulable, dummyLabelBasedFilterPluginName) + } + } + return nil + }, + } + profile.WithFilterPlugin(dummyLabelBasedFilterPlugin) + + mockClientStatusUpdateCount := atomic.Int32{} + mockClient := crossplanetest.MockClient{ + MockCreate: func(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + return nil + }, + MockStatusUpdate: func(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error { + _ = mockClientStatusUpdateCount.Add(1) + return nil + }, + } + + f := &framework{ + profile: profile, + client: &mockClient, + uncachedReader: &mockClient, + manager: nil, + eventRecorder: nil, + parallelizer: parallelizer.NewParallelizer(parallelizer.DefaultNumOfWorkers), + maxUnselectedClusterDecisionCount: 3, + // The cluster eligibility checker is not invoked in this test spec. + clusterEligibilityChecker: clustereligibilitychecker.New(), + } + // No need to set up plugins with the framework. + + clusters := []clusterv1beta1.MemberCluster{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 1), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 2), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 3), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 4), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 5), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 6), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 7), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 8), + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(clusterNameTemplate, 9), + }, + }, + } + state := NewCycleState(clusters, nil, nil) + placementKey := queue.PlacementKey(crpName) + wantClusterUnschedulableReason := "ClusterUnschedulable" + policy := &placementv1beta1.ClusterSchedulingPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + Annotations: map[string]string{ + placementv1beta1.CRPGenerationAnnotation: "0", + }, + }, + Spec: placementv1beta1.SchedulingPolicySnapshotSpec{ + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickAllPlacementType, + Affinity: &placementv1beta1.Affinity{ + ClusterAffinity: &placementv1beta1.ClusterAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &placementv1beta1.ClusterSelector{ + ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + wantLabelKey: wantLabelValue, + }, + }, + }, + }, + }, + }, + }, + }, + }, + Status: placementv1beta1.SchedulingPolicySnapshotStatus{ + Conditions: []metav1.Condition{ + { + Type: 
string(placementv1beta1.PolicySnapshotScheduled), + Status: metav1.ConditionTrue, + Reason: FullyScheduledReason, + Message: fmt.Sprintf(fullyScheduledMessage, 1), + }, + }, + ObservedCRPGeneration: 0, + ClusterDecisions: []placementv1beta1.ClusterDecision{ + { + ClusterName: fmt.Sprintf(clusterNameTemplate, 1), + Reason: wantClusterUnschedulableReason, + }, + { + ClusterName: fmt.Sprintf(clusterNameTemplate, 2), + Reason: wantClusterUnschedulableReason, + }, + { + ClusterName: fmt.Sprintf(clusterNameTemplate, 3), + Reason: wantClusterUnschedulableReason, + }, + }, + }, + } + + // Simulate 100 consecutive scheduling cycles. + for i := 0; i < 100; i++ { + _, err := f.runSchedulingCycleForPickAllPlacementType(ctx, state, placementKey, policy, clusters, nil, nil, nil, nil) + if err != nil { + t.Fatalf("runSchedulingCycleForPickAllPlacementType() = %v, want no error", err) + } + } + + // Check if any status update was attempted; all should be skipped as there is no status change. + if mockClientStatusUpdateCount.Load() != 0 { + t.Errorf("runSchedulingCycleForPickAllPlacementType() status update attempt count = %d, want 0", mockClientStatusUpdateCount.Load()) + } +} diff --git a/pkg/scheduler/queue/batched.go b/pkg/scheduler/queue/batched.go new file mode 100644 index 000000000..003b05a1b --- /dev/null +++ b/pkg/scheduler/queue/batched.go @@ -0,0 +1,233 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package queue + +import ( + "fmt" + "time" + + "k8s.io/client-go/util/workqueue" +) + +const ( + maxNumberOfKeysToMoveFromBatchedToActiveQueuePerGo = 20000 +) + +// batchedProcessingPlacementSchedulingQueue implements the PlacementSchedulingQueue +// interface. +// +// It consists of two work queues to allow processing for both immediate and batched +// processing for scheduling related events (changes) of different responsiveness levels. +type batchedProcessingPlacementSchedulingQueue struct { + active workqueue.TypedRateLimitingInterface[any] + batched workqueue.TypedRateLimitingInterface[any] + + moveNow chan struct{} + movePeriodSeconds int32 +} + +// Verify that batchedProcessingPlacementSchedulingQueue implements +// PlacementSchedulingQueue at compile time. +var _ PlacementSchedulingQueue = &batchedProcessingPlacementSchedulingQueue{} + +// batchedProcessingPlacementSchedulingQueueOptions are the options for the +// batchedProcessingPlacementSchedulingQueue. 
+type batchedProcessingPlacementSchedulingQueueOptions struct { + activeQueueRateLimiter workqueue.TypedRateLimiter[any] + batchedQueueRateLimiter workqueue.TypedRateLimiter[any] + name string + movePeriodSeconds int32 +} + +var defaultBatchedProcessingPlacementSchedulingQueueOptions = batchedProcessingPlacementSchedulingQueueOptions{ + activeQueueRateLimiter: workqueue.DefaultTypedControllerRateLimiter[any](), + batchedQueueRateLimiter: workqueue.DefaultTypedControllerRateLimiter[any](), + name: "batchedProcessingPlacementSchedulingQueue", + movePeriodSeconds: int32(300), // 5 minutes +} + +// Close shuts down the scheduling queue immediately. +// +// Note that items remaining in the active queue might not get processed any more, and items +// left in the batched queue might not be moved to the active queue any more either. +func (bq *batchedProcessingPlacementSchedulingQueue) Close() { + // Signal the mover goroutine to exit. + // + // Note that this will trigger the mover goroutine to attempt another key move, but the + // active queue might not be able to accept the key any more (which is OK and does not + // result in an error). + close(bq.moveNow) + + bq.batched.ShutDown() + bq.active.ShutDown() +} + +// CloseWithDrain shuts down the scheduling queue and returns until: +// a) all the items in the batched queue have been moved to the active queue; and +// b) all the items in the active queue have been processed. +func (bq *batchedProcessingPlacementSchedulingQueue) CloseWithDrain() { + // Signal that all items in the batched queue should be moved to the active queue right away. + close(bq.moveNow) + + // Wait until all the items in the moving process from the batched queue to the active queue have completed + // their moves. + bq.batched.ShutDownWithDrain() + // Wait until all the items that are currently being processed by the scheduler to finish. + bq.active.ShutDownWithDrain() +} + +// NextPlacementKey returns the next PlacementKey (either clusterResourcePlacementKey or resourcePlacementKey) +// in the work queue for the scheduler to process. +func (bq *batchedProcessingPlacementSchedulingQueue) NextPlacementKey() (key PlacementKey, closed bool) { + // This will block on a condition variable if the queue is empty. + placementKey, shutdown := bq.active.Get() + if shutdown { + return "", true + } + return placementKey.(PlacementKey), false +} + +// Done marks a PlacementKey as done. +func (bq *batchedProcessingPlacementSchedulingQueue) Done(placementKey PlacementKey) { + bq.active.Done(placementKey) + // The keys in the batched queue are marked as done as soon as they are moved to the active queue. +} + +// Add adds a PlacementKey to the work queue for immediate processing. +// +// Note that this bypasses the rate limiter (if any). +func (bq *batchedProcessingPlacementSchedulingQueue) Add(placementKey PlacementKey) { + bq.active.Add(placementKey) +} + +// AddAfter adds a PlacementKey to the work queue after a set duration for immediate processing. +// +// Note that this bypasses the rate limiter (if any). +func (bq *batchedProcessingPlacementSchedulingQueue) AddAfter(placementKey PlacementKey, duration time.Duration) { + bq.active.AddAfter(placementKey, duration) +} + +// AddRateLimited adds a PlacementKey to the work queue after the rate limiter (if any) +// says that it is OK, for immediate processing. 
+func (bq *batchedProcessingPlacementSchedulingQueue) AddRateLimited(placementKey PlacementKey) {
+	bq.active.AddRateLimited(placementKey)
+}
+
+// Forget untracks a PlacementKey from rate limiter(s) (if any) set up with the queue.
+func (bq *batchedProcessingPlacementSchedulingQueue) Forget(placementKey PlacementKey) {
+	bq.active.Forget(placementKey)
+	// The keys in the batched queue are forgotten as soon as they are moved to the active queue.
+}
+
+// AddBatched tracks a PlacementKey and adds such keys in batch later to the work queue when appropriate.
+func (bq *batchedProcessingPlacementSchedulingQueue) AddBatched(placementKey PlacementKey) {
+	bq.batched.Add(placementKey)
+}
+
+// Run starts the scheduling queue.
+func (bq *batchedProcessingPlacementSchedulingQueue) Run() {
+	// Spin up a goroutine to move items periodically from the batched queue to the active queue.
+	go func() {
+		timer := time.NewTimer(time.Duration(bq.movePeriodSeconds) * time.Second)
+		for {
+			select {
+			case _, ok := <-bq.moveNow:
+				if !ok && bq.batched.ShuttingDown() {
+					// The batched queue has been shut down, and the moveNow channel has been closed;
+					// now it is safe to assume that after moving all the items from the batched queue to the active queue
+					// this time, the batched queue will be drained.
+					bq.moveAllBatchedItemsToActiveQueue()
+					return
+				}
+
+				// The batched queue might still be running; move all items and re-enter the loop.
+				bq.moveAllBatchedItemsToActiveQueue()
+			case <-timer.C:
+				// The timer has fired; move all items.
+				bq.moveAllBatchedItemsToActiveQueue()
+			}
+
+			// Reset the timer for the next round.
+			timer.Reset(time.Duration(bq.movePeriodSeconds) * time.Second)
+		}
+	}()
+}
+
+func (bq *batchedProcessingPlacementSchedulingQueue) moveAllBatchedItemsToActiveQueue() {
+	keysToMove := []PlacementKey{}
+
+	for bq.batched.Len() > 0 {
+		// Note that the batched queue is an internal object and is only read here by the scheduling queue
+		// itself (i.e., the batched queue has only one reader, though there might be multiple writers);
+		// consequently, if the Len() > 0 check passes, the subsequent Get() call is guaranteed to return
+		// an item (i.e., the call will not block). For simplicity reasons we do not do additional
+		// sanity checks here.
+		placementKey, shutdown := bq.batched.Get()
+		if shutdown {
+			break
+		}
+		keysToMove = append(keysToMove, placementKey.(PlacementKey))
+
+		if len(keysToMove) > maxNumberOfKeysToMoveFromBatchedToActiveQueuePerGo {
+			// The keys popped from the batched queue are not yet added to the active queue, in other words,
+			// they are not yet marked as done; the batched queue will still track them and adding them
+			// to the batched queue again at this moment will not trigger the batched queue to yield the same
+			// keys again. This implies that at maximum we will be moving a number of keys equal to
+			// the number of placement objects in the system at a time, which should be a finite number.
+			// Still, to be on the safer side, KubeFleet sets a cap on the number of keys to move per go.
+			break
+		}
+	}
+
+	for _, key := range keysToMove {
+		// Mark the keys as done in the batched queue and add the keys to the active queue in batch. Here the
+		// implementation keeps the keys in memory first and does not move keys right after they are popped as
+		// this pattern risks synchronized processing (i.e., a key is popped from the batched queue, immediately added to the
+		// active queue and gets marked as done by the scheduler, then added back to the batched queue again by
+		// one of the watchers before the key moving attempt is finished, which results in perpetual key moving).
+		bq.active.Add(key)
+		bq.batched.Done(key)
+		bq.batched.Forget(key)
+	}
+}
+
+// NewBatchedProcessingPlacementSchedulingQueue returns a batchedProcessingPlacementSchedulingQueue.
+func NewBatchedProcessingPlacementSchedulingQueue(name string, activeQRateLimiter, batchedQRateLimiter workqueue.TypedRateLimiter[any], movePeriodSeconds int32) PlacementSchedulingQueue {
+	if len(name) == 0 {
+		name = defaultBatchedProcessingPlacementSchedulingQueueOptions.name
+	}
+	if activeQRateLimiter == nil {
+		activeQRateLimiter = defaultBatchedProcessingPlacementSchedulingQueueOptions.activeQueueRateLimiter
+	}
+	if batchedQRateLimiter == nil {
+		batchedQRateLimiter = defaultBatchedProcessingPlacementSchedulingQueueOptions.batchedQueueRateLimiter
+	}
+	if movePeriodSeconds <= 0 {
+		movePeriodSeconds = defaultBatchedProcessingPlacementSchedulingQueueOptions.movePeriodSeconds
+	}
+
+	return &batchedProcessingPlacementSchedulingQueue{
+		active: workqueue.NewTypedRateLimitingQueueWithConfig(activeQRateLimiter, workqueue.TypedRateLimitingQueueConfig[any]{
+			Name: fmt.Sprintf("%s_Active", name),
+		}),
+		batched: workqueue.NewTypedRateLimitingQueueWithConfig(batchedQRateLimiter, workqueue.TypedRateLimitingQueueConfig[any]{
+			Name: fmt.Sprintf("%s_Batched", name),
+		}),
+		moveNow:           make(chan struct{}),
+		movePeriodSeconds: movePeriodSeconds,
+	}
+}
diff --git a/pkg/scheduler/queue/batched_test.go b/pkg/scheduler/queue/batched_test.go
new file mode 100644
index 000000000..93056e8d0
--- /dev/null
+++ b/pkg/scheduler/queue/batched_test.go
@@ -0,0 +1,187 @@
+/*
+Copyright 2025 The KubeFleet Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package queue
+
+import (
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+// TestBatchedProcessingPlacementSchedulingQueue_BasicOps tests the basic ops
+// (Add, Next, Done) of a batchedProcessingPlacementSchedulingQueue.
+func TestBatchedProcessingPlacementSchedulingQueue_BasicOps(t *testing.T) { + bq := NewBatchedProcessingPlacementSchedulingQueue("TestOnly", nil, nil, 0) + bq.Run() + + keysToAdd := []PlacementKey{"A", "B", "C", "D", "E"} + for _, key := range keysToAdd { + bq.Add(key) + } + + keysRecved := []PlacementKey{} + for i := 0; i < len(keysToAdd); i++ { + key, closed := bq.NextPlacementKey() + if closed { + t.Fatalf("Queue closed unexpected") + } + keysRecved = append(keysRecved, key) + bq.Done(key) + bq.Forget(key) + } + + if !cmp.Equal(keysToAdd, keysRecved) { + t.Fatalf("Received keys %v, want %v", keysRecved, keysToAdd) + } + + bq.Close() +} + +// TestBatchedProcessingPlacementSchedulingQueue_BatchedOps tests the batched ops +// (AddBatched) of a batchedProcessingPlacementSchedulingQueue. +func TestBatchedProcessingPlacementSchedulingQueue_BatchedOps(t *testing.T) { + movePeriodSeconds := int32(5) // 5 seconds + bq := NewBatchedProcessingPlacementSchedulingQueue("TestOnly", nil, nil, movePeriodSeconds) + bq.Run() + + addedTimestamp := time.Now() + keysToAddBatched := []PlacementKey{"A", "B", "C"} + for _, key := range keysToAddBatched { + bq.AddBatched(key) + } + + keysRecved := []PlacementKey{} + for i := 0; i < len(keysToAddBatched); i++ { + key, closed := bq.NextPlacementKey() + if closed { + t.Fatalf("Queue closed unexpected") + } + keysRecved = append(keysRecved, key) + bq.Done(key) + bq.Forget(key) + } + + if !cmp.Equal(keysToAddBatched, keysRecved) { + t.Fatalf("Received keys %v, want %v", keysRecved, keysToAddBatched) + } + // Allow some buffer time (+1 second). + if timeSpent := time.Since(addedTimestamp); timeSpent < time.Duration(movePeriodSeconds-1)*time.Second { + t.Fatalf("time to move keys, want no less than %f seconds, got %f seconds", float64(movePeriodSeconds-1), timeSpent.Seconds()) + } +} + +// TestBatchedProcessingPlacementSchedulingQueue_MoveNow tests the moveNow signal +// built in a batchedProcessingPlacementSchedulingQueue. +func TestBatchedProcessingPlacementSchedulingQueue_MoveNow(t *testing.T) { + movePeriodSeconds := int32(10) // 10 seconds + bq := NewBatchedProcessingPlacementSchedulingQueue("TestOnly", nil, nil, movePeriodSeconds) + bq.Run() + + keysToAddBatched := []PlacementKey{"A", "B", "C"} + for _, key := range keysToAddBatched { + bq.AddBatched(key) + } + + // Send a move now signal. + bqStruct, ok := bq.(*batchedProcessingPlacementSchedulingQueue) + if !ok { + t.Fatalf("Failed to cast to batchedProcessingPlacementSchedulingQueue") + } + bqStruct.moveNow <- struct{}{} + + moveNowTriggeredTimestamp := time.Now() + keysRecved := []PlacementKey{} + for i := 0; i < len(keysToAddBatched); i++ { + key, closed := bq.NextPlacementKey() + if closed { + t.Fatalf("Queue closed unexpected") + } + keysRecved = append(keysRecved, key) + bq.Done(key) + bq.Forget(key) + } + + if !cmp.Equal(keysToAddBatched, keysRecved) { + t.Fatalf("Received keys %v, want %v", keysRecved, keysToAddBatched) + } + // Allow some buffer time (1 seconds). + if timeSpent := time.Since(moveNowTriggeredTimestamp); timeSpent > time.Second { + t.Fatalf("time to move keys after move now triggered, want no more than %f seconds, got %f seconds", 1.0, timeSpent.Seconds()) + } +} + +// TestBatchedProcessingPlacementSchedulingQueue_CloseWithDrain tests the CloseWithDrain +// method of a batchedProcessingPlacementSchedulingQueue. 
+func TestBatchedProcessingPlacementSchedulingQueue_CloseWithDrain(t *testing.T) { + movePeriodSeconds := int32(600) // 10 minutes + bq := NewBatchedProcessingPlacementSchedulingQueue("TestOnly", nil, nil, movePeriodSeconds) + bq.Run() + + keysToAdd := []PlacementKey{"A", "B", "C"} + for _, key := range keysToAdd { + bq.Add(key) + } + + keysToAddBatched := []PlacementKey{"D", "E", "F"} + for _, key := range keysToAddBatched { + bq.AddBatched(key) + } + + // Send a move now signal. + bqStruct, ok := bq.(*batchedProcessingPlacementSchedulingQueue) + if !ok { + t.Fatalf("Failed to cast to batchedProcessingPlacementSchedulingQueue") + } + bqStruct.moveNow <- struct{}{} + + keysRecved := []PlacementKey{} + for i := 0; i < len(keysToAdd)+len(keysToAddBatched); i++ { + key, closed := bq.NextPlacementKey() + if closed { + t.Fatalf("Queue closed unexpected") + } + keysRecved = append(keysRecved, key) + // Do not yet mark the keys as Done. + } + + timerPeriodSeconds := int32(5) + go func() { + timer := time.NewTimer(time.Duration(timerPeriodSeconds) * time.Second) + <-timer.C + // Mark all keys as Done after 5 seconds. + for _, key := range keysRecved { + bq.Done(key) + bq.Forget(key) + } + }() + + // Close and drain the queue; this should block until all keys are marked Done. + closeWithDrainTimestamp := time.Now() + bq.CloseWithDrain() + + wantKeys := make([]PlacementKey, 0, len(keysToAdd)+len(keysToAddBatched)) + wantKeys = append(wantKeys, keysToAdd...) + wantKeys = append(wantKeys, keysToAddBatched...) + if !cmp.Equal(wantKeys, keysRecved) { + t.Fatalf("Received keys %v, want %v", keysRecved, wantKeys) + } + // Allow some buffer time (+1 second). + if timeSpent := time.Since(closeWithDrainTimestamp); timeSpent > time.Duration(timerPeriodSeconds+1)*time.Second { + t.Fatalf("time to close with drain, want no more than %f seconds, got %f seconds", float64(timerPeriodSeconds+1), timeSpent.Seconds()) + } +} diff --git a/pkg/scheduler/queue/queue.go b/pkg/scheduler/queue/queue.go index 552fd64fc..82ecdbf9d 100644 --- a/pkg/scheduler/queue/queue.go +++ b/pkg/scheduler/queue/queue.go @@ -20,8 +20,6 @@ package queue import ( "time" - - "k8s.io/client-go/util/workqueue" ) // PlacementKey is the unique identifier for a Placement checked into a scheduling queue. @@ -44,6 +42,10 @@ type PlacementSchedulingQueueWriter interface { AddRateLimited(placementKey PlacementKey) // AddAfter adds a PlacementKey to the work queue after a set duration. AddAfter(placementKey PlacementKey, duration time.Duration) + // AddBatched tracks a PlacementKey and adds such keys in batch later to the work queue when appropriate. + // + // This is most helpful in cases where certain changes do not require immediate processing by the scheduler. + AddBatched(placementKey PlacementKey) } // PlacementSchedulingQueue is an interface which queues PlacementKeys for the scheduler @@ -65,124 +67,3 @@ type PlacementSchedulingQueue interface { // Forget untracks a PlacementKey from rate limiter(s) (if any) set up with the queue. Forget(placementKey PlacementKey) } - -// simplePlacementSchedulingQueue is a simple implementation of -// PlacementSchedulingQueue. -// -// At this moment, one single workqueue would suffice, as sources such as the cluster watcher, -// the binding watcher, etc., can catch all changes that need the scheduler's attention. -// In the future, when more features, e.g., inter-placement affinity/anti-affinity, are added, -// more queues, such as a backoff queue, might become necessary. 
-type simplePlacementSchedulingQueue struct { - active workqueue.TypedRateLimitingInterface[any] -} - -// Verify that simplePlacementSchedulingQueue implements -// PlacementSchedulingQueue at compile time. -var _ PlacementSchedulingQueue = &simplePlacementSchedulingQueue{} - -// simplePlacementSchedulingQueueOptions are the options for the -// simplePlacementSchedulingQueue. -type simplePlacementSchedulingQueueOptions struct { - rateLimiter workqueue.TypedRateLimiter[any] - name string -} - -// Option is the function that configures the simplePlacementSchedulingQueue. -type Option func(*simplePlacementSchedulingQueueOptions) - -var defaultSimplePlacementSchedulingQueueOptions = simplePlacementSchedulingQueueOptions{ - rateLimiter: workqueue.DefaultTypedControllerRateLimiter[any](), - name: "placementSchedulingQueue", -} - -// WithRateLimiter sets a rate limiter for the workqueue. -func WithRateLimiter(rateLimiter workqueue.TypedRateLimiter[any]) Option { - return func(o *simplePlacementSchedulingQueueOptions) { - o.rateLimiter = rateLimiter - } -} - -// WithName sets a name for the workqueue. -func WithName(name string) Option { - return func(o *simplePlacementSchedulingQueueOptions) { - o.name = name - } -} - -// Run starts the scheduling queue. -// -// At this moment, Run is an no-op as there is only one queue present; in the future, -// when more queues are added, Run would start goroutines that move items between queues as -// appropriate. -func (sq *simplePlacementSchedulingQueue) Run() {} - -// Close shuts down the scheduling queue immediately. -func (sq *simplePlacementSchedulingQueue) Close() { - sq.active.ShutDown() -} - -// CloseWithDrain shuts down the scheduling queue and returns until all items are processed. -func (sq *simplePlacementSchedulingQueue) CloseWithDrain() { - sq.active.ShutDownWithDrain() -} - -// NextPlacementKey returns the next PlacementKey (either clusterResourcePlacementKey or resourcePlacementKey) -// in the work queue for the scheduler to process. -// -// Note that for now the queue simply wraps a work queue, and consider its state (whether it -// is shut down or not) as its own closedness. In the future, when more queues are added, the -// queue implementation must manage its own state. -func (sq *simplePlacementSchedulingQueue) NextPlacementKey() (key PlacementKey, closed bool) { - // This will block on a condition variable if the queue is empty. - placementKey, shutdown := sq.active.Get() - if shutdown { - return "", true - } - return placementKey.(PlacementKey), false -} - -// Done marks a PlacementKey as done. -func (sq *simplePlacementSchedulingQueue) Done(placementKey PlacementKey) { - sq.active.Done(placementKey) -} - -// Add adds a PlacementKey to the work queue. -// -// Note that this bypasses the rate limiter (if any). -func (sq *simplePlacementSchedulingQueue) Add(placementKey PlacementKey) { - sq.active.Add(placementKey) -} - -// AddRateLimited adds a PlacementKey to the work queue after the rate limiter (if any) -// says that it is OK. -func (sq *simplePlacementSchedulingQueue) AddRateLimited(placementKey PlacementKey) { - sq.active.AddRateLimited(placementKey) -} - -// AddAfter adds a PlacementKey to the work queue after a set duration. -// -// Note that this bypasses the rate limiter (if any) -func (sq *simplePlacementSchedulingQueue) AddAfter(placementKey PlacementKey, duration time.Duration) { - sq.active.AddAfter(placementKey, duration) -} - -// Forget untracks a PlacementKey from rate limiter(s) (if any) set up with the queue. 
-func (sq *simplePlacementSchedulingQueue) Forget(placementKey PlacementKey) {
-	sq.active.Forget(placementKey)
-}
-
-// NewSimplePlacementSchedulingQueue returns a
-// simplePlacementSchedulingQueue.
-func NewSimplePlacementSchedulingQueue(opts ...Option) PlacementSchedulingQueue {
-	options := defaultSimplePlacementSchedulingQueueOptions
-	for _, opt := range opts {
-		opt(&options)
-	}
-
-	return &simplePlacementSchedulingQueue{
-		active: workqueue.NewTypedRateLimitingQueueWithConfig(options.rateLimiter, workqueue.TypedRateLimitingQueueConfig[any]{
-			Name: options.name,
-		}),
-	}
-}
diff --git a/pkg/scheduler/queue/simple.go b/pkg/scheduler/queue/simple.go
new file mode 100644
index 000000000..8721bc517
--- /dev/null
+++ b/pkg/scheduler/queue/simple.go
@@ -0,0 +1,133 @@
+/*
+Copyright 2025 The KubeFleet Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package queue
+
+import (
+	"time"
+
+	"k8s.io/client-go/util/workqueue"
+)
+
+// simplePlacementSchedulingQueue is a simple implementation of
+// PlacementSchedulingQueue.
+//
+// This implementation is essentially a thin wrapper around one rate limiting
+// workqueue, which queues all placement keys indiscriminately for processing.
+type simplePlacementSchedulingQueue struct {
+	active workqueue.TypedRateLimitingInterface[any]
+}
+
+// Verify that simplePlacementSchedulingQueue implements
+// PlacementSchedulingQueue at compile time.
+var _ PlacementSchedulingQueue = &simplePlacementSchedulingQueue{}
+
+// simplePlacementSchedulingQueueOptions are the options for the
+// simplePlacementSchedulingQueue.
+type simplePlacementSchedulingQueueOptions struct {
+	rateLimiter workqueue.TypedRateLimiter[any]
+	name        string
+}
+
+var defaultSimplePlacementSchedulingQueueOptions = simplePlacementSchedulingQueueOptions{
+	rateLimiter: workqueue.DefaultTypedControllerRateLimiter[any](),
+	name:        "simplePlacementSchedulingQueue",
+}
+
+// Run starts the scheduling queue.
+//
+// At this moment, Run is a no-op as there is only one queue present; in the future,
+// when more queues are added, Run would start goroutines that move items between queues as
+// appropriate.
+func (sq *simplePlacementSchedulingQueue) Run() {}
+
+// Close shuts down the scheduling queue immediately.
+func (sq *simplePlacementSchedulingQueue) Close() {
+	sq.active.ShutDown()
+}
+
+// CloseWithDrain shuts down the scheduling queue and does not return until all items are processed.
+func (sq *simplePlacementSchedulingQueue) CloseWithDrain() {
+	sq.active.ShutDownWithDrain()
+}
+
+// NextPlacementKey returns the next PlacementKey (either clusterResourcePlacementKey or resourcePlacementKey)
+// in the work queue for the scheduler to process.
+//
+// Note that for now the queue simply wraps a work queue, and considers its state (whether it
+// is shut down or not) as its own closedness. In the future, when more queues are added, the
+// queue implementation must manage its own state.
+func (sq *simplePlacementSchedulingQueue) NextPlacementKey() (key PlacementKey, closed bool) { + // This will block on a condition variable if the queue is empty. + placementKey, shutdown := sq.active.Get() + if shutdown { + return "", true + } + return placementKey.(PlacementKey), false +} + +// Done marks a PlacementKey as done. +func (sq *simplePlacementSchedulingQueue) Done(placementKey PlacementKey) { + sq.active.Done(placementKey) +} + +// Add adds a PlacementKey to the work queue. +// +// Note that this bypasses the rate limiter (if any). +func (sq *simplePlacementSchedulingQueue) Add(placementKey PlacementKey) { + sq.active.Add(placementKey) +} + +// AddRateLimited adds a PlacementKey to the work queue after the rate limiter (if any) +// says that it is OK. +func (sq *simplePlacementSchedulingQueue) AddRateLimited(placementKey PlacementKey) { + sq.active.AddRateLimited(placementKey) +} + +// AddAfter adds a PlacementKey to the work queue after a set duration. +// +// Note that this bypasses the rate limiter (if any). +func (sq *simplePlacementSchedulingQueue) AddAfter(placementKey PlacementKey, duration time.Duration) { + sq.active.AddAfter(placementKey, duration) +} + +// AddBatched tracks a PlacementKey and adds such keys in batch later to the work queue when appropriate. +// +// For the simple queue implementation, this is equivalent to Add. +func (sq *simplePlacementSchedulingQueue) AddBatched(placementKey PlacementKey) { + sq.active.Add(placementKey) +} + +// Forget untracks a PlacementKey from rate limiter(s) (if any) set up with the queue. +func (sq *simplePlacementSchedulingQueue) Forget(placementKey PlacementKey) { + sq.active.Forget(placementKey) +} + +// NewSimplePlacementSchedulingQueue returns a simplePlacementSchedulingQueue. +func NewSimplePlacementSchedulingQueue(name string, rateLimiter workqueue.TypedRateLimiter[any]) PlacementSchedulingQueue { + if len(name) == 0 { + name = defaultSimplePlacementSchedulingQueueOptions.name + } + if rateLimiter == nil { + rateLimiter = defaultSimplePlacementSchedulingQueueOptions.rateLimiter + } + + return &simplePlacementSchedulingQueue{ + active: workqueue.NewTypedRateLimitingQueueWithConfig(rateLimiter, workqueue.TypedRateLimitingQueueConfig[any]{ + Name: name, + }), + } +} diff --git a/pkg/scheduler/queue/queue_test.go b/pkg/scheduler/queue/simple_test.go similarity index 86% rename from pkg/scheduler/queue/queue_test.go rename to pkg/scheduler/queue/simple_test.go index 4a100c9ff..e7fe1993a 100644 --- a/pkg/scheduler/queue/queue_test.go +++ b/pkg/scheduler/queue/simple_test.go @@ -22,10 +22,10 @@ import ( "github.com/google/go-cmp/cmp" ) -// TestSimplePlacementSchedulingQueueBasicOps tests the basic ops +// TestSimplePlacementSchedulingQueue_BasicOps tests the basic ops // (Add, Next, Done) of a simpleClusterResourcePlacementSchedulingQueue. 
-func TestSimplePlacementSchedulingQueueBasicOps(t *testing.T) { - sq := NewSimplePlacementSchedulingQueue() +func TestSimplePlacementSchedulingQueue_BasicOps(t *testing.T) { + sq := NewSimplePlacementSchedulingQueue("", nil) sq.Run() keysToAdd := []PlacementKey{"A", "B", "C", "D", "E"} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 6040bc63a..866f7670b 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -305,7 +305,7 @@ func (s *Scheduler) cleanUpAllBindingsFor(ctx context.Context, placement fleetv1 // Note that the listing is performed using the uncached client; this is to ensure that all related // bindings can be found, even if they have not been synced to the cache yet. // TO-DO (chenyu1): this is a very expensive op; explore options for optimization. - bindings, err := controller.ListBindingsFromKey(ctx, s.uncachedReader, types.NamespacedName{Namespace: placement.GetNamespace(), Name: placement.GetName()}) + bindings, err := controller.ListBindingsFromKey(ctx, s.uncachedReader, types.NamespacedName{Namespace: placement.GetNamespace(), Name: placement.GetName()}, false) if err != nil { klog.ErrorS(err, "Failed to list all bindings", "placement", placementRef) return err diff --git a/pkg/scheduler/watchers/binding/suite_test.go b/pkg/scheduler/watchers/binding/suite_test.go index 157d5c9ea..835ff35c9 100644 --- a/pkg/scheduler/watchers/binding/suite_test.go +++ b/pkg/scheduler/watchers/binding/suite_test.go @@ -93,7 +93,7 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred(), "Failed to create controller manager") - schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue() + schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue("", nil) // Create ClusterResourceBinding watcher crbReconciler. crbReconciler := &Reconciler{ diff --git a/pkg/scheduler/watchers/membercluster/suite_test.go b/pkg/scheduler/watchers/membercluster/suite_test.go index 4ae107f5e..bf9fb2fa9 100644 --- a/pkg/scheduler/watchers/membercluster/suite_test.go +++ b/pkg/scheduler/watchers/membercluster/suite_test.go @@ -184,7 +184,7 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred(), "Failed to create controller manager") - schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue() + schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue("", nil) reconciler := Reconciler{ Client: hubClient, diff --git a/pkg/scheduler/watchers/membercluster/watcher.go b/pkg/scheduler/watchers/membercluster/watcher.go index 2ceaff37a..ceaec38aa 100644 --- a/pkg/scheduler/watchers/membercluster/watcher.go +++ b/pkg/scheduler/watchers/membercluster/watcher.go @@ -171,7 +171,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu "Enqueueing placement for scheduler processing", "memberCluster", memberClusterRef, "placement", klog.KObj(placement)) - r.SchedulerWorkQueue.Add(controller.GetObjectKeyFromObj(placement)) + // TO-DO (chenyu1): at this moment, the scheduler still uses a simple queue implementation; as a result, + // the placement keys will be added to the queue immediately even with the AddBatched() call. Switch + // to a batched processing queue implementation later to take advantage of the batched processing feature. + r.SchedulerWorkQueue.AddBatched(controller.GetObjectKeyFromObj(placement)) } // The reconciliation loop completes. 
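Reviewer note: with the functional options removed, callers that want the defaults now pass ("", nil) to the constructor, as the updated test suites do. For context on how the queue contract is meant to be consumed, a rough sketch of a scheduler-side worker loop follows; it is illustrative only and not part of this PR, and processPlacement is a hypothetical stand-in for the scheduler's real per-key logic.

package sketch

import (
	"go.goms.io/fleet/pkg/scheduler/queue"
)

// Construction with defaults, as the updated test suites do:
//
//	sq := queue.NewSimplePlacementSchedulingQueue("", nil)
//	sq.Run()
//	go runWorker(sq, processFn)
//
// runWorker drains the scheduling queue until it is closed, following the usual
// client-go workqueue pattern.
func runWorker(sq queue.PlacementSchedulingQueue, processPlacement func(queue.PlacementKey) error) {
	for {
		key, closed := sq.NextPlacementKey()
		if closed {
			// Close or CloseWithDrain has been called; stop the worker.
			return
		}
		err := processPlacement(key)
		// Always mark the key as done so the queue stops tracking it as in-flight.
		sq.Done(key)
		if err != nil {
			// Requeue through the rate limiter so repeated failures back off.
			sq.AddRateLimited(key)
			continue
		}
		// On success, clear any rate-limiting history for this key.
		sq.Forget(key)
	}
}

Keys enqueued via Add, AddAfter, or AddBatched (which, as the TO-DO above notes, currently behaves the same as Add) all surface through the same NextPlacementKey call.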
diff --git a/pkg/scheduler/watchers/placement/suite_test.go b/pkg/scheduler/watchers/placement/suite_test.go index 64e039a88..a2e659414 100644 --- a/pkg/scheduler/watchers/placement/suite_test.go +++ b/pkg/scheduler/watchers/placement/suite_test.go @@ -93,7 +93,7 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred(), "Failed to create controller manager") - schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue() + schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue("", nil) crpReconciler := &Reconciler{ Client: hubClient, diff --git a/pkg/scheduler/watchers/schedulingpolicysnapshot/suite_test.go b/pkg/scheduler/watchers/schedulingpolicysnapshot/suite_test.go index d7f88ae4c..0dbe8011f 100644 --- a/pkg/scheduler/watchers/schedulingpolicysnapshot/suite_test.go +++ b/pkg/scheduler/watchers/schedulingpolicysnapshot/suite_test.go @@ -93,7 +93,7 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred(), "Failed to create controller manager") - schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue() + schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue("", nil) reconciler := &Reconciler{ Client: hubClient, diff --git a/pkg/utils/apiresources.go b/pkg/utils/apiresources.go index 194d81e13..bc34d54b6 100644 --- a/pkg/utils/apiresources.go +++ b/pkg/utils/apiresources.go @@ -23,7 +23,9 @@ import ( coordv1 "k8s.io/api/coordination/v1" corev1 "k8s.io/api/core/v1" eventsv1 "k8s.io/api/events/v1" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/klog/v2" metricsV1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1" clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" @@ -362,3 +364,26 @@ func (r *ResourceConfig) AddGroupKind(gk schema.GroupKind) { func (r *ResourceConfig) AddGroupVersionKind(gvk schema.GroupVersionKind) { r.groupVersionKinds[gvk] = struct{}{} } + +// ShouldProcessResource returns whether a GroupVersionResource should be processed (watched or selected). +// It checks if the resource is enabled based on the ResourceConfig settings. +// Returns true if resourceConfig is nil (all APIs allowed by default) or if the resource is not disabled. +func ShouldProcessResource(gvr schema.GroupVersionResource, restMapper meta.RESTMapper, resourceConfig *ResourceConfig) bool { + // By default, all of the APIs are allowed. 
+ if resourceConfig == nil { + return true + } + + gvks, err := restMapper.KindsFor(gvr) + if err != nil { + klog.ErrorS(err, "gvr transform failed", "gvr", gvr.String()) + return false + } + for _, gvk := range gvks { + if resourceConfig.IsResourceDisabled(gvk) { + klog.V(4).InfoS("Skip processing resource", "group version kind", gvk.String()) + return false + } + } + return true +} diff --git a/pkg/utils/apiresources_test.go b/pkg/utils/apiresources_test.go index b01929ce4..de249791f 100644 --- a/pkg/utils/apiresources_test.go +++ b/pkg/utils/apiresources_test.go @@ -19,7 +19,12 @@ package utils import ( "testing" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/restmapper" + + "go.goms.io/fleet/test/utils/resource" ) func TestResourceConfigGVKParse(t *testing.T) { @@ -609,3 +614,149 @@ func checkIfResourcesAreEnabledInConfig(t *testing.T, r *ResourceConfig, resourc } } } + +// testResource represents a simplified API resource for testing +type testResource struct { + Group string + Version string + Resource string + Kind string +} + +// newTestRESTMapper creates a RESTMapper with the specified resources for testing. +// Each resource is configured with standard settings (namespaced, standard verbs). +// Assumes input resources are valid and well-formed. +func newTestRESTMapper(resources ...testResource) meta.RESTMapper { + groupMap := make(map[string]*restmapper.APIGroupResources) + + for _, res := range resources { + groupVersion := res.Version + if res.Group != "" { + groupVersion = res.Group + "/" + res.Version + } + + // Initialize group if not exists + if groupMap[res.Group] == nil { + groupMap[res.Group] = &restmapper.APIGroupResources{ + Group: metav1.APIGroup{ + Name: res.Group, + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: groupVersion, Version: res.Version}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: groupVersion, + Version: res.Version, + }, + }, + VersionedResources: make(map[string][]metav1.APIResource), + } + } + + // Add resource to the version + groupMap[res.Group].VersionedResources[res.Version] = append( + groupMap[res.Group].VersionedResources[res.Version], + metav1.APIResource{ + Name: res.Resource, + Kind: res.Kind, + Namespaced: true, + Verbs: resource.VerbsAll, + }, + ) + } + + // Convert map to slice + groupResources := make([]*restmapper.APIGroupResources, 0, len(groupMap)) + for _, group := range groupMap { + groupResources = append(groupResources, group) + } + + return restmapper.NewDiscoveryRESTMapper(groupResources) +} + +func TestShouldProcessResource(t *testing.T) { + tests := []struct { + name string + gvr schema.GroupVersionResource + resourceConfig *ResourceConfig + setupMapper func() meta.RESTMapper + expected bool + }{ + { + name: "returns true when resourceConfig is nil", + gvr: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "configmaps"}, + resourceConfig: nil, + setupMapper: func() meta.RESTMapper { + return newTestRESTMapper( + testResource{Group: "", Version: "v1", Resource: "configmaps", Kind: "ConfigMap"}, + ) + }, + expected: true, + }, + { + name: "returns true when resource is not disabled", + gvr: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "configmaps"}, + resourceConfig: func() *ResourceConfig { + rc := NewResourceConfig(false) + // Disable secrets, but not configmaps + _ = rc.Parse("v1/Secret") + return rc + }(), + setupMapper: func() meta.RESTMapper { + 
return newTestRESTMapper( + testResource{Group: "", Version: "v1", Resource: "configmaps", Kind: "ConfigMap"}, + ) + }, + expected: true, + }, + { + name: "returns false when resource is disabled", + gvr: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}, + resourceConfig: func() *ResourceConfig { + rc := NewResourceConfig(false) + _ = rc.Parse("v1/Secret") + return rc + }(), + setupMapper: func() meta.RESTMapper { + return newTestRESTMapper( + testResource{Group: "", Version: "v1", Resource: "secrets", Kind: "Secret"}, + ) + }, + expected: false, + }, + { + name: "returns false when GVR mapping fails", + gvr: schema.GroupVersionResource{ + Group: "invalid.group", + Version: "v1", + Resource: "nonexistent", + }, + resourceConfig: NewResourceConfig(false), + setupMapper: func() meta.RESTMapper { + // Empty mapper - will fail to map the GVR + return newTestRESTMapper() + }, + expected: false, + }, + { + name: "handles apps group resources correctly", + gvr: schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}, + resourceConfig: nil, + setupMapper: func() meta.RESTMapper { + return newTestRESTMapper( + testResource{Group: "apps", Version: "v1", Resource: "deployments", Kind: "Deployment"}, + ) + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + restMapper := tt.setupMapper() + result := ShouldProcessResource(tt.gvr, restMapper, tt.resourceConfig) + if result != tt.expected { + t.Errorf("ShouldProcessResource() = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/utils/condition/reason.go b/pkg/utils/condition/reason.go index 9566ee42e..29d9291a2 100644 --- a/pkg/utils/condition/reason.go +++ b/pkg/utils/condition/reason.go @@ -170,6 +170,12 @@ const ( // UpdateRunWaitingReason is the reason string of condition if the staged update run is waiting for an after-stage task to complete. UpdateRunWaitingReason = "UpdateRunWaiting" + // UpdateRunStoppingReason is the reason string of condition if the staged update run stopping. + UpdateRunStoppingReason = "UpdateRunStopping" + + // UpdateRunStoppedReason is the reason string of condition if the staged update run stopped. + UpdateRunStoppedReason = "UpdateRunStopped" + // UpdateRunSucceededReason is the reason string of condition if the staged update run succeeded. UpdateRunSucceededReason = "UpdateRunSucceeded" @@ -179,6 +185,12 @@ const ( // StageUpdatingWaitingReason is the reason string of condition if the stage updating is waiting. StageUpdatingWaitingReason = "StageUpdatingWaiting" + // StageUpdatingStoppingReason is the reason string of condition if the stage updating is stopping. + StageUpdatingStoppingReason = "StageUpdatingStopping" + + // StageUpdatingStoppedReason is the reason string of condition if the stage updating is stopped. + StageUpdatingStoppedReason = "StageUpdatingStopped" + // StageUpdatingFailedReason is the reason string of condition if the stage updating failed. StageUpdatingFailedReason = "StageUpdatingFailed" diff --git a/pkg/utils/controller/binding_resolver.go b/pkg/utils/controller/binding_resolver.go index 5c08d9d30..1f25ebaf3 100644 --- a/pkg/utils/controller/binding_resolver.go +++ b/pkg/utils/controller/binding_resolver.go @@ -18,6 +18,7 @@ package controller import ( "context" + "sort" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -46,7 +47,8 @@ func FetchBindingFromKey(ctx context.Context, c client.Reader, bindingKey types. 
// that belong to the specified binding key. // The binding key format determines whether to list ClusterResourceBindings (cluster-scoped) // or ResourceBindings (namespaced). For namespaced resources, the key format has a namespace. -func ListBindingsFromKey(ctx context.Context, c client.Reader, placementKey types.NamespacedName) ([]placementv1beta1.BindingObj, error) { +// The fromCache parameter indicates whether the client is a cached client (true) or an uncached client (false). +func ListBindingsFromKey(ctx context.Context, c client.Reader, placementKey types.NamespacedName, fromCache bool) ([]placementv1beta1.BindingObj, error) { // Extract namespace and name from the binding key namespace := placementKey.Namespace name := placementKey.Name @@ -64,10 +66,22 @@ func ListBindingsFromKey(ctx context.Context, c client.Reader, placementKey type bindingList = &placementv1beta1.ClusterResourceBindingList{} } if err := c.List(ctx, bindingList, listOptions...); err != nil { - return nil, NewAPIServerError(false, err) + return nil, NewAPIServerError(fromCache, err) } - return bindingList.GetBindingObjs(), nil + bindingObjs := bindingList.GetBindingObjs() + + // Sort the list of bindings. + // + // This is needed to ensure deterministic decision output from the scheduler. + sort.Slice(bindingObjs, func(i, j int) bool { + A, B := bindingObjs[i], bindingObjs[j] + // Sort the bindings only by their names; for ClusterResourceBindings, their namespaces are always empty; + // for ResourceBindings, in this case they all come from the same namespace. + return A.GetName() < B.GetName() + }) + + return bindingObjs, nil } // ConvertCRBObjsToBindingObjs converts a slice of ClusterResourceBinding items to BindingObj array. diff --git a/pkg/utils/controller/binding_resolver_test.go b/pkg/utils/controller/binding_resolver_test.go index 6fec57cda..54b2388be 100644 --- a/pkg/utils/controller/binding_resolver_test.go +++ b/pkg/utils/controller/binding_resolver_test.go @@ -20,10 +20,13 @@ import ( "context" "errors" "fmt" + "sync/atomic" "testing" + crossplanetest "github.com/crossplane/crossplane-runtime/v2/pkg/test" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -33,6 +36,12 @@ import ( placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" ) +const ( + bindingName1 = "binding-1" + bindingName2 = "binding-2" + bindingName3 = "binding-3" +) + func TestListBindingsFromKey(t *testing.T) { ctx := context.Background() @@ -399,7 +408,7 @@ func TestListBindingsFromKey(t *testing.T) { WithObjects(tt.objects...). Build() - got, err := ListBindingsFromKey(ctx, fakeClient, tt.placementKey) + got, err := ListBindingsFromKey(ctx, fakeClient, tt.placementKey, true) if tt.wantErr { if err == nil { @@ -428,26 +437,118 @@ func TestListBindingsFromKey(t *testing.T) { } } -func TestListBindingsFromKey_ClientError(t *testing.T) { +// TestListBindingsFromKey_Sorted verifies that the returned bindings are always sorted by their names. +func TestListBindingsFromKey_Sorted(t *testing.T) { ctx := context.Background() - // Create a client that will return an error - scheme := runtime.NewScheme() - _ = placementv1beta1.AddToScheme(scheme) + // Set a mode variable to control the behavior of list ops. 
+ mockMode := atomic.Int32{} + mockMode.Store(0) - // Use a fake client but override List to return error - fakeClient := &failingListClient{ - Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + // Use the mock client from the crossplane package rather than the commonly used fake.Client to + // better manipulate the list op results. + mockClient := crossplanetest.MockClient{ + MockList: func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + mode := mockMode.Load() + switch mode { + case 0: + if err := meta.SetList(list, []runtime.Object{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName1, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName2, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName3, + }, + }, + }); err != nil { + return fmt.Errorf("cannot set list results: %w", err) + } + case 1: + if err := meta.SetList(list, []runtime.Object{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName3, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName2, + }, + }, + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName1, + }, + }, + }); err != nil { + return fmt.Errorf("cannot set list results: %w", err) + } + default: + return fmt.Errorf("unexpected mock mode: %d", mode) + } + return nil + }, } - _, err := ListBindingsFromKey(ctx, fakeClient, types.NamespacedName{Name: "test-placement"}) + bindingsInMode0, err := ListBindingsFromKey(ctx, &mockClient, types.NamespacedName{Name: "placeholder"}, true) + if err != nil { + t.Fatalf("ListBindingsFromKey() in mode 0 returned error: %v", err) + } - if err == nil { - t.Fatalf("Expected error but got nil") + mockMode.Store(1) + bindingsInMode1, err := ListBindingsFromKey(ctx, &mockClient, types.NamespacedName{Name: "placeholder"}, true) + if err != nil { + t.Fatalf("ListBindingsFromKey() in mode 1 returned error: %v", err) } - if !errors.Is(err, ErrAPIServerError) { - t.Errorf("Expected ErrAPIServerError but got: %v", err) + if diff := cmp.Diff(bindingsInMode0, bindingsInMode1); diff != "" { + t.Errorf("ListBindingsFromKey() returned different results in different modes (-mode0, +mode1):\n%s", diff) + } +} + +func TestListBindingsFromKey_ClientError(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + fromCache bool + wantErr error + }{ + { + name: "uncached client returns ErrAPIServerError", + fromCache: false, + wantErr: ErrAPIServerError, + }, + { + name: "cached client returns ErrUnexpectedBehavior as cached List should not fail", + fromCache: true, + wantErr: ErrUnexpectedBehavior, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a client that will return an error + scheme := runtime.NewScheme() + _ = placementv1beta1.AddToScheme(scheme) + + // Use a fake client but override List to return error + fakeClient := &failingListClient{ + Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + } + + if _, err := ListBindingsFromKey(ctx, fakeClient, types.NamespacedName{Name: "test-placement"}, tt.fromCache); !errors.Is(err, tt.wantErr) { + t.Errorf("ListBindingsFromKey() got err %v, want error %v", err, tt.wantErr) + } + }) } } diff --git a/pkg/utils/informer/informermanager.go b/pkg/utils/informer/informermanager.go index 4ea1a5143..07aed02fb 
100644 --- a/pkg/utils/informer/informermanager.go +++ b/pkg/utils/informer/informermanager.go @@ -33,11 +33,6 @@ import ( // InformerManager manages dynamic shared informer for all resources, include Kubernetes resource and // custom resources defined by CustomResourceDefinition. type Manager interface { - // AddDynamicResources builds a dynamicInformer for each resource in the resources list with the event handler. - // A resource is dynamic if its definition can be created/deleted/updated during runtime. - // Normally, it is a custom resource that is installed by users. The handler should not be nil. - AddDynamicResources(resources []APIResourceMeta, handler cache.ResourceEventHandler, listComplete bool) - // AddStaticResource creates a dynamicInformer for the static 'resource' and set its event handler. // A resource is static if its definition is pre-determined and immutable during runtime. // Normally, it is a resource that is pre-installed by the system. @@ -61,6 +56,9 @@ type Manager interface { // GetNameSpaceScopedResources returns the list of namespace scoped resources we are watching. GetNameSpaceScopedResources() []schema.GroupVersionResource + // GetAllResources returns the list of all resources (both cluster-scoped and namespace-scoped) we are watching. + GetAllResources() []schema.GroupVersionResource + // IsClusterScopedResources returns if a resource is cluster scoped. IsClusterScopedResources(resource schema.GroupVersionKind) bool @@ -69,6 +67,16 @@ type Manager interface { // GetClient returns the dynamic dynamicClient. GetClient() dynamic.Interface + + // AddEventHandlerToInformer adds an event handler to an existing informer for the given resource. + // If the informer doesn't exist, it will be created. This is used by the leader's ChangeDetector + // to add event handlers to informers that were created by the InformerPopulator. + AddEventHandlerToInformer(resource schema.GroupVersionResource, handler cache.ResourceEventHandler) + + // CreateInformerForResource creates an informer for the given resource without adding any event handlers. + // This is used by InformerPopulator to create informers on all pods (leader and followers) so they have + // synced caches for webhook validation. The leader's ChangeDetector will add event handlers later. + CreateInformerForResource(resource APIResourceMeta) } // NewInformerManager constructs a new instance of informerManagerImpl. 
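To make the intent of the reshaped Manager interface easier to review: informers are now created without handlers on every pod so caches are warm for webhook validation, and only the leader attaches event handlers afterwards. The sketch below is illustrative only; the actual InformerPopulator and ChangeDetector wiring is not part of this hunk, and the function names here are hypothetical.

package sketch

import (
	"k8s.io/client-go/tools/cache"

	"go.goms.io/fleet/pkg/utils/informer"
)

// populateInformers would run on every pod (leader and followers): it registers
// discovered resources and creates their informers without event handlers.
func populateInformers(mgr informer.Manager, resources []informer.APIResourceMeta) {
	for _, res := range resources {
		mgr.CreateInformerForResource(res)
	}
}

// wireLeaderHandlers would run only on the elected leader: it attaches the change
// detector's handler to every informer the populator already created. The call is
// idempotent, so re-running it after a discovery refresh does not double-register.
func wireLeaderHandlers(mgr informer.Manager, handler cache.ResourceEventHandler) {
	for _, gvr := range mgr.GetAllResources() {
		mgr.AddEventHandlerToInformer(gvr, handler)
	}
}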
@@ -77,11 +85,12 @@ func NewInformerManager(client dynamic.Interface, defaultResync time.Duration, p // TODO: replace this with plain context ctx, cancel := ContextForChannel(parentCh) return &informerManagerImpl{ - dynamicClient: client, - ctx: ctx, - cancel: cancel, - informerFactory: dynamicinformer.NewDynamicSharedInformerFactory(client, defaultResync), - apiResources: make(map[schema.GroupVersionKind]*APIResourceMeta), + dynamicClient: client, + ctx: ctx, + cancel: cancel, + informerFactory: dynamicinformer.NewDynamicSharedInformerFactory(client, defaultResync), + apiResources: make(map[schema.GroupVersionKind]*APIResourceMeta), + registeredHandlers: make(map[schema.GroupVersionResource]bool), } } @@ -119,61 +128,10 @@ type informerManagerImpl struct { // the apiResources map collects all the api resources we watch apiResources map[schema.GroupVersionKind]*APIResourceMeta resourcesLock sync.RWMutex -} - -func (s *informerManagerImpl) AddDynamicResources(dynResources []APIResourceMeta, handler cache.ResourceEventHandler, listComplete bool) { - newGVKs := make(map[schema.GroupVersionKind]bool, len(dynResources)) - - addInformerFunc := func(newRes APIResourceMeta) { - dynRes, exist := s.apiResources[newRes.GroupVersionKind] - if !exist { - newRes.isPresent = true - s.apiResources[newRes.GroupVersionKind] = &newRes - // TODO (rzhang): remember the ResourceEventHandlerRegistration and remove it when the resource is deleted - // TODO: handle error which only happens if the informer is stopped - informer := s.informerFactory.ForResource(newRes.GroupVersionResource).Informer() - // Strip away the ManagedFields info from objects to save memory. - // - // TO-DO (chenyu1): evaluate if there are other fields, e.g., owner refs, status, that can also be stripped - // away to save memory. - if err := informer.SetTransform(ctrlcache.TransformStripManagedFields()); err != nil { - // The SetTransform func would only fail if the informer has already started. In this case, - // no further action is needed. 
- klog.ErrorS(err, "Failed to set transform func for informer", "gvr", newRes.GroupVersionResource) - } - _, _ = informer.AddEventHandler(handler) - klog.InfoS("Added an informer for a new resource", "res", newRes) - } else if !dynRes.isPresent { - // we just mark it as enabled as we should not add another eventhandler to the informer as it's still - // in the informerFactory - // TODO: add the Event handler back - dynRes.isPresent = true - klog.InfoS("Reactivated an informer for a reappeared resource", "res", dynRes) - } - } - - s.resourcesLock.Lock() - defer s.resourcesLock.Unlock() - - // Add the new dynResources that do not exist yet while build a map to speed up lookup - for _, newRes := range dynResources { - newGVKs[newRes.GroupVersionKind] = true - addInformerFunc(newRes) - } - if !listComplete { - // do not disable any informer if we know the resource list is not complete - return - } - - // mark the disappeared dynResources from the handler map - for gvk, dynRes := range s.apiResources { - if !newGVKs[gvk] && !dynRes.isStaticResource && dynRes.isPresent { - // TODO: Remove the Event handler from the informer using the resourceEventHandlerRegistration during creat - dynRes.isPresent = false - klog.InfoS("Disabled an informer for a disappeared resource", "res", dynRes) - } - } + // registeredHandlers tracks which GVRs already have event handlers registered + // to prevent duplicate registrations and goroutine leaks + registeredHandlers map[schema.GroupVersionResource]bool } func (s *informerManagerImpl) AddStaticResource(resource APIResourceMeta, handler cache.ResourceEventHandler) { @@ -224,6 +182,19 @@ func (s *informerManagerImpl) GetNameSpaceScopedResources() []schema.GroupVersio return res } +func (s *informerManagerImpl) GetAllResources() []schema.GroupVersionResource { + s.resourcesLock.RLock() + defer s.resourcesLock.RUnlock() + + res := make([]schema.GroupVersionResource, 0, len(s.apiResources)) + for _, resource := range s.apiResources { + if resource.isPresent { + res = append(res, resource.GroupVersionResource) + } + } + return res +} + func (s *informerManagerImpl) IsClusterScopedResources(gvk schema.GroupVersionKind) bool { s.resourcesLock.RLock() defer s.resourcesLock.RUnlock() @@ -239,6 +210,56 @@ func (s *informerManagerImpl) Stop() { s.cancel() } +// AddEventHandlerToInformer adds an event handler to an existing informer for the given resource. +// If the informer doesn't exist, it will be created. This is used by the leader's ChangeDetector +// to add event handlers to informers that were created by the InformerPopulator. +// This method is idempotent - calling it multiple times for the same resource will only register +// the handler once, preventing goroutine leaks from duplicate registrations. +func (s *informerManagerImpl) AddEventHandlerToInformer(resource schema.GroupVersionResource, handler cache.ResourceEventHandler) { + s.resourcesLock.Lock() + defer s.resourcesLock.Unlock() + + // Check if handler already registered for this resource + if s.registeredHandlers[resource] { + return + } + + informer := s.getOrCreateInformerWithTransform(resource) + + // AddEventHandler returns (ResourceEventHandlerRegistration, error). The registration handle + // can be used to remove the handler later, but we never remove handlers dynamically - + // they persist for the lifetime of the informer, so we discard the handle. 
+ if _, err := informer.AddEventHandler(handler); err != nil { + klog.Fatal(err, "Failed to add event handler to informer - leader cannot function", "gvr", resource) + } + + // Mark this resource as having a handler registered + s.registeredHandlers[resource] = true + klog.V(2).InfoS("Added event handler to informer", "gvr", resource) +} + +func (s *informerManagerImpl) CreateInformerForResource(resource APIResourceMeta) { + s.resourcesLock.Lock() + defer s.resourcesLock.Unlock() + + dynRes, exist := s.apiResources[resource.GroupVersionKind] + if !exist { + // Register this resource in our tracking map + resource.isPresent = true + resource.isStaticResource = false + s.apiResources[resource.GroupVersionKind] = &resource + + // Create the informer without adding any event handler, with transform set + _ = s.getOrCreateInformerWithTransform(resource.GroupVersionResource) + + klog.V(3).InfoS("Created informer without handler", "res", resource) + } else if !dynRes.isPresent { + // Mark it as present again (resource reappeared) + dynRes.isPresent = true + klog.V(3).InfoS("Reactivated informer for reappeared resource", "res", dynRes) + } +} + // ContextForChannel derives a child context from a parent channel. // // The derived context's Done channel is closed when the returned cancel function @@ -257,3 +278,22 @@ func ContextForChannel(parentCh <-chan struct{}) (context.Context, context.Cance }() return ctx, cancel } + +// getOrCreateInformerWithTransform gets or creates an informer for the given resource and ensures +// the ManagedFields transform is set. This is idempotent - if the informer exists, we get the same +// instance. +func (s *informerManagerImpl) getOrCreateInformerWithTransform(resource schema.GroupVersionResource) cache.SharedIndexInformer { + // Get or create the informer (this is idempotent - if it exists, we get the same instance) + // The idempotent behavior is important because this method may be called multiple times, + // potentially concurrently, and relies on the shared informer instance from the factory. + informer := s.informerFactory.ForResource(resource).Informer() + + // Set the transform to strip ManagedFields. This is safe to call even if + // already set, since we get the same informer instance. If the informer has already + // started, this will fail silently (which is fine). + if err := informer.SetTransform(ctrlcache.TransformStripManagedFields()); err != nil { + klog.V(4).InfoS("Transform already set or informer started", "gvr", resource, "err", err) + } + + return informer +} diff --git a/pkg/utils/informer/informermanager_test.go b/pkg/utils/informer/informermanager_test.go new file mode 100644 index 000000000..0372df637 --- /dev/null +++ b/pkg/utils/informer/informermanager_test.go @@ -0,0 +1,414 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package informer + +import ( + "testing" + + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/kubernetes/scheme" + + testhandler "go.goms.io/fleet/test/utils/handler" + testresource "go.goms.io/fleet/test/utils/resource" +) + +func TestGetAllResources(t *testing.T) { + tests := []struct { + name string + namespaceScopedResources []APIResourceMeta + clusterScopedResources []APIResourceMeta + staticResources []APIResourceMeta + expectedResourceCount int + expectedNamespacedCount int + }{ + { + name: "mixed cluster and namespace scoped resources", + namespaceScopedResources: []APIResourceMeta{ + { + GroupVersionKind: testresource.GVKConfigMap(), + GroupVersionResource: testresource.GVRConfigMap(), + IsClusterScoped: false, + }, + { + GroupVersionKind: testresource.GVKSecret(), + GroupVersionResource: testresource.GVRSecret(), + IsClusterScoped: false, + }, + }, + clusterScopedResources: []APIResourceMeta{ + { + GroupVersionKind: testresource.GVKNamespace(), + GroupVersionResource: testresource.GVRNamespace(), + IsClusterScoped: true, + }, + }, + staticResources: []APIResourceMeta{ + { + GroupVersionKind: testresource.GVKNode(), + GroupVersionResource: testresource.GVRNode(), + IsClusterScoped: true, + isStaticResource: true, + }, + }, + expectedResourceCount: 4, // All resources including static + expectedNamespacedCount: 2, // Only namespace-scoped, excluding static + }, + { + name: "no resources", + expectedResourceCount: 0, + expectedNamespacedCount: 0, + }, + { + name: "only namespace scoped resources", + namespaceScopedResources: []APIResourceMeta{ + { + GroupVersionKind: testresource.GVKDeployment(), + GroupVersionResource: testresource.GVRDeployment(), + IsClusterScoped: false, + }, + }, + expectedResourceCount: 1, + expectedNamespacedCount: 1, + }, + { + name: "only cluster scoped resources", + clusterScopedResources: []APIResourceMeta{ + { + GroupVersionKind: testresource.GVKClusterRole(), + GroupVersionResource: testresource.GVRClusterRole(), + IsClusterScoped: true, + }, + }, + expectedResourceCount: 1, + expectedNamespacedCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a fake dynamic client + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := mgr.(*informerManagerImpl) + + // Add namespace-scoped resources + for _, res := range tt.namespaceScopedResources { + res.isPresent = true + implMgr.apiResources[res.GroupVersionKind] = &res + } + + // Add cluster-scoped resources + for _, res := range tt.clusterScopedResources { + res.isPresent = true + implMgr.apiResources[res.GroupVersionKind] = &res + } + + // Add static resources + for _, res := range tt.staticResources { + res.isPresent = true + implMgr.apiResources[res.GroupVersionKind] = &res + } + + // Test GetAllResources + allResources := mgr.GetAllResources() + if got := len(allResources); got != tt.expectedResourceCount { + t.Errorf("GetAllResources() returned %d resources, want %d", got, tt.expectedResourceCount) + } + + // Verify all expected resources are present + resourceMap := make(map[schema.GroupVersionResource]bool) + for _, gvr := range allResources { + resourceMap[gvr] = true + } + + for _, res := range tt.namespaceScopedResources { + if !resourceMap[res.GroupVersionResource] { + t.Errorf("namespace-scoped resource %v should be in GetAllResources", res.GroupVersionResource) + } + } + + for 
_, res := range tt.clusterScopedResources { + if !resourceMap[res.GroupVersionResource] { + t.Errorf("cluster-scoped resource %v should be in GetAllResources", res.GroupVersionResource) + } + } + + for _, res := range tt.staticResources { + if !resourceMap[res.GroupVersionResource] { + t.Errorf("static resource %v should be in GetAllResources", res.GroupVersionResource) + } + } + + // Test GetNameSpaceScopedResources + namespacedResources := mgr.GetNameSpaceScopedResources() + if got := len(namespacedResources); got != tt.expectedNamespacedCount { + t.Errorf("GetNameSpaceScopedResources() returned %d resources, want %d", got, tt.expectedNamespacedCount) + } + + // Verify only namespace-scoped, non-static resources are present + namespacedMap := make(map[schema.GroupVersionResource]bool) + for _, gvr := range namespacedResources { + namespacedMap[gvr] = true + } + + for _, res := range tt.namespaceScopedResources { + if !namespacedMap[res.GroupVersionResource] { + t.Errorf("namespace-scoped resource %v should be in GetNameSpaceScopedResources", res.GroupVersionResource) + } + } + + // Verify cluster-scoped and static resources are NOT in namespace-scoped list + for _, res := range tt.clusterScopedResources { + if namespacedMap[res.GroupVersionResource] { + t.Errorf("cluster-scoped resource %v should NOT be in GetNameSpaceScopedResources", res.GroupVersionResource) + } + } + + for _, res := range tt.staticResources { + if namespacedMap[res.GroupVersionResource] { + t.Errorf("static resource %v should NOT be in GetNameSpaceScopedResources", res.GroupVersionResource) + } + } + }) + } +} + +func TestGetAllResources_NotPresent(t *testing.T) { + // Test that resources marked as not present are excluded + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := mgr.(*informerManagerImpl) + + // Add a resource that is present + presentRes := APIResourceMeta{ + GroupVersionKind: testresource.GVKConfigMap(), + GroupVersionResource: testresource.GVRConfigMap(), + IsClusterScoped: false, + isPresent: true, + } + implMgr.apiResources[presentRes.GroupVersionKind] = &presentRes + + // Add a resource that is NOT present (deleted) + notPresentRes := APIResourceMeta{ + GroupVersionKind: testresource.GVKSecret(), + GroupVersionResource: testresource.GVRSecret(), + IsClusterScoped: false, + isPresent: false, + } + implMgr.apiResources[notPresentRes.GroupVersionKind] = ¬PresentRes + + allResources := mgr.GetAllResources() + if got := len(allResources); got != 1 { + t.Fatalf("GetAllResources() returned %d resources, want 1 (should only return present resources)", got) + } + if got := allResources[0]; got != presentRes.GroupVersionResource { + t.Errorf("GetAllResources()[0] = %v, want %v", got, presentRes.GroupVersionResource) + } +} + +func TestAddEventHandlerToInformer(t *testing.T) { + tests := []struct { + name string + gvr schema.GroupVersionResource + callMultipleTimes bool + }{ + { + name: "add handler to new informer", + gvr: testresource.GVRConfigMap(), + callMultipleTimes: false, + }, + { + name: "calling multiple times is idempotent - only registers once", + gvr: testresource.GVRDeployment(), + callMultipleTimes: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := 
mgr.(*informerManagerImpl) + + handler := &testhandler.TestHandler{ + OnAddFunc: func() {}, + } + + // Add the handler first time + mgr.AddEventHandlerToInformer(tt.gvr, handler) + + // Verify handler is tracked as registered + implMgr.resourcesLock.RLock() + checkHandler(t, implMgr, tt.gvr) + implMgr.resourcesLock.RUnlock() + + if tt.callMultipleTimes { + // Call again with same GVR - should be idempotent + mgr.AddEventHandlerToInformer(tt.gvr, handler) + mgr.AddEventHandlerToInformer(tt.gvr, handler) + checkHandler(t, implMgr, tt.gvr) + } + }) + } +} + +func checkHandler(t *testing.T, implMgr *informerManagerImpl, gvr schema.GroupVersionResource) { + t.Helper() + implMgr.resourcesLock.RLock() + defer implMgr.resourcesLock.RUnlock() + if !implMgr.registeredHandlers[gvr] { + t.Errorf("Expected handler for %v to be registered", gvr) + } + if len(implMgr.registeredHandlers) != 1 { + t.Errorf("Expected 1 registered handler, got %d", len(implMgr.registeredHandlers)) + } +} + +func TestCreateInformerForResource(t *testing.T) { + tests := []struct { + name string + resource APIResourceMeta + createTwice bool + markNotPresent bool // Mark resource as not present before second create + }{ + { + name: "create new informer", + resource: APIResourceMeta{ + GroupVersionKind: testresource.GVKConfigMap(), + GroupVersionResource: testresource.GVRConfigMap(), + IsClusterScoped: false, + }, + createTwice: false, + }, + { + name: "create informer twice (idempotent)", + resource: APIResourceMeta{ + GroupVersionKind: testresource.GVKDeployment(), + GroupVersionResource: testresource.GVRDeployment(), + IsClusterScoped: false, + }, + createTwice: true, + }, + { + name: "recreate informer for reappeared resource", + resource: APIResourceMeta{ + GroupVersionKind: testresource.GVKSecret(), + GroupVersionResource: testresource.GVRSecret(), + IsClusterScoped: false, + }, + createTwice: true, + markNotPresent: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := mgr.(*informerManagerImpl) + + // Create the informer + mgr.CreateInformerForResource(tt.resource) + + // Verify resource is tracked + resMeta, exists := implMgr.apiResources[tt.resource.GroupVersionKind] + if !exists { + t.Fatal("Expected resource to be tracked in apiResources map") + } + if !resMeta.isPresent { + t.Error("Expected resource to be marked as present") + } + if resMeta.IsClusterScoped != tt.resource.IsClusterScoped { + t.Errorf("IsClusterScoped = %v, want %v", resMeta.IsClusterScoped, tt.resource.IsClusterScoped) + } + + // Verify informer was created + informer := implMgr.informerFactory.ForResource(tt.resource.GroupVersionResource).Informer() + if informer == nil { + t.Fatal("Expected informer to be created") + } + + if tt.createTwice { + if tt.markNotPresent { + // Mark as not present (simulating resource deletion) + resMeta.isPresent = false + } + + // Create again + mgr.CreateInformerForResource(tt.resource) + + // Verify it's marked as present again + if !resMeta.isPresent { + t.Error("Expected resource to be marked as present after recreation") + } + } + }) + } +} + +func TestCreateInformerForResource_IsIdempotent(t *testing.T) { + // Use 3 attempts to verify idempotency works consistently across multiple calls, + // not just a single retry scenario + const createAttempts = 3 + + // Test that creating the same informer multiple 
times doesn't cause issues + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := mgr.(*informerManagerImpl) + + resource := APIResourceMeta{ + GroupVersionKind: testresource.GVKPod(), + GroupVersionResource: testresource.GVRPod(), + IsClusterScoped: false, + } + + // Create multiple times + for i := 0; i < createAttempts; i++ { + mgr.CreateInformerForResource(resource) + } + + // Should only have one entry in apiResources after + // we create the same informer multiple times + if len(implMgr.apiResources) != 1 { + t.Errorf("Expected 1 resource in apiResources, got %d", len(implMgr.apiResources)) + } + + // Verify resource is still tracked correctly + resMeta, exists := implMgr.apiResources[resource.GroupVersionKind] + if !exists { + t.Fatal("Expected resource to be tracked") + } + if !resMeta.isPresent { + t.Error("Expected resource to be marked as present") + } +} diff --git a/pkg/utils/informer/readiness/readiness.go b/pkg/utils/informer/readiness/readiness.go new file mode 100644 index 000000000..7ce61cabb --- /dev/null +++ b/pkg/utils/informer/readiness/readiness.go @@ -0,0 +1,61 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package readiness + +import ( + "fmt" + "net/http" + + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/klog/v2" + + "go.goms.io/fleet/pkg/utils/informer" +) + +// InformerReadinessChecker creates a readiness check function that verifies +// all resource informer caches are synced before marking the pod as ready. +// This prevents components from processing requests before the discovery cache is populated. +func InformerReadinessChecker(resourceInformer informer.Manager) func(*http.Request) error { + return func(_ *http.Request) error { + if resourceInformer == nil { + return fmt.Errorf("resource informer not initialized") + } + + // Require ALL informer caches to be synced before marking ready + allResources := resourceInformer.GetAllResources() + if len(allResources) == 0 { + // This can happen during startup when the ResourceInformer is created but the InformerPopulator + // hasn't discovered and registered any resources yet via AddDynamicResources(). 
+ return fmt.Errorf("resource informer not ready: no resources registered") + } + + // Check that ALL informers have synced + unsyncedResources := []schema.GroupVersionResource{} + for _, gvr := range allResources { + if !resourceInformer.IsInformerSynced(gvr) { + unsyncedResources = append(unsyncedResources, gvr) + } + } + + if len(unsyncedResources) > 0 { + return fmt.Errorf("resource informer not ready: %d/%d informers not synced yet", len(unsyncedResources), len(allResources)) + } + + klog.V(5).InfoS("All resource informers synced", "totalInformers", len(allResources)) + return nil + } +} diff --git a/pkg/utils/informer/readiness/readiness_test.go b/pkg/utils/informer/readiness/readiness_test.go new file mode 100644 index 000000000..fab796e65 --- /dev/null +++ b/pkg/utils/informer/readiness/readiness_test.go @@ -0,0 +1,150 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package readiness + +import ( + "strings" + "testing" + + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/ptr" + + "go.goms.io/fleet/pkg/utils/informer" + testinformer "go.goms.io/fleet/test/utils/informer" +) + +func TestReadinessChecker(t *testing.T) { + tests := []struct { + name string + resourceInformer informer.Manager + expectError bool + errorContains string + }{ + { + name: "nil informer", + resourceInformer: nil, + expectError: true, + errorContains: "resource informer not initialized", + }, + { + name: "no resources registered", + resourceInformer: &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{}, + }, + expectError: true, + errorContains: "no resources registered", + }, + { + name: "all informers synced", + resourceInformer: &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{ + {Group: "", Version: "v1", Kind: "ConfigMap"}: true, // this boolean is ignored + {Group: "", Version: "v1", Kind: "Secret"}: true, + {Group: "", Version: "v1", Kind: "Namespace"}: true, + }, + InformerSynced: ptr.To(true), // this makes all informers synced + }, + expectError: false, + }, + { + name: "some informers not synced", + resourceInformer: &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{ + {Group: "", Version: "v1", Kind: "ConfigMap"}: false, // this boolean is ignored + {Group: "", Version: "v1", Kind: "Secret"}: false, + {Group: "", Version: "v1", Kind: "Namespace"}: false, + }, + IsClusterScopedResource: true, + InformerSynced: ptr.To(false), // this makes all informers not synced + }, + expectError: true, + errorContains: "informers not synced yet", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + checker := InformerReadinessChecker(tt.resourceInformer) + err := checker(nil) + + if tt.expectError { + if err == nil { + t.Errorf("ReadinessChecker() expected error, got nil") + } + if tt.errorContains != "" && err != nil { + if got := err.Error(); !strings.Contains(got, tt.errorContains) { + t.Errorf("error message should contain %q, got: %s", tt.errorContains, got) + } + } + 
} else { + if err != nil { + t.Errorf("ReadinessChecker() unexpected error: %v", err) + } + } + }) + } +} + +func TestReadinessChecker_NoneSync(t *testing.T) { + // Test the case where we have multiple resources but none are synced + mockManager := &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{ + {Group: "", Version: "v1", Kind: "ConfigMap"}: false, // this boolean is ignored + {Group: "", Version: "v1", Kind: "Secret"}: false, + {Group: "apps", Version: "v1", Kind: "Deployment"}: false, + {Group: "", Version: "v1", Kind: "Namespace"}: false, + }, + InformerSynced: ptr.To(false), // this makes all informers not synced + } + + checker := InformerReadinessChecker(mockManager) + err := checker(nil) + + if err == nil { + t.Fatal("ReadinessChecker() should return error when no informers are synced") + } + if got := err.Error(); !strings.Contains(got, "informers not synced yet") { + t.Errorf("error message should contain 'informers not synced yet', got: %s", got) + } + // Should report 4 unsynced + if got := err.Error(); !strings.Contains(got, "4/4") { + t.Errorf("error message should contain '4/4', got: %s", got) + } +} + +func TestReadinessChecker_AllSyncedMultipleResources(t *testing.T) { + // Test with many resources all synced + mockManager := &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{ + {Group: "", Version: "v1", Kind: "ConfigMap"}: true, // this boolean is ignored + {Group: "", Version: "v1", Kind: "Secret"}: true, + {Group: "", Version: "v1", Kind: "Service"}: true, + {Group: "apps", Version: "v1", Kind: "Deployment"}: true, + {Group: "apps", Version: "v1", Kind: "StatefulSet"}: true, + {Group: "", Version: "v1", Kind: "Namespace"}: true, + {Group: "rbac.authorization.k8s.io", Version: "v1", Kind: "ClusterRole"}: true, + }, + InformerSynced: ptr.To(true), // this makes all informers synced + } + + checker := InformerReadinessChecker(mockManager) + err := checker(nil) + + if err != nil { + t.Errorf("ReadinessChecker() unexpected error when all informers are synced: %v", err) + } +} diff --git a/pkg/webhook/fleetresourcehandler/fleetresourcehandler_webhook_test.go b/pkg/webhook/fleetresourcehandler/fleetresourcehandler_webhook_test.go index 5df4cd7ee..cbcb8effd 100644 --- a/pkg/webhook/fleetresourcehandler/fleetresourcehandler_webhook_test.go +++ b/pkg/webhook/fleetresourcehandler/fleetresourcehandler_webhook_test.go @@ -53,7 +53,7 @@ func TestHandleCRD(t *testing.T) { resourceValidator: fleetResourceValidator{}, wantResponse: admission.Allowed(fmt.Sprintf(validation.ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{"test-group"}), admissionv1.Create, &utils.CRDMetaGVK, "", types.NamespacedName{Name: "test-crd"})), }, - "allow user in system:masters group to modify fleet CRD": { + "allow user in system:masters group to modify fleet managed CRD": { req: admission.Request{ AdmissionRequest: admissionv1.AdmissionRequest{ Name: "memberclusters.cluster.kubernetes-fleet.io", @@ -68,10 +68,25 @@ func TestHandleCRD(t *testing.T) { resourceValidator: fleetResourceValidator{}, wantResponse: admission.Allowed(fmt.Sprintf(validation.ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{"system:masters"}), admissionv1.Update, &utils.CRDMetaGVK, "", types.NamespacedName{Name: "memberclusters.cluster.kubernetes-fleet.io"})), }, - "allow white listed user to modify fleet CRD": { + "allow user in kubeadm:cluster-admins group for fleet managed CRD": { req: admission.Request{ AdmissionRequest: 
admissionv1.AdmissionRequest{ - Name: "memberclusters.cluster.kubernetes-fleet.io", + Name: "clusterprofiles.multicluster.x-k8s.io", + UserInfo: authenticationv1.UserInfo{ + Username: "test-user", + Groups: []string{"kubeadm:cluster-admins"}, + }, + RequestKind: &utils.CRDMetaGVK, + Operation: admissionv1.Update, + }, + }, + resourceValidator: fleetResourceValidator{}, + wantResponse: admission.Allowed(fmt.Sprintf(validation.ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{"kubeadm:cluster-admins"}), admissionv1.Update, &utils.CRDMetaGVK, "", types.NamespacedName{Name: "clusterprofiles.multicluster.x-k8s.io"})), + }, + "allow white listed user to modify fleet managed CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "clusterresourceplacements.placement.kubernetes-fleet.io", UserInfo: authenticationv1.UserInfo{ Username: "test-user", Groups: []string{"test-group"}, @@ -83,9 +98,9 @@ func TestHandleCRD(t *testing.T) { resourceValidator: fleetResourceValidator{ whiteListedUsers: []string{"test-user"}, }, - wantResponse: admission.Allowed(fmt.Sprintf(validation.ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{"test-group"}), admissionv1.Delete, &utils.CRDMetaGVK, "", types.NamespacedName{Name: "memberclusters.cluster.kubernetes-fleet.io"})), + wantResponse: admission.Allowed(fmt.Sprintf(validation.ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{"test-group"}), admissionv1.Delete, &utils.CRDMetaGVK, "", types.NamespacedName{Name: "clusterresourceplacements.placement.kubernetes-fleet.io"})), }, - "deny non system user to modify fleet CRD": { + "deny non system user to modify fleet managed CRD": { req: admission.Request{ AdmissionRequest: admissionv1.AdmissionRequest{ Name: "memberclusters.cluster.kubernetes-fleet.io", diff --git a/pkg/webhook/validation/uservalidation.go b/pkg/webhook/validation/uservalidation.go index b0011bcaa..49b12c303 100644 --- a/pkg/webhook/validation/uservalidation.go +++ b/pkg/webhook/validation/uservalidation.go @@ -14,6 +14,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" "k8s.io/utils/strings/slices" + clusterinventory "sigs.k8s.io/cluster-inventory-api/apis/v1alpha1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -44,7 +45,12 @@ const ( ) var ( - fleetCRDGroups = []string{"networking.fleet.azure.com", "cluster.kubernetes-fleet.io", "placement.kubernetes-fleet.io"} + fleetManagedCRDGroups = []string{ + utils.NetworkingGroupName, + clusterv1beta1.GroupVersion.Group, + placementv1beta1.GroupVersion.Group, + clusterinventory.GroupVersion.Group, + } ) // ValidateUserForFleetCRD checks to see if user is not allowed to modify fleet CRDs. @@ -255,7 +261,7 @@ func isMemberClusterUpdated(currentObj, oldObj client.Object) (bool, error) { // checkCRDGroup returns true if the input CRD group is a fleet CRD group. func checkCRDGroup(group string) bool { - return slices.Contains(fleetCRDGroups, group) + return slices.Contains(fleetManagedCRDGroups, group) } // ValidateMCIdentity returns admission allowed/denied based on the member cluster's identity. 
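For reviewers checking the widened guard: with the cluster-inventory group added to fleetManagedCRDGroups, any CRD whose group matches one of the four entries is now protected, while CRDs in other groups remain freely modifiable. The sketch below shows roughly how a caller would route a CRD admission request through this check; deriving the group from the CRD object name is an assumption made for brevity, since the real fleetResourceValidator decodes the CRD from the request and uses its spec group.

package sketch

import (
	"strings"

	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

	"go.goms.io/fleet/pkg/webhook/validation"
)

// validateCRDRequest is a hypothetical wrapper: CRD object names follow the
// "<plural>.<group>" convention, so everything after the first "." approximates
// the API group that checkCRDGroup matches against fleetManagedCRDGroups.
func validateCRDRequest(req admission.Request, whiteListedUsers []string) admission.Response {
	group := ""
	if idx := strings.Index(req.Name, "."); idx >= 0 {
		group = req.Name[idx+1:]
	}
	// Non-fleet-managed groups are allowed outright; fleet-managed groups are
	// restricted to white-listed users and privileged groups such as
	// system:masters and kubeadm:cluster-admins.
	return validation.ValidateUserForFleetCRD(req, whiteListedUsers, group)
}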
diff --git a/pkg/webhook/validation/uservalidation_test.go b/pkg/webhook/validation/uservalidation_test.go index 6ef1dcd8d..aedb57cf7 100644 --- a/pkg/webhook/validation/uservalidation_test.go +++ b/pkg/webhook/validation/uservalidation_test.go @@ -5,6 +5,7 @@ import ( "sort" "testing" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" admissionv1 "k8s.io/api/admission/v1" authenticationv1 "k8s.io/api/authentication/v1" @@ -437,3 +438,168 @@ func TestValidateFleetMemberClusterUpdate(t *testing.T) { }) } } + +func TestValidateUserForFleetCRD(t *testing.T) { + testCases := map[string]struct { + req admission.Request + whiteListedUsers []string + group string + wantResponse admission.Response + }{ + "allow user in system:masters group for fleet placement CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "clusterresourceplacements.placement.kubernetes-fleet.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "test-user", + Groups: []string{mastersGroup}, + }, + Operation: admissionv1.Create, + }, + }, + group: "placement.kubernetes-fleet.io", + wantResponse: admission.Allowed(fmt.Sprintf(ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{mastersGroup}), admissionv1.Create, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "clusterresourceplacements.placement.kubernetes-fleet.io"})), + }, + "deny non-admin user for fleet placement CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "clusterresourceplacements.placement.kubernetes-fleet.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "regular-user", + Groups: []string{"regular-group"}, + }, + Operation: admissionv1.Update, + }, + }, + group: "placement.kubernetes-fleet.io", + wantResponse: admission.Denied(fmt.Sprintf(ResourceDeniedFormat, "regular-user", utils.GenerateGroupString([]string{"regular-group"}), admissionv1.Update, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "clusterresourceplacements.placement.kubernetes-fleet.io"})), + }, + "allow user in kubeadm:cluster-admins group for fleet cluster CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "memberclusters.cluster.kubernetes-fleet.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "test-user", + Groups: []string{kubeadmClusterAdminsGroup}, + }, + Operation: admissionv1.Update, + }, + }, + group: "cluster.kubernetes-fleet.io", + wantResponse: admission.Allowed(fmt.Sprintf(ResourceAllowedFormat, "test-user", utils.GenerateGroupString([]string{kubeadmClusterAdminsGroup}), admissionv1.Update, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "memberclusters.cluster.kubernetes-fleet.io"})), + }, + "deny service account for fleet cluster CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "memberclusters.cluster.kubernetes-fleet.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "system:serviceaccount:default:test-sa", + Groups: []string{serviceAccountsGroup}, + }, + Operation: admissionv1.Update, + }, + }, + group: "cluster.kubernetes-fleet.io", + wantResponse: admission.Denied(fmt.Sprintf(ResourceDeniedFormat, "system:serviceaccount:default:test-sa", utils.GenerateGroupString([]string{serviceAccountsGroup}), admissionv1.Update, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "memberclusters.cluster.kubernetes-fleet.io"})), 
+ }, + "allow white listed user for fleet networking CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "internalserviceexports.networking.fleet.azure.com", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "white-listed-user", + Groups: []string{"test-group"}, + }, + Operation: admissionv1.Delete, + SubResource: "status", + }, + }, + whiteListedUsers: []string{"white-listed-user"}, + group: "networking.fleet.azure.com", + wantResponse: admission.Allowed(fmt.Sprintf(ResourceAllowedFormat, "white-listed-user", utils.GenerateGroupString([]string{"test-group"}), admissionv1.Delete, + &utils.CRDMetaGVK, "status", types.NamespacedName{Name: "internalserviceexports.networking.fleet.azure.com"})), + }, + "deny regular user for fleet networking CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "internalserviceexports.networking.fleet.azure.com", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "regular-user", + Groups: []string{"regular-group"}, + }, + Operation: admissionv1.Create, + }, + }, + group: "networking.fleet.azure.com", + wantResponse: admission.Denied(fmt.Sprintf(ResourceDeniedFormat, "regular-user", utils.GenerateGroupString([]string{"regular-group"}), admissionv1.Create, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "internalserviceexports.networking.fleet.azure.com"})), + }, + "allow system:masters for cluster inventory CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "clusterprofiles.multicluster.x-k8s.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "admin-user", + Groups: []string{mastersGroup}, + }, + Operation: admissionv1.Create, + }, + }, + group: "multicluster.x-k8s.io", + wantResponse: admission.Allowed(fmt.Sprintf(ResourceAllowedFormat, "admin-user", utils.GenerateGroupString([]string{mastersGroup}), admissionv1.Create, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "clusterprofiles.multicluster.x-k8s.io"})), + }, + "deny regular user for cluster inventory CRD": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "clusterprofiles.multicluster.x-k8s.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "regular-user", + Groups: []string{"regular-group"}, + }, + Operation: admissionv1.Create, + }, + }, + group: "multicluster.x-k8s.io", + wantResponse: admission.Denied(fmt.Sprintf(ResourceDeniedFormat, "regular-user", utils.GenerateGroupString([]string{"regular-group"}), admissionv1.Create, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "clusterprofiles.multicluster.x-k8s.io"})), + }, + "allow any user for non-fleet-managed CRD group": { + req: admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Name: "gateways.gateway.networking.k8s.io", + RequestKind: &utils.CRDMetaGVK, + UserInfo: authenticationv1.UserInfo{ + Username: "regular-user", + Groups: []string{"regular-group"}, + }, + Operation: admissionv1.Create, + }, + }, + group: "networking.k8s.io", + wantResponse: admission.Allowed(fmt.Sprintf(ResourceAllowedFormat, "regular-user", utils.GenerateGroupString([]string{"regular-group"}), admissionv1.Create, + &utils.CRDMetaGVK, "", types.NamespacedName{Name: "gateways.gateway.networking.k8s.io"})), + }, + } + + for testName, testCase := range testCases { + t.Run(testName, func(t *testing.T) { + gotResult := 
ValidateUserForFleetCRD(testCase.req, testCase.whiteListedUsers, testCase.group) + if diff := cmp.Diff(testCase.wantResponse, gotResult); diff != "" { + t.Errorf("ValidateUserForFleetCRD() mismatch (-want +got):\n%s", diff) + } + }) + } +} diff --git a/test/apis/placement/v1beta1/api_validation_integration_test.go b/test/apis/placement/v1beta1/api_validation_integration_test.go index 6cf6d8c7e..a635a211a 100644 --- a/test/apis/placement/v1beta1/api_validation_integration_test.go +++ b/test/apis/placement/v1beta1/api_validation_integration_test.go @@ -142,6 +142,9 @@ var _ = Describe("Test placement v1beta1 API validation", func() { PlacementType: placementv1beta1.PickFixedPlacementType, ClusterNames: []string{"cluster1", "cluster2"}, }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.ExternalRolloutStrategyType, + }, }, } Expect(hubClient.Create(ctx, &crp)).Should(Succeed()) @@ -166,6 +169,14 @@ var _ = Describe("Test placement v1beta1 API validation", func() { Expect(errors.As(err, &statusErr)).To(BeTrue(), fmt.Sprintf("Update CRP call produced error %s. Error type wanted is %s.", reflect.TypeOf(err), reflect.TypeOf(&k8sErrors.StatusError{}))) Expect(statusErr.ErrStatus.Message).Should(MatchRegexp("placement type is immutable")) }) + + It("should deny update of RolloutStrategy type when External", func() { + crp.Spec.Strategy.Type = placementv1beta1.RollingUpdateRolloutStrategyType + err := hubClient.Update(ctx, &crp) + var statusErr *k8sErrors.StatusError + Expect(errors.As(err, &statusErr)).To(BeTrue(), fmt.Sprintf("Update CRP call produced error %s. Error type wanted is %s.", reflect.TypeOf(err), reflect.TypeOf(&k8sErrors.StatusError{}))) + Expect(statusErr.ErrStatus.Message).Should(MatchRegexp("cannot change rollout strategy type from 'External' to other types")) + }) }) Context("Test ClusterResourcePlacement StatusReportingScope validation - create, allow cases", func() { @@ -1129,7 +1140,7 @@ var _ = Describe("Test placement v1beta1 API validation", func() { }) Context("Test ClusterStagedUpdateRun API validation - invalid cases", func() { - It("Should deny creation of ClusterStagedUpdateRun with name length > 127", func() { + It("Should deny creation of ClusterStagedUpdateRun with name length > 63", func() { updateRun := placementv1beta1.ClusterStagedUpdateRun{ ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf(invalidupdateRunNameTemplate, GinkgoParallelProcess()), @@ -1138,7 +1149,7 @@ var _ = Describe("Test placement v1beta1 API validation", func() { err := hubClient.Create(ctx, &updateRun) var statusErr *k8sErrors.StatusError Expect(errors.As(err, &statusErr)).To(BeTrue(), fmt.Sprintf("Create updateRun call produced error %s. 
Error type wanted is %s.", reflect.TypeOf(err), reflect.TypeOf(&k8sErrors.StatusError{}))) - Expect(statusErr.ErrStatus.Message).Should(MatchRegexp("metadata.name max length is 127")) + Expect(statusErr.ErrStatus.Message).Should(MatchRegexp("metadata.name max length is 63")) }) It("Should deny update of ClusterStagedUpdateRun placementName field", func() { diff --git a/test/e2e/actuals_test.go b/test/e2e/actuals_test.go index f36d7f105..9388f2f8c 100644 --- a/test/e2e/actuals_test.go +++ b/test/e2e/actuals_test.go @@ -26,6 +26,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -111,6 +113,26 @@ func validateConfigMapOnCluster(cluster *framework.Cluster, name types.Namespace return nil } +func validateCustomResourceOnCluster(cluster *framework.Cluster, name types.NamespacedName, gvk schema.GroupVersionKind) error { + clusterCR := &unstructured.Unstructured{} + clusterCR.SetGroupVersionKind(gvk) + if err := cluster.KubeClient.Get(ctx, name, clusterCR); err != nil { + return fmt.Errorf("custom resource not found on cluster: %w", err) + } + + hubCR := &unstructured.Unstructured{} + hubCR.SetGroupVersionKind(gvk) + if err := hubClient.Get(ctx, name, hubCR); err != nil { + return fmt.Errorf("custom resource not found on hub: %w", err) + } + + if diff := cmp.Diff(hubCR.Object["spec"], clusterCR.Object["spec"]); diff != "" { + return fmt.Errorf("custom resource spec mismatch (-hub +cluster):\n%s", diff) + } + + return nil +} + func validateAnnotationOfConfigMapOnCluster(cluster *framework.Cluster, wantAnnotations map[string]string) error { workNamespaceName := fmt.Sprintf(workNamespaceNameTemplate, GinkgoParallelProcess()) appConfigMapName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess()) @@ -2068,16 +2090,12 @@ func updateRunStageTaskSucceedConditions(generation int64, taskType placementv1b } func updateRunSucceedConditions(generation int64) []metav1.Condition { - initializeCondGeneration := generation - if generation > 1 { - initializeCondGeneration = 1 - } return []metav1.Condition{ { Type: string(placementv1beta1.StagedUpdateRunConditionInitialized), Status: metav1.ConditionTrue, Reason: condition.UpdateRunInitializeSucceededReason, - ObservedGeneration: initializeCondGeneration, + ObservedGeneration: generation, }, { Type: string(placementv1beta1.StagedUpdateRunConditionProgressing), @@ -2105,91 +2123,6 @@ func updateRunInitializedConditions(generation int64) []metav1.Condition { } } -func clusterStagedUpdateRunStatusSucceededActual( - updateRunName string, - wantResourceIndex string, - wantPolicyIndex string, - wantClusterCount int, - wantApplyStrategy *placementv1beta1.ApplyStrategy, - wantStrategySpec *placementv1beta1.UpdateStrategySpec, - wantSelectedClusters [][]string, - wantUnscheduledClusters []string, - wantCROs map[string][]string, - wantROs map[string][]placementv1beta1.NamespacedName, - execute bool, -) func() error { - return func() error { - updateRun := &placementv1beta1.ClusterStagedUpdateRun{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { - return err - } - - wantStatus := placementv1beta1.UpdateRunStatus{ - PolicySnapshotIndexUsed: wantPolicyIndex, - ResourceSnapshotIndexUsed: wantResourceIndex, - PolicyObservedClusterCount: wantClusterCount, - ApplyStrategy: 
wantApplyStrategy.DeepCopy(), - UpdateStrategySnapshot: wantStrategySpec, - } - - if execute { - wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) - } else { - wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) - } - if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { - return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) - } - return nil - } -} - -func stagedUpdateRunStatusSucceededActual( - updateRunName, namespace string, - wantResourceIndex, wantPolicyIndex string, - wantClusterCount int, - wantApplyStrategy *placementv1beta1.ApplyStrategy, - wantStrategySpec *placementv1beta1.UpdateStrategySpec, - wantSelectedClusters [][]string, - wantUnscheduledClusters []string, - wantCROs map[string][]string, - wantROs map[string][]placementv1beta1.NamespacedName, - execute bool, -) func() error { - return func() error { - updateRun := &placementv1beta1.StagedUpdateRun{} - if err := hubClient.Get(ctx, client.ObjectKey{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { - return err - } - - wantStatus := placementv1beta1.UpdateRunStatus{ - PolicySnapshotIndexUsed: wantPolicyIndex, - ResourceSnapshotIndexUsed: wantResourceIndex, - PolicyObservedClusterCount: wantClusterCount, - ApplyStrategy: wantApplyStrategy.DeepCopy(), - UpdateStrategySnapshot: wantStrategySpec, - } - - if execute { - wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) - } else { - wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) - wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) - wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) - } - if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { - return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) - } - return nil - } -} - func buildStageUpdatingStatusesForInitialized( wantStrategySpec *placementv1beta1.UpdateStrategySpec, wantSelectedClusters [][]string, diff --git a/test/e2e/change_detector_test.go b/test/e2e/change_detector_test.go new file mode 100644 index 000000000..f1f79a37a --- /dev/null +++ b/test/e2e/change_detector_test.go @@ -0,0 +1,347 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Test scenarios: +// 1. Basic change detection: Verify Create, Update, Delete config map detection works +// 2. CRD discovery: Verify InformerPopulator discovers new CRDs and ChangeDetector detects CR updates + +package e2e + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +// Test 1: Basic change detection - verifies Create/Update/Delete config map detection +var _ = Describe("validating ChangeDetector detects resource changes", Label("resourceplacement"), Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + + BeforeAll(func() { + // Only create the namespace (not the config map yet) + createNamespace() + + // Create the CRP that selects the namespace + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.PlacementSpec{ + ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{ + { + Group: "", + Kind: "Namespace", + Version: "v1", + Name: appNamespace().Name, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + AfterAll(func() { + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + }) + + It("should select namespace only before config map creation", func() { + // Expect only the namespace to be selected (no config map yet) + expectedIdentifiers := workNamespaceIdentifiers() + crpStatusUpdatedActual := crpStatusUpdatedActual(expectedIdentifiers, allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), + "CRP should have initial snapshot with namespace selected") + }) + + It("should create config map", func() { + // Now create the config map + configMap := appConfigMap() + Expect(hubClient.Create(ctx, &configMap)).To(Succeed(), "Failed to create config map") + + klog.InfoS("Config map created", "configMap", configMap.Name) + }) + + It("should select namespace and config map", func() { + // After creating the config map, expect both namespace and config map to be selected + expectedIdentifiers := workResourceIdentifiers() + crpStatusUpdatedActual := crpStatusUpdatedActual(expectedIdentifiers, allMemberClusterNames, nil, "1") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), + "CRP should have new snapshot with namespace and config map selected") + }) + + It("should propagate config map to all member clusters", func() { + configMapName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess()) + name := types.NamespacedName{Name: configMapName, Namespace: appNamespace().Name} + + for _, cluster := range allMemberClusters { + Eventually(func() error { + return validateConfigMapOnCluster(cluster, name) + }, 
eventuallyDuration, eventuallyInterval).Should(Succeed(), + "ConfigMap should be propagated to member cluster %s", cluster.ClusterName) + } + }) + + It("should update config map data", func() { + configMapName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess()) + + // Update ConfigMap data + configMap := &corev1.ConfigMap{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: appNamespace().Name}, configMap)).Should(Succeed()) + configMap.Data["data"] = "updated-value" + Expect(hubClient.Update(ctx, configMap)).Should(Succeed(), "Failed to update config map data") + + klog.InfoS("Config map data updated", "configMap", configMapName) + }) + + It("should propagate config map updates to member clusters", func() { + configMapName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess()) + name := types.NamespacedName{Name: configMapName, Namespace: appNamespace().Name} + + for _, cluster := range allMemberClusters { + Eventually(func() error { + return validateConfigMapOnCluster(cluster, name) + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), + "ConfigMap updates should be propagated to member cluster %s", cluster.ClusterName) + } + }) + + It("should delete config map", func() { + configMapName := fmt.Sprintf(appConfigMapNameTemplate, GinkgoParallelProcess()) + + // Delete the ConfigMap + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: appNamespace().Name, + }, + } + Expect(hubClient.Delete(ctx, configMap)).To(Succeed(), "Failed to delete config map") + + klog.InfoS("Config map deleted", "configMap", configMapName) + }) + + It("should update CRP status after config map deletion", func() { + // Verify CRP status updated to show only namespace selected (config map removed) + expectedIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Group: "", + Version: "v1", + Kind: "Namespace", + Name: appNamespace().Name, + Namespace: "", + }, + } + // Snapshot progression: + // Index 0: namespace only (initial) + // Index 1: namespace + config map (after creation) + // Index 2: namespace + config map (after update) + // Index 3: namespace only (after deletion) <- current state + crpStatusUpdatedActual := crpStatusUpdatedActual(expectedIdentifiers, allMemberClusterNames, nil, "3") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), + "CRP should detect config map deletion and update snapshot") + }) + + It("should remove config map from all member clusters", func() { + checkIfRemovedConfigMapFromMemberClusters(allMemberClusters) + }) +}) + +// Test 2: CRD discovery - verifies InformerPopulator discovers new CRDs and ChangeDetector detects CR updates +var _ = Describe("validating InformerPopulator discovers new CRDs", Label("resourceplacement"), Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + crdName := fmt.Sprintf("testcrds-%d.kubefleet.test", GinkgoParallelProcess()) + crName := fmt.Sprintf("test-cr-%d", GinkgoParallelProcess()) + + BeforeAll(func() { + // Create namespace + createNamespace() + }) + + AfterAll(func() { + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + + // Clean up CRD + crd := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: crdName, + }, + } + Expect(hubClient.Delete(ctx, crd)).To(Succeed(), "Failed to delete CRD") + }) + + It("should create CRD", func() { + crd := testCRD() + Expect(hubClient.Create(ctx, &crd)).To(Succeed(), "Failed 
to create CRD") + + klog.InfoS("CRD created", "crd", crdName) + }) + + It("should establish CRD", func() { + waitForCRDToBeReady(crdName) + klog.InfoS("CRD established", "crd", crdName) + }) + + It("should create custom resource", func() { + cr := &unstructured.Unstructured{} + cr.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "kubefleet.test", + Version: "v1", + Kind: testCRD().Spec.Names.Kind, + }) + cr.SetName(crName) + cr.SetNamespace(appNamespace().Name) + cr.Object["spec"] = map[string]interface{}{ + "field": "initial-value", + } + Expect(hubClient.Create(ctx, cr)).To(Succeed(), "Failed to create custom resource") + + klog.InfoS("Custom resource created", "cr", crName) + }) + + It("should create CRP", func() { + // Create CRP to select the CRD and namespace + // The custom resource instance will be automatically included because it's in the selected namespace + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.PlacementSpec{ + ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{ + { + Group: "apiextensions.k8s.io", + Kind: "CustomResourceDefinition", + Version: "v1", + Name: crdName, + }, + { + Group: "", + Kind: "Namespace", + Version: "v1", + Name: appNamespace().Name, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status with selected resources", func() { + expectedIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Group: "", + Version: "v1", + Kind: "Namespace", + Name: appNamespace().Name, + Namespace: "", + }, + { + Group: "apiextensions.k8s.io", + Version: "v1", + Kind: "CustomResourceDefinition", + Name: crdName, + Namespace: "", + }, + { + Group: "kubefleet.test", + Version: "v1", + Kind: testCRD().Spec.Names.Kind, + Name: crName, + Namespace: appNamespace().Name, + }, + } + // Use customizedPlacementStatusUpdatedActual with resourceIsTrackable=false + // because CRDs and custom resources don't have availability tracking + crpKey := types.NamespacedName{Name: crpName} + crpStatusUpdatedActual := customizedPlacementStatusUpdatedActual(crpKey, expectedIdentifiers, allMemberClusterNames, nil, "0", false) + // Use workloadEventuallyDuration (45s) to account for InformerPopulator's 30s discovery cycle + // The InformerPopulator needs up to 30s to discover the new CRD, then ChangeDetector can watch it + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), + "CRP should have selected namespace, CRD, and custom resource") + }) + + It("should propagate custom resource to all member clusters", func() { + name := types.NamespacedName{Name: crName, Namespace: appNamespace().Name} + gvk := schema.GroupVersionKind{ + Group: "kubefleet.test", + Version: "v1", + Kind: testCRD().Spec.Names.Kind, + } + + for _, cluster := range allMemberClusters { + Eventually(func() error { + return validateCustomResourceOnCluster(cluster, name, gvk) + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), + "Custom resource should be propagated to member cluster %s", cluster.ClusterName) + } + }) + + It("should update custom resource", func() { + // Update the custom resource + cr := &unstructured.Unstructured{} + 
cr.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "kubefleet.test", + Version: "v1", + Kind: testCRD().Spec.Names.Kind, + }) + Expect(hubClient.Get(ctx, types.NamespacedName{Name: crName, Namespace: appNamespace().Name}, cr)).To(Succeed()) + cr.Object["spec"] = map[string]interface{}{ + "field": "updated-value", + } + Expect(hubClient.Update(ctx, cr)).To(Succeed(), "Failed to update custom resource") + + klog.InfoS("Custom resource updated", "cr", crName) + }) + + It("should propagate custom resource updates to all member clusters", func() { + name := types.NamespacedName{Name: crName, Namespace: appNamespace().Name} + gvk := schema.GroupVersionKind{ + Group: "kubefleet.test", + Version: "v1", + Kind: testCRD().Spec.Names.Kind, + } + + for _, cluster := range allMemberClusters { + Eventually(func() error { + return validateCustomResourceOnCluster(cluster, name, gvk) + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), + "Custom resource updates should be propagated to member cluster %s", cluster.ClusterName) + } + }) +}) diff --git a/test/e2e/cluster_staged_updaterun_test.go b/test/e2e/cluster_staged_updaterun_test.go index 99d4c0b87..214f48d64 100644 --- a/test/e2e/cluster_staged_updaterun_test.go +++ b/test/e2e/cluster_staged_updaterun_test.go @@ -27,7 +27,6 @@ import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -145,7 +144,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -153,9 +152,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to all the members after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, 
[][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) By("Verify that the new configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -325,7 +324,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -333,9 +332,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { })
It("Should rollout resources to all the members after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -396,7 +395,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -408,9 +407,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) By("Verify 
that the new configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -444,7 +443,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{resourceSnapshotIndex2nd, resourceSnapshotIndex1st, resourceSnapshotIndex2nd}, []bool{true, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollback resources to prod stage until approved", func() { @@ -456,9 +455,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollback resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) for idx := range allMemberClusters { configMapActual := configMapPlacedOnClusterActual(allMemberClusters[idx], &oldConfigMap) @@ -550,7 +549,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[:2], []string{"", resourceSnapshotIndex1st}, []bool{false, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -558,9 +557,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 after approval but not member-cluster-3 and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[0],
envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -606,7 +605,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -614,9 +613,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -658,14 +657,14 @@ var _ = Describe("test CRP rollout 
with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, false, []string{allMemberClusterNames[2]}, []string{resourceSnapshotIndex1st}, []bool{false}, nil, nil) Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should remove resources on member-cluster-1 and member-cluster-2 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // need to go through two stages - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) Eventually(csurSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[2]) checkIfRemovedWorkResourcesFromMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -753,7 +752,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[2:], []string{""}, []bool{false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -761,9 +760,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], 
envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[2]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) @@ -808,7 +807,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{false, true, true}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 until approved", func() { @@ -816,9 +815,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[1], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -860,7 +859,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames[1:], []string{resourceSnapshotIndex1st, 
resourceSnapshotIndex1st}, []bool{true, true}, nil, nil) Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not remove resources from member-cluster-1 until approved", func() { @@ -868,9 +867,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should remove resources on member-cluster-1 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[2], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) Eventually(csurSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[2]) checkIfRemovedWorkResourcesFromMemberClusters([]*framework.Cluster{allMemberClusters[0]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[1], allMemberClusters[2]}) @@ -1038,7 +1037,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, wantROs) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 and member-cluster-3 until approved", func() { @@ -1046,9 +1045,9 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - 
csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, wantCROs, wantROs, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, wantCROs, wantROs, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1144,13 +1143,13 @@ var _ = Describe("test CRP rollout with staged update run", func() { []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should report diff for member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) }) @@ -1259,13 +1258,13 @@ var _ = Describe("test CRP rollout with staged update run", func() { Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update to the new configmap %s on cluster %s", newConfigMap.Name, allMemberClusterNames[1]) // Approval for AfterStageTasks of canary stage - validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) // Approval for BeforeStageTasks of prod stage - 
validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // Verify complete rollout - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) // Verify new configmap is on all member clusters @@ -1354,7 +1353,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should complete the staged update run after approval", func() { - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1366,128 +1365,6 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) }) - Context("Test CRP rollout strategy transition from external to rollingUpdate", Ordered, func() { - var strategy *placementv1beta1.ClusterStagedUpdateStrategy - updateRunName := fmt.Sprintf(clusterStagedUpdateRunNameWithSubIndexTemplate, GinkgoParallelProcess(), 0) - var oldConfigMap, newConfigMap corev1.ConfigMap - - BeforeAll(func() { - // Create a test namespace and a configMap inside it on the hub cluster. - createWorkResources() - - // Create the CRP with external rollout strategy initially. - crp := &placementv1beta1.ClusterResourcePlacement{ - ObjectMeta: metav1.ObjectMeta{ - Name: crpName, - // Add a custom finalizer; this would allow us to better observe - // the behavior of the controllers. - Finalizers: []string{customDeletionBlockerFinalizer}, - }, - Spec: placementv1beta1.PlacementSpec{ - ResourceSelectors: workResourceSelector(), - Strategy: placementv1beta1.RolloutStrategy{ - Type: placementv1beta1.ExternalRolloutStrategyType, - }, - }, - } - Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") - - // Create the clusterStagedUpdateStrategy. 
- strategy = createClusterStagedUpdateStrategySucceed(strategyName) - - oldConfigMap = appConfigMap() - newConfigMap = appConfigMap() - newConfigMap.Data["data"] = testConfigMapDataValue - }) - - AfterAll(func() { - // Remove the custom deletion blocker finalizer from the CRP. - ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) - - // Delete the clusterStagedUpdateRun. - ensureClusterStagedUpdateRunDeletion(updateRunName) - - // Delete the clusterStagedUpdateStrategy. - ensureClusterUpdateRunStrategyDeletion(strategyName) - }) - - It("Should not rollout any resources to member clusters with external strategy", checkIfRemovedWorkResourcesFromAllMemberClustersConsistently) - - It("Should have the latest resource snapshot", func() { - validateLatestClusterResourceSnapshot(crpName, resourceSnapshotIndex1st) - }) - - It("Should update crp status as pending rollout", func() { - crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", "", ""}, []bool{false, false, false}, nil, nil) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - }) - - It("Create updateRun and verify resources are rolled out", func() { - createClusterStagedUpdateRunSucceed(updateRunName, crpName, resourceSnapshotIndex1st, strategyName, placementv1beta1.StateRun) - - // Approval for AfterStageTasks of canary stage - validateAndApproveClusterApprovalRequests(updateRunName, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) - - // Approval for BeforeStageTasks of prod stage - validateAndApproveClusterApprovalRequests(updateRunName, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) - Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) - - checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) - }) - - It("Should update crp status as completed", func() { - crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, - []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - }) - - It("Update the configmap on hub but should not rollout to member clusters with external strategy", func() { - updateConfigMapSucceed(&newConfigMap) - - // Verify old configmap is still on all member clusters - for _, cluster := range allMemberClusters { - configMapActual := configMapPlacedOnClusterActual(cluster, &oldConfigMap) - Consistently(configMapActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep old configmap %s data on cluster %s", oldConfigMap.Name, cluster.ClusterName) - } - }) - - It("Should have new resource snapshot but CRP status should remain completed with old snapshot", func() { - 
validateLatestClusterResourceSnapshot(crpName, resourceSnapshotIndex2nd) - - // CRP status should still show completed with old snapshot - crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, - []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) - Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep CRP %s status as expected", crpName) - }) - - It("Update CRP to use rollingUpdate strategy", func() { - Eventually(func() error { - crp := &placementv1beta1.ClusterResourcePlacement{} - if err := hubClient.Get(ctx, client.ObjectKey{Name: crpName}, crp); err != nil { - return fmt.Errorf("failed to get the crp: %w", err) - } - crp.Spec.Strategy = placementv1beta1.RolloutStrategy{ - Type: placementv1beta1.RollingUpdateRolloutStrategyType, - } - return hubClient.Update(ctx, crp) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP strategy to rollingUpdate") - }) - - It("Should automatically rollout new resources to all member clusters with rollingUpdate strategy", func() { - // Verify CRP status shows all clusters with new resource snapshot - crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, resourceSnapshotIndex2nd) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status with rollingUpdate strategy", crpName) - - // Verify new configmap is on all member clusters - for _, cluster := range allMemberClusters { - configMapActual := configMapPlacedOnClusterActual(cluster, &newConfigMap) - Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update to the new configmap %s on cluster %s", newConfigMap.Name, cluster.ClusterName) - } - }) - }) - Context("Test parallel cluster updates with maxConcurrency set to 3", Ordered, func() { var strategy *placementv1beta1.ClusterStagedUpdateStrategy updateRunName := fmt.Sprintf(clusterStagedUpdateRunNameWithSubIndexTemplate, GinkgoParallelProcess(), 0) @@ -1565,7 +1442,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { It("Should complete the cluster staged update run with all 3 clusters updated in parallel", func() { // With maxConcurrency=3, all 3 clusters should be updated in parallel. // Each round waits 15 seconds, so total time should be under 20s. 
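// Editorial sketch, not part of the patch: the timing bounds quoted in the comments above and in
// the maxConcurrency=70% case further below follow from a simple model in which each update round
// waits the fixed 15 seconds and the number of rounds is ceil(clusters / clustersPerRound). The
// helper name below is hypothetical and only illustrates that arithmetic (assumes "time" is imported).
func estimatedRolloutTime(totalClusters, clustersPerRound int, roundWait time.Duration) time.Duration {
    rounds := (totalClusters + clustersPerRound - 1) / clustersPerRound // ceiling division
    return time.Duration(rounds) * roundWait
}
// estimatedRolloutTime(3, 3, 15*time.Second) -> 15s, within the 20s bound for maxConcurrency=3;
// estimatedRolloutTime(3, 2, 15*time.Second) -> 30s, within the 40s bound for the 70% case (2 clusters per round).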
- csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunParallelEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1656,7 +1533,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { // Since maxConcurrency=70% each round we process 2 clusters in parallel, // so all 3 clusters should be updated in 2 rounds. // Each round waits 15 seconds, so total time should be under 40s. - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunParallelEventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1668,7 +1545,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) }) - Context("Test resource rollout with staged update run by update run states - (Initialize -> Run)", Ordered, func() { + Context("Test resource rollout with staged update run by update run states - (Initialize -> Run -> Stop -> Run)", Ordered, func() { updateRunNames := []string{} var strategy *placementv1beta1.ClusterStagedUpdateStrategy @@ -1739,14 +1616,14 @@ var _ = Describe("test CRP rollout with staged update run", func() { checkIfRemovedWorkResourcesFromAllMemberClustersConsistently() By("Validating the csur status remains in Initialize state") - csurNotStartedActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) + csurNotStartedActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) Consistently(csurNotStartedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to Initialize updateRun %s", updateRunNames[0]) }) It("Should rollout resources to member-cluster-2 only 
after update run is in Run state", func() { // Update the update run state to Run By("Updating the update run state to Run") - updateClusterStagedUpdateRunState(updateRunNames[0], placementv1beta1.StateRun) + UpdateClusterStagedUpdateRunState(ctx, hubClient, updateRunNames[0], placementv1beta1.StateRun) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1755,13 +1632,49 @@ var _ = Describe("test CRP rollout with staged update run", func() { crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + }) + + It("Should not rollout to all member clusters while waiting for beforeStageTask approval for prod stage", func() { + By("Validating not rolled out to member-cluster-1 and member-cluster-3 yet") + checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) + + By("Validating crp status as member-cluster-2 updated only") + crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) + }) + + It("Should not rollout to all member clusters after stopping update run", func() { + By("Updating update run state to Stop") + UpdateClusterStagedUpdateRunState(ctx, hubClient, updateRunNames[0], placementv1beta1.StateStop) + + By("Validating not rolled out to member-cluster-1 and member-cluster-3 yet") + checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) + + By("Validating crp status as member-cluster-2 updated") + crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) + }) + + It("Should not complete rollout to all member after beforeStageTask approval while in Stop state", func() { + ValidateAndApproveClusterApprovalRequests(ctx, hubClient, updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + + By("Validating not rolled out to member-cluster-1 and member-cluster-3 after beforeStageTask approval while update run is in Stop state") + 
checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) }) - It("Should rollout resources to all the members and complete the cluster staged update run successfully", func() { - validateAndApproveClusterApprovalRequests(updateRunNames[0], envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + It("Should complete rollout to all member clusters after resuming the update run to Run state", func() { + // Update the update run state back to Run. + By("Updating the update run state back to Run") + UpdateClusterStagedUpdateRunState(ctx, hubClient, updateRunNames[0], placementv1beta1.StateRun) - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + By("All member clusters should have work resources placed") + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1], allMemberClusters[2]}) + + By("Validating update run has succeeded after resuming") + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1842,7 +1755,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label createClusterStagedUpdateRunSucceed(updateRunNames[0], crpName, resourceSnapshotIndex1st, strategyName, placementv1beta1.StateRun) By("Validating staged update run has succeeded") - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) By("Validating CRP status as completed") @@ -1894,7 +1807,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the second staged update run and complete the CRP", func() { - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1], allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], 
resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1], allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames[1:], @@ -1942,7 +1855,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the staged update run, complete CRP, and rollout resources to all member clusters", func() { - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, @@ -1985,7 +1898,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the staged update run, complete CRP, and rollout updated resources to all member clusters", func() { - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex2nd, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex2nd, true, allMemberClusterNames, @@ -2024,7 +1937,7 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label }) It("Should complete the staged update run, complete CRP, and re-place resources to all member clusters", func() { - csurSucceededActual := clusterStagedUpdateRunStatusSucceededActual(updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + csurSucceededActual := ClusterStagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], 
allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(csurSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) crpStatusUpdatedActual := crpStatusWithExternalStrategyActual(workResourceIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, @@ -2039,39 +1952,6 @@ var _ = Describe("Test member cluster join and leave flow with updateRun", Label Eventually(bindingBoundActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to mark binding for member cluster %s as bound", allMemberClusterNames[0]) }) }) - - Context("Rejoin a member cluster and change to rollout CRP with rollingUpdate", Label("joinleave"), Ordered, Serial, func() { - It("Should be able to rejoin member cluster 1", func() { - setMemberClusterToJoin(allMemberClusters[0]) - checkIfMemberClusterHasJoined(allMemberClusters[0]) - }) - - It("Should update the CRP rollout strategy to use rollingUpdate", func() { - Eventually(func() error { - var crp placementv1beta1.ClusterResourcePlacement - if err := hubClient.Get(ctx, client.ObjectKey{Name: crpName}, &crp); err != nil { - return fmt.Errorf("failed to get CRP %s: %w", crpName, err) - } - crp.Spec.Strategy = placementv1beta1.RolloutStrategy{ - Type: placementv1beta1.RollingUpdateRolloutStrategyType, - } - return hubClient.Update(ctx, &crp) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP rollout strategy to rolling update") - }) - - It("Should verify resources are placed to member cluster 1 and binding status becomes bound", func() { - // Verify CRP status shows all clusters as bounded with rolling update. - crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, resourceSnapshotIndex1st) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected with rolling update") - - // Verify resources are placed on all member clusters. - checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) - - // Verify binding for member cluster 1 becomes bound. 
- bindingBoundActual := bindingStateActual(crpName, allMemberClusterNames[0], placementv1beta1.BindingStateBound) - Eventually(bindingBoundActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to mark binding for member cluster %s as bound with rolling update", allMemberClusterNames[0]) - }) - }) }) func createClusterStagedUpdateStrategySucceed(strategyName string) *placementv1beta1.ClusterStagedUpdateStrategy { @@ -2195,50 +2075,6 @@ func createClusterStagedUpdateRunSucceedWithNoResourceSnapshotIndex(updateRunNam Expect(hubClient.Create(ctx, updateRun)).To(Succeed(), "Failed to create ClusterStagedUpdateRun %s", updateRunName) } -func updateClusterStagedUpdateRunState(updateRunName string, state placementv1beta1.State) { - Eventually(func() error { - updateRun := &placementv1beta1.ClusterStagedUpdateRun{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { - return fmt.Errorf("failed to get ClusterStagedUpdateRun %s", updateRunName) - } - - updateRun.Spec.State = state - if err := hubClient.Update(ctx, updateRun); err != nil { - return fmt.Errorf("failed to update ClusterStagedUpdateRun %s", updateRunName) - } - return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update ClusterStagedUpdateRun %s state to %s", updateRunName, state) -} - -func validateAndApproveClusterApprovalRequests(updateRunName, stageName, approvalRequestNameFmt, stageTaskType string) { - Eventually(func() error { - appReqList := &placementv1beta1.ClusterApprovalRequestList{} - if err := hubClient.List(ctx, appReqList, client.MatchingLabels{ - placementv1beta1.TargetUpdatingStageNameLabel: stageName, - placementv1beta1.TargetUpdateRunLabel: updateRunName, - placementv1beta1.TaskTypeLabel: stageTaskType, - }); err != nil { - return fmt.Errorf("failed to list approval requests: %w", err) - } - - if len(appReqList.Items) != 1 { - return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) - } - appReq := &appReqList.Items[0] - approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) - if appReq.Name != approvalRequestName { - return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) - } - meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ - Status: metav1.ConditionTrue, - Type: string(placementv1beta1.ApprovalRequestConditionApproved), - ObservedGeneration: appReq.GetGeneration(), - Reason: "lgtm", - }) - return hubClient.Status().Update(ctx, appReq) - }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") -} - func updateConfigMapSucceed(newConfigMap *corev1.ConfigMap) { cm := &corev1.ConfigMap{} key := client.ObjectKey{Namespace: newConfigMap.Namespace, Name: newConfigMap.Name} diff --git a/test/e2e/collect-logs.sh b/test/e2e/collect-logs.sh index 3fb39dadb..9c533c7cc 100755 --- a/test/e2e/collect-logs.sh +++ b/test/e2e/collect-logs.sh @@ -34,41 +34,124 @@ echo -e "${GREEN}Starting log collection at ${TIMESTAMP}${NC}" echo "Logs will be saved to: ${LOG_DIR}" echo "" -# Function to collect logs from a pod -collect_pod_logs() { - local pod_name=$1 - local cluster_name=$2 - local log_file_prefix=$3 - - echo -e "${YELLOW}Collecting logs from pod ${pod_name} in cluster ${cluster_name}${NC}" - - # Get all containers in the pod - containers=$(kubectl get pod "${pod_name}" -n "${NAMESPACE}" -o jsonpath='{.spec.containers[*].name}' 2>/dev/null || echo "") - - if [ -z "$containers" 
]; then - echo -e "${RED}No containers found in pod ${pod_name}${NC}" + + +# Function to collect fleet agent logs directly from node filesystem using docker exec +# This approach bypasses kubectl logs limitations and accesses the full log history +# including rotated and compressed log files stored in /var/log/pods. +collect_node_agent_logs() { + local cluster_name=$1 + local node_log_dir=$2 + local agent_type=$3 # "hub-agent" or "member-agent" + + echo -e "${YELLOW}Collecting ${agent_type} logs from cluster ${cluster_name} nodes${NC}" + + # Get all nodes in the cluster + local nodes + nodes=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "") + + if [ -z "$nodes" ]; then + echo -e "${RED}No nodes found in cluster ${cluster_name}${NC}" return fi - - # Collect logs for each container - for container in $containers; do - log_file="${log_file_prefix}-${container}.log" - echo " - Container ${container} -> ${log_file}" - - # Get current logs - kubectl logs "${pod_name}" -n "${NAMESPACE}" -c "${container}" > "${log_file}" 2>&1 || \ - echo "Failed to get logs for container ${container}" > "${log_file}" - - # Try to get previous logs if pod was restarted - previous_log_file="${log_file_prefix}-${container}-previous.log" - if kubectl logs "${pod_name}" -n "${NAMESPACE}" -c "${container}" --previous > "${previous_log_file}" 2>&1; then - echo " - Previous logs for ${container} -> ${previous_log_file}" - else - rm -f "${previous_log_file}" - fi + + # Create node logs directory + mkdir -p "${node_log_dir}" + + for node in $nodes; do + echo " - Collecting ${agent_type} logs from node ${node}" + local node_specific_dir="${node_log_dir}/${node}" + mkdir -p "${node_specific_dir}" + + # Collect specific agent logs from node filesystem + collect_agent_logs_from_node "${node}" "${cluster_name}" "${node_specific_dir}" "${agent_type}" done } +# Function to collect specific agent logs from node filesystem +# Collects all log files including rotated (*.log.*) and compressed (*.gz) files +# Args: +# node: The node name to collect logs from +# cluster_name: The cluster name for logging context +# node_log_dir: The directory to save the collected logs +# agent_type: The type of agent ("hub-agent" or "member-agent") +collect_agent_logs_from_node() { + local node=$1 + local cluster_name=$2 + local node_log_dir=$3 + local agent_type=$4 # "hub-agent" or "member-agent" + + echo " -> Collecting ${agent_type} logs from node filesystem" + echo " -> Found log paths: $(docker exec "${node}" find /var/log/pods -path "*/${NAMESPACE}_*${agent_type}*")" + + # First check if any agent logs exist on this node (including .log, .log.*, and .gz files) + local log_files + log_files=$(docker exec "${node}" find /var/log/pods -path "*/${NAMESPACE}_*${agent_type}*" -type f \( -name "*.log" -o -name "*.log.*" -o -name "*.gz" \) 2>/dev/null || echo "") + + if [ -n "$log_files" ]; then + + # Process each log file separately using process substitution to avoid subshell + while read -r logfile; do + if [ -n "$logfile" ]; then + + # Extract a meaningful filename from the log path + local base_path=$(basename "$(dirname "$logfile")") + local original_filename="$(basename "$logfile")" + local sanitized_filename="${base_path}_${original_filename}" + + # Remove .gz extension for the output filename if present + local output_filename="${sanitized_filename%.gz}" + # Ensure output filename ends with .log + if [[ ! 
"$output_filename" =~ \.log$ ]]; then + output_filename="${output_filename}.log" + fi + + # Create individual log file for this specific log + local individual_log_file="${node_log_dir}/${agent_type}-${output_filename}" + + { + echo "# ${agent_type} logs from node filesystem" + echo "# Timestamp: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" + echo "# Node: ${node}" + echo "# Cluster: ${cluster_name}" + echo "# Source log file: ${logfile}" + echo "# Method: Direct access to /var/log/pods via docker exec" + echo "# ==================================" + echo "" + + # Handle different file types + if [[ "$logfile" == *.gz ]]; then + echo "# Note: This is a compressed log file that has been decompressed" + echo "" + # Decompress and read the file + docker exec "${node}" zcat "$logfile" 2>/dev/null || echo "Failed to decompress and read $logfile" + else + # Regular log file (including rotated .log.* files) + docker exec "${node}" cat "$logfile" 2>/dev/null || echo "Failed to read $logfile" + fi + } > "${individual_log_file}" + + echo " -> ${agent_type}-${output_filename}" + fi + done < <(echo "$log_files") + + # Check if any files were created in the directory + local created_files + created_files=$(find "${node_log_dir}" -name "${agent_type}-*.log" 2>/dev/null | wc -l) + + # If no log files were actually created, clean up empty directory + if [ "$created_files" -eq 0 ]; then + echo " -> No valid ${agent_type} logs processed on node ${node}" + rmdir "${node_log_dir}" 2>/dev/null || true + fi + else + # No agent logs found, don't create the file and remove directory if empty + echo " -> No ${agent_type} logs found on node ${node}" + rmdir "${node_log_dir}" 2>/dev/null || true + fi +} + + # Collect hub cluster logs echo -e "${GREEN}=== Collecting Hub Cluster Logs ===${NC}" kind export kubeconfig --name "${HUB_CLUSTER}" 2>/dev/null || { @@ -80,42 +163,26 @@ kind export kubeconfig --name "${HUB_CLUSTER}" 2>/dev/null || { HUB_LOG_DIR="${LOG_DIR}/hub" mkdir -p "${HUB_LOG_DIR}" -# Get all hub-agent pods -hub_pods=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=hub-agent -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "") - -if [ -z "$hub_pods" ]; then - echo -e "${RED}No hub-agent pods found${NC}" -else - for pod in $hub_pods; do - collect_pod_logs "${pod}" "${HUB_CLUSTER}" "${HUB_LOG_DIR}/${pod}" - done -fi +# Collect hub-agent logs from hub cluster nodes +collect_node_agent_logs "${HUB_CLUSTER}" "${HUB_LOG_DIR}/nodes" "hub-agent" # Collect member cluster logs for cluster in "${MEMBER_CLUSTERS[@]}"; do echo -e "${GREEN}=== Collecting Member Cluster Logs: ${cluster} ===${NC}" - + # Export kubeconfig for the member cluster if ! 
kind export kubeconfig --name "${cluster}" 2>/dev/null; then echo -e "${RED}Failed to export kubeconfig for cluster ${cluster}, skipping...${NC}" continue fi - + # Create member logs directory MEMBER_LOG_DIR="${LOG_DIR}/${cluster}" mkdir -p "${MEMBER_LOG_DIR}" - - # Get all member-agent pods - member_pods=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=member-agent -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "") - - if [ -z "$member_pods" ]; then - echo -e "${RED}No member-agent pods found in cluster ${cluster}${NC}" - else - for pod in $member_pods; do - collect_pod_logs "${pod}" "${cluster}" "${MEMBER_LOG_DIR}/${pod}" - done - fi - + + # Collect member-agent logs from member cluster nodes + collect_node_agent_logs "${cluster}" "${MEMBER_LOG_DIR}/nodes" "member-agent" + echo "" done diff --git a/test/e2e/placement_drift_diff_test.go b/test/e2e/placement_drift_diff_test.go index eb6c85daa..359c68aa0 100644 --- a/test/e2e/placement_drift_diff_test.go +++ b/test/e2e/placement_drift_diff_test.go @@ -1383,6 +1383,10 @@ var _ = Describe("report diff mode", func() { } } + // With workapplier's backoff requeue enabled, it takes longer to report the new diff results. + // The backoff logic is: 1 attempt after 5s (fixed delay), 2nd attempt after 2s (initial delay for slow backoff), + // fast backoff with exponential rate of 1.5x (as diff report succeeded). + // The test takes ~25s to reach this point, so workloadEventuallyDuration (45s) should be enough to cover the backoff delays. Eventually(func() error { crp := &placementv1beta1.ClusterResourcePlacement{} if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { @@ -1394,7 +1398,7 @@ var _ = Describe("report diff mode", func() { return fmt.Errorf("CRP status diff (-got, +want): %s", diff) } return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { diff --git a/test/e2e/resource_placement_drift_diff_test.go b/test/e2e/resource_placement_drift_diff_test.go index ec2131786..500cd0eca 100644 --- a/test/e2e/resource_placement_drift_diff_test.go +++ b/test/e2e/resource_placement_drift_diff_test.go @@ -1556,6 +1556,10 @@ var _ = Describe("report diff mode using RP", Label("resourceplacement"), func() } } + // With workapplier's backoff requeue enabled, it takes longer to report the new diff results. + // The backoff logic is: 1 attempt after 5s (fixed delay), 2nd attempt after 2s (initial delay for slow backoff), + // fast backoff with exponential rate of 1.5x (as diff report succeeded). + // The test takes ~25s to reach this point, so workloadEventuallyDuration (45s) should be enough to cover the backoff delays. 
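// Editorial sketch, not part of the patch: a quick sanity check of the arithmetic behind the 45s
// budget above, assuming the requeue schedule quoted in the comment (5s fixed delay, then 2s that
// grows by 1.5x per subsequent attempt). The helper name is hypothetical (assumes "time" is imported).
func cumulativeRequeueDelay(attempts int) time.Duration {
    total := 5 * time.Second // 1st attempt: fixed delay
    delay := 2 * time.Second // 2nd attempt: initial slow-backoff delay
    for i := 1; i < attempts; i++ {
        total += delay
        delay = time.Duration(float64(delay) * 1.5) // fast backoff at 1.5x
    }
    return total
}
// cumulativeRequeueDelay(5) is roughly 5s+2s+3s+4.5s+6.75s, about 21s, so even a handful of requeues
// completes well within the 45s workloadEventuallyDuration window used below.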
Eventually(func() error { rp := &placementv1beta1.ResourcePlacement{} if err := hubClient.Get(ctx, types.NamespacedName{Name: rpName, Namespace: nsName}, rp); err != nil { @@ -1567,7 +1571,7 @@ var _ = Describe("report diff mode using RP", Label("resourceplacement"), func() return fmt.Errorf("RP status diff (-got, +want): %s", diff) } return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP status as expected") + }, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP status as expected") }) AfterAll(func() { diff --git a/test/e2e/resources_test.go b/test/e2e/resources_test.go index ed3a7bb2f..bf1d15318 100644 --- a/test/e2e/resources_test.go +++ b/test/e2e/resources_test.go @@ -25,6 +25,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" @@ -280,3 +281,44 @@ func endpointSliceExport(name, namespace string) fleetnetworkingv1alpha1.Endpoin }, } } + +func testCRD() apiextensionsv1.CustomResourceDefinition { + crdName := fmt.Sprintf("testcrds-%d.kubefleet.test", GinkgoParallelProcess()) + return apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: crdName, + }, + Spec: apiextensionsv1.CustomResourceDefinitionSpec{ + Group: "kubefleet.test", + Names: apiextensionsv1.CustomResourceDefinitionNames{ + Plural: fmt.Sprintf("testcrds-%d", GinkgoParallelProcess()), + Singular: fmt.Sprintf("testcrd-%d", GinkgoParallelProcess()), + Kind: fmt.Sprintf("TestCRD%d", GinkgoParallelProcess()), + ListKind: fmt.Sprintf("TestCRD%dList", GinkgoParallelProcess()), + }, + Scope: apiextensionsv1.NamespaceScoped, + Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ + { + Name: "v1", + Served: true, + Storage: true, + Schema: &apiextensionsv1.CustomResourceValidation{ + OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{ + Type: "object", + Properties: map[string]apiextensionsv1.JSONSchemaProps{ + "spec": { + Type: "object", + Properties: map[string]apiextensionsv1.JSONSchemaProps{ + "field": { + Type: "string", + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} diff --git a/test/e2e/rollout_test.go b/test/e2e/rollout_test.go index d52acb287..f505ee892 100644 --- a/test/e2e/rollout_test.go +++ b/test/e2e/rollout_test.go @@ -29,7 +29,6 @@ import ( appv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" - apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -1137,29 +1136,6 @@ func waitForTestResourceToBePlaced(memberCluster *framework.Cluster, testResourc } } -func waitForCRDToBeReady(crdName string) { - Eventually(func() error { // wait for CRD to be created - crd := &apiextensionsv1.CustomResourceDefinition{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: crdName}, crd); err != nil { - return err - } - if crd.Status.Conditions == nil { - return fmt.Errorf("CRD status conditions are nil for %s", crdName) - } - - for _, cond := range crd.Status.Conditions { - if cond.Type == apiextensionsv1.Established && cond.Status != apiextensionsv1.ConditionTrue { - return fmt.Errorf("CRD is not established: %s", crdName) - } - if cond.Type == apiextensionsv1.NamesAccepted && cond.Status != 
apiextensionsv1.ConditionTrue { - return fmt.Errorf("CRD names are not accepted: %s", crdName) - } - } - - return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "CRD failed to be ready %s", crdName) -} - func buildCRPForSafeRollout() *placementv1beta1.ClusterResourcePlacement { return &placementv1beta1.ClusterResourcePlacement{ ObjectMeta: metav1.ObjectMeta{ diff --git a/test/e2e/staged_updaterun_test.go b/test/e2e/staged_updaterun_test.go index 94ebc5684..97fa7a883 100644 --- a/test/e2e/staged_updaterun_test.go +++ b/test/e2e/staged_updaterun_test.go @@ -24,7 +24,6 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" @@ -135,7 +134,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -143,9 +142,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to all the members after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -206,7 +205,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) 
Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { By("Verify that the configmap is not updated on member-cluster-1 and member-cluster-3") @@ -217,9 +216,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to all the members after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) By("Verify that new the configmap is updated on all member clusters") for idx := range allMemberClusters { @@ -311,7 +310,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -319,9 +318,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to all the members after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, 
placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -382,7 +381,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{resourceSnapshotIndex1st, resourceSnapshotIndex2nd, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -394,9 +393,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) By("Verify that new the configmap is updated on all member clusters") for idx 
:= range allMemberClusters { @@ -430,7 +429,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{resourceSnapshotIndex2nd, resourceSnapshotIndex1st, resourceSnapshotIndex2nd}, []bool{true, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollback resources to prod stage until approved", func() { @@ -442,9 +441,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollback resources to member-cluster-1 and member-cluster-3 after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) for idx := range allMemberClusters { configMapActual := configMapPlacedOnClusterActual(allMemberClusters[idx], &oldConfigMap) @@ -534,7 +533,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[:2], []string{"", resourceSnapshotIndex1st}, []bool{false, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -542,9 +541,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 after approval but not member-cluster-3 and 
complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -590,7 +589,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until approved", func() { @@ -598,9 +597,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-3 after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex2nd, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) 
Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -642,14 +641,14 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(appConfigMapIdentifiers(), resourceSnapshotIndex1st, false, []string{allMemberClusterNames[2]}, []string{resourceSnapshotIndex1st}, []bool{false}, nil, nil) Consistently(rpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should remove resources on member-cluster-1 and member-cluster-2 after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // need to go through two stages - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex3rd, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil, true) Eventually(surSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[2]) checkIfRemovedConfigMapFromMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) @@ -735,7 +734,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames[2:], []string{""}, []bool{false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to prod stage until 
approved", func() { @@ -743,9 +742,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 1, defaultApplyStrategy, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[2]}) checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) @@ -790,7 +789,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{false, true, true}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to keep RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 until approved", func() { @@ -798,9 +797,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 after approval and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[1], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 3, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[1], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 3, 
defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[1]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -842,7 +841,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(appConfigMapIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames[1:], []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true}, nil, nil) Consistently(rpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not remove resources from member-cluster-1 until approved", func() { @@ -850,9 +849,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should remove resources on member-cluster-1 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[2], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[2], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, 2, defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0]}, nil, nil, true) Eventually(surSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[2]) checkIfRemovedConfigMapFromMemberClusters([]*framework.Cluster{allMemberClusters[0]}) checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[1], allMemberClusters[2]}) @@ -993,7 +992,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, wantROs) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, 
placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should not rollout resources to member-cluster-1 and member-cluster-3 until approved", func() { @@ -1001,9 +1000,9 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) It("Should rollout resources to member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, wantROs, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, wantROs, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1093,13 +1092,13 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) }) It("Should report diff for member-cluster-1 and member-cluster-3 after approval and complete the cluster staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := 
StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), applyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) }) @@ -1206,13 +1205,13 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update to the new configmap %s on cluster %s", newConfigMap.Name, allMemberClusterNames[1]) // Approval for AfterStageTask of canary stage - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) // Approval for BeforeStageTask of prod stage - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) // Verify complete rollout. - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunName, testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex2nd, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) // Verify new configmap is on all member clusters. @@ -1229,126 +1228,6 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) }) - Context("Test RP rollout strategy transition from external to rollingUpdate", Ordered, func() { - var strategy *placementv1beta1.StagedUpdateStrategy - updateRunName := fmt.Sprintf(stagedUpdateRunNameWithSubIndexTemplate, GinkgoParallelProcess(), 0) - var oldConfigMap, newConfigMap corev1.ConfigMap - - BeforeAll(func() { - // Create the RP with external rollout strategy initially. - rp := &placementv1beta1.ResourcePlacement{ - ObjectMeta: metav1.ObjectMeta{ - Name: rpName, - Namespace: testNamespace, - // Add a custom finalizer; this would allow us to better observe - // the behavior of the controllers. 
- Finalizers: []string{customDeletionBlockerFinalizer}, - }, - Spec: placementv1beta1.PlacementSpec{ - ResourceSelectors: configMapSelector(), - Strategy: placementv1beta1.RolloutStrategy{ - Type: placementv1beta1.ExternalRolloutStrategyType, - }, - }, - } - Expect(hubClient.Create(ctx, rp)).To(Succeed(), "Failed to create RP") - - // Create the stagedUpdateStrategy. - strategy = createStagedUpdateStrategySucceed(strategyName, testNamespace) - - oldConfigMap = appConfigMap() - newConfigMap = appConfigMap() - newConfigMap.Data["data"] = testConfigMapDataValue - }) - - AfterAll(func() { - // Remove the custom deletion blocker finalizer from the RP. - ensureRPAndRelatedResourcesDeleted(types.NamespacedName{Name: rpName, Namespace: testNamespace}, allMemberClusters) - - // Delete the stagedUpdateRun. - ensureStagedUpdateRunDeletion(updateRunName, testNamespace) - - // Delete the stagedUpdateStrategy. - ensureStagedUpdateRunStrategyDeletion(strategyName, testNamespace) - }) - - It("Should not rollout any resources to member clusters with external strategy", checkIfRemovedConfigMapFromAllMemberClustersConsistently) - - It("Should have the latest resource snapshot", func() { - validateLatestResourceSnapshot(rpName, testNamespace, resourceSnapshotIndex1st) - }) - - It("Should update rp status as pending rollout", func() { - rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", "", ""}, []bool{false, false, false}, nil, nil) - Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - }) - - It("Create updateRun and verify resources are rolled out", func() { - createStagedUpdateRunSucceed(updateRunName, testNamespace, rpName, resourceSnapshotIndex1st, strategyName, placementv1beta1.StateRun) - - // Approval for AfterStageTask of canary stage - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) - - // Approval for BeforeStageTask of prod stage - validateAndApproveNamespacedApprovalRequests(updateRunName, testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) - - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) - Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) - - checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) - }) - - It("Should update rp status as completed", func() { - rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(appConfigMapIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, - []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) - Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - }) - - It("Update the configmap on hub but should not rollout to member clusters with external strategy", func() { - 
updateConfigMapSucceed(&newConfigMap) - - // Verify old configmap is still on all member clusters. - for _, cluster := range allMemberClusters { - configMapActual := configMapPlacedOnClusterActual(cluster, &oldConfigMap) - Consistently(configMapActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep old configmap %s data on cluster %s", oldConfigMap.Name, cluster.ClusterName) - } - }) - - It("Should have new resource snapshot but RP status should remain completed with old snapshot", func() { - validateLatestResourceSnapshot(rpName, testNamespace, resourceSnapshotIndex2nd) - - // RP status should still show completed with old snapshot. - rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(appConfigMapIdentifiers(), resourceSnapshotIndex1st, true, allMemberClusterNames, - []string{resourceSnapshotIndex1st, resourceSnapshotIndex1st, resourceSnapshotIndex1st}, []bool{true, true, true}, nil, nil) - Consistently(rpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep RP %s/%s status as expected", testNamespace, rpName) - }) - - It("Update RP to use rollingUpdate strategy", func() { - Eventually(func() error { - rp := &placementv1beta1.ResourcePlacement{} - if err := hubClient.Get(ctx, client.ObjectKey{Name: rpName, Namespace: testNamespace}, rp); err != nil { - return fmt.Errorf("failed to get the rp: %w", err) - } - rp.Spec.Strategy = placementv1beta1.RolloutStrategy{ - Type: placementv1beta1.RollingUpdateRolloutStrategyType, - } - return hubClient.Update(ctx, rp) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP strategy to rollingUpdate") - }) - - It("Should automatically rollout new resources to all member clusters with rollingUpdate strategy", func() { - // Verify RP status shows all clusters with new resource snapshot. - rpStatusUpdatedActual := rpStatusUpdatedActual(appConfigMapIdentifiers(), allMemberClusterNames, nil, resourceSnapshotIndex2nd) - Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status with rollingUpdate strategy", testNamespace, rpName) - - // Verify new configmap is on all member clusters. - for _, cluster := range allMemberClusters { - configMapActual := configMapPlacedOnClusterActual(cluster, &newConfigMap) - Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update to the new configmap %s on cluster %s", newConfigMap.Name, cluster.ClusterName) - } - }) - }) - Context("Test parallel cluster updates with maxConcurrency set to 3", Ordered, func() { var strategy *placementv1beta1.StagedUpdateStrategy updateRunName := fmt.Sprintf(stagedUpdateRunNameWithSubIndexTemplate, GinkgoParallelProcess(), 0) @@ -1425,7 +1304,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem It("Should complete the staged update run with all 3 clusters updated in parallel", func() { // With maxConcurrency=3, all 3 clusters should be updated in parallel. // Each round waits 15 seconds, so total time should be under 20s. 
- surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunParallelEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1515,7 +1394,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem // Since maxConcurrency=70% each round we process 2 clusters in parallel, // so all 3 clusters should be updated in 2 rounds. // Each round waits 15 seconds, so total time should be under 40s. - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunName, testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[0], allMemberClusterNames[1], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunParallelEventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunName) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1527,7 +1406,7 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem }) }) - Context("Test resource rollout with staged update run by update run states - (Initialize -> Run)", Ordered, func() { + Context("Test resource rollout with staged update run by update run states - (Initialize -> Run -> Stop -> Run)", Ordered, func() { updateRunNames := []string{} var strategy *placementv1beta1.StagedUpdateStrategy @@ -1596,14 +1475,14 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem checkIfRemovedConfigMapFromAllMemberClustersConsistently() By("Validating the sur status remains in Initialize state") - surNotStartedActual := stagedUpdateRunStatusSucceededActual(updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) + surNotStartedActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, false) Consistently(surNotStartedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to 
Initialize updateRun %s/%s ", testNamespace, updateRunNames[0]) }) It("Should rollout resources to member-cluster-2 only after update run is in Run state", func() { // Update the update run state to Run. By("Updating the update run state to Run") - updateStagedUpdateRunState(updateRunNames[0], testNamespace, placementv1beta1.StateRun) + UpdateStagedUpdateRunState(ctx, hubClient, updateRunNames[0], testNamespace, placementv1beta1.StateRun) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) @@ -1612,13 +1491,50 @@ var _ = Describe("test RP rollout with staged update run", Label("resourceplacem rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envCanary, placementv1beta1.AfterStageApprovalTaskNameFmt, placementv1beta1.AfterStageTaskLabelValue) + }) + + It("Should not rollout to all member clusters while waiting for beforeStageTask approval for prod stage", func() { + By("Validating not rolled out to member-cluster-1 and member-cluster-3 yet") + checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) + + By("Validating rp status with member-cluster-2 updated only") + rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) + Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) + }) + + It("Should not rollout to all member clusters after stopping update run", func() { + // Update the update run state to Stop. 
+ By("Updating the update run state to Stop") + UpdateStagedUpdateRunState(ctx, hubClient, updateRunNames[0], testNamespace, placementv1beta1.StateStop) + + By("Validating not rolled out to member-cluster-1 and member-cluster-3 yet") + checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) + + By("Validating rp status with member-cluster-2 updated only") + rpStatusUpdatedActual := rpStatusWithExternalStrategyActual(nil, "", false, allMemberClusterNames, []string{"", resourceSnapshotIndex1st, ""}, []bool{false, true, false}, nil, nil) + Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update RP %s/%s status as expected", testNamespace, rpName) + }) + + It("Should not complete rollout to all member after beforeStageTask approval while in Stop state", func() { + ValidateAndApproveNamespacedApprovalRequests(ctx, hubClient, updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + + By("Validating not rolled out to member-cluster-1 and member-cluster-3 after beforeStageTask approval while update run is in Stop state") + checkIfRemovedConfigMapFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) }) - It("Should rollout resources to all the members and complete the staged update run successfully", func() { - validateAndApproveNamespacedApprovalRequests(updateRunNames[0], testNamespace, envProd, placementv1beta1.BeforeStageApprovalTaskNameFmt, placementv1beta1.BeforeStageTaskLabelValue) + It("Should complete rollout to all member clusters after resuming the update run to Run state", func() { + // Update the update run state back to Run. 
+ By("Updating the update run state back to Run") + UpdateStagedUpdateRunState(ctx, hubClient, updateRunNames[0], testNamespace, placementv1beta1.StateRun) + + By("All member clusters should have work resources placed") + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1], allMemberClusters[2]}) - surSucceededActual := stagedUpdateRunStatusSucceededActual(updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) + By("Validating update run has succeeded after resuming") + surSucceededActual := StagedUpdateRunStatusSucceededActual(ctx, hubClient, updateRunNames[0], testNamespace, resourceSnapshotIndex1st, policySnapshotIndex1st, len(allMemberClusters), defaultApplyStrategy, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil, true) Eventually(surSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s/%s succeeded", testNamespace, updateRunNames[0]) checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) @@ -1754,47 +1670,3 @@ func createStagedUpdateRunSucceedWithNoResourceSnapshotIndex(updateRunName, name } Expect(hubClient.Create(ctx, updateRun)).To(Succeed(), "Failed to create StagedUpdateRun %s", updateRunName) } - -func updateStagedUpdateRunState(updateRunName, namespace string, state placementv1beta1.State) { - Eventually(func() error { - updateRun := &placementv1beta1.StagedUpdateRun{} - if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { - return fmt.Errorf("failed to get StagedUpdateRun %s", updateRunName) - } - - updateRun.Spec.State = state - if err := hubClient.Update(ctx, updateRun); err != nil { - return fmt.Errorf("failed to update StagedUpdateRun %s", updateRunName) - } - return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update StagedUpdateRun %s to state %s", updateRunName, state) -} - -func validateAndApproveNamespacedApprovalRequests(updateRunName, namespace, stageName, approvalRequestNameFmt, stageTaskType string) { - Eventually(func() error { - appReqList := &placementv1beta1.ApprovalRequestList{} - if err := hubClient.List(ctx, appReqList, client.InNamespace(namespace), client.MatchingLabels{ - placementv1beta1.TargetUpdatingStageNameLabel: stageName, - placementv1beta1.TargetUpdateRunLabel: updateRunName, - placementv1beta1.TaskTypeLabel: stageTaskType, - }); err != nil { - return fmt.Errorf("failed to list approval requests: %w", err) - } - - if len(appReqList.Items) != 1 { - return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) - } - appReq := &appReqList.Items[0] - approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) - if appReq.Name != approvalRequestName { - return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) - } - meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ - Status: metav1.ConditionTrue, - Type: string(placementv1beta1.ApprovalRequestConditionApproved), - ObservedGeneration: appReq.GetGeneration(), - Reason: "lgtm", - }) - return hubClient.Status().Update(ctx, appReq) - }, updateRunEventuallyDuration, 
eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") -} diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index 15318d9b9..fc5fa06e0 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -17,6 +17,7 @@ limitations under the License. package e2e import ( + "context" "encoding/json" "errors" "fmt" @@ -677,6 +678,29 @@ func deleteTestResourceCRD() { Expect(hubClient.Delete(ctx, &crd)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{})) } +func waitForCRDToBeReady(crdName string) { + Eventually(func() error { // wait for CRD to be created + crd := &apiextensionsv1.CustomResourceDefinition{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crdName}, crd); err != nil { + return err + } + if crd.Status.Conditions == nil { + return fmt.Errorf("CRD status conditions are nil for %s", crdName) + } + + for _, cond := range crd.Status.Conditions { + if cond.Type == apiextensionsv1.Established && cond.Status != apiextensionsv1.ConditionTrue { + return fmt.Errorf("CRD is not established: %s", crdName) + } + if cond.Type == apiextensionsv1.NamesAccepted && cond.Status != apiextensionsv1.ConditionTrue { + return fmt.Errorf("CRD names are not accepted: %s", crdName) + } + } + + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "CRD failed to be ready %s", crdName) +} + func createTestResourceCRD() { var crd apiextensionsv1.CustomResourceDefinition readTestCustomResourceDefinition(&crd) @@ -1809,3 +1833,186 @@ func retrievePlacement(placementKey types.NamespacedName) (placementv1beta1.Plac } return placement, nil } + +// ClusterStagedUpdateRunStatusSucceededActual verifies the status of the ClusterStagedUpdateRun. +func ClusterStagedUpdateRunStatusSucceededActual( + ctx context.Context, + hubClient client.Client, + updateRunName string, + wantResourceIndex string, + wantPolicyIndex string, + wantClusterCount int, + wantApplyStrategy *placementv1beta1.ApplyStrategy, + wantStrategySpec *placementv1beta1.UpdateStrategySpec, + wantSelectedClusters [][]string, + wantUnscheduledClusters []string, + wantCROs map[string][]string, + wantROs map[string][]placementv1beta1.NamespacedName, + execute bool, +) func() error { + return func() error { + updateRun := &placementv1beta1.ClusterStagedUpdateRun{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { + return err + } + + wantStatus := placementv1beta1.UpdateRunStatus{ + PolicySnapshotIndexUsed: wantPolicyIndex, + ResourceSnapshotIndexUsed: wantResourceIndex, + PolicyObservedClusterCount: wantClusterCount, + ApplyStrategy: wantApplyStrategy.DeepCopy(), + UpdateStrategySnapshot: wantStrategySpec, + } + + if execute { + wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) + } else { + wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) + } + if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { + return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) + } + return nil + } +} + +// 
StagedUpdateRunStatusSucceededActual verifies the status of the StagedUpdateRun. +func StagedUpdateRunStatusSucceededActual( + ctx context.Context, + hubClient client.Client, + updateRunName, namespace string, + wantResourceIndex, wantPolicyIndex string, + wantClusterCount int, + wantApplyStrategy *placementv1beta1.ApplyStrategy, + wantStrategySpec *placementv1beta1.UpdateStrategySpec, + wantSelectedClusters [][]string, + wantUnscheduledClusters []string, + wantCROs map[string][]string, + wantROs map[string][]placementv1beta1.NamespacedName, + execute bool, +) func() error { + return func() error { + updateRun := &placementv1beta1.StagedUpdateRun{} + if err := hubClient.Get(ctx, client.ObjectKey{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { + return err + } + + wantStatus := placementv1beta1.UpdateRunStatus{ + PolicySnapshotIndexUsed: wantPolicyIndex, + ResourceSnapshotIndexUsed: wantResourceIndex, + PolicyObservedClusterCount: wantClusterCount, + ApplyStrategy: wantApplyStrategy.DeepCopy(), + UpdateStrategySnapshot: wantStrategySpec, + } + + if execute { + wantStatus.StagesStatus = buildStageUpdatingStatuses(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStageStatus(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunSucceedConditions(updateRun.Generation) + } else { + wantStatus.StagesStatus = buildStageUpdatingStatusesForInitialized(wantStrategySpec, wantSelectedClusters, wantCROs, wantROs, updateRun) + wantStatus.DeletionStageStatus = buildDeletionStatusWithoutConditions(wantUnscheduledClusters, updateRun) + wantStatus.Conditions = updateRunInitializedConditions(updateRun.Generation) + } + if diff := cmp.Diff(updateRun.Status, wantStatus, updateRunStatusCmpOption...); diff != "" { + return fmt.Errorf("UpdateRun status diff (-got, +want): %s", diff) + } + return nil + } +} + +// UpdateClusterStagedUpdateRunState updates the state of the ClusterStagedUpdateRun with the given name and state. +func UpdateClusterStagedUpdateRunState(ctx context.Context, hubClient client.Client, updateRunName string, state placementv1beta1.State) { + Eventually(func() error { + updateRun := &placementv1beta1.ClusterStagedUpdateRun{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName}, updateRun); err != nil { + return fmt.Errorf("failed to get ClusterStagedUpdateRun %s", updateRunName) + } + + updateRun.Spec.State = state + if err := hubClient.Update(ctx, updateRun); err != nil { + return fmt.Errorf("failed to update ClusterStagedUpdateRun %s", updateRunName) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update ClusterStagedUpdateRun %s state to %s", updateRunName, state) +} + +// ValidateAndApproveClusterApprovalRequests validates and approves the cluster approval request for the given update run and stage, based on the stage task type.
+func ValidateAndApproveClusterApprovalRequests(ctx context.Context, hubClient client.Client, updateRunName, stageName, approvalRequestNameFmt, stageTaskType string) { + Eventually(func() error { + appReqList := &placementv1beta1.ClusterApprovalRequestList{} + if err := hubClient.List(ctx, appReqList, client.MatchingLabels{ + placementv1beta1.TargetUpdatingStageNameLabel: stageName, + placementv1beta1.TargetUpdateRunLabel: updateRunName, + placementv1beta1.TaskTypeLabel: stageTaskType, + }); err != nil { + return fmt.Errorf("failed to list approval requests: %w", err) + } + + if len(appReqList.Items) != 1 { + return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) + } + appReq := &appReqList.Items[0] + approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) + if appReq.Name != approvalRequestName { + return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) + } + meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ + Status: metav1.ConditionTrue, + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + ObservedGeneration: appReq.GetGeneration(), + Reason: "lgtm", + }) + return hubClient.Status().Update(ctx, appReq) + }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") +} + +// UpdateStagedUpdateRunState updates the state of the StagedUpdateRun with the given name, namespace and state. +func UpdateStagedUpdateRunState(ctx context.Context, hubClient client.Client, updateRunName, namespace string, state placementv1beta1.State) { + Eventually(func() error { + updateRun := &placementv1beta1.StagedUpdateRun{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: updateRunName, Namespace: namespace}, updateRun); err != nil { + return fmt.Errorf("failed to get StagedUpdateRun %s", updateRunName) + } + + updateRun.Spec.State = state + if err := hubClient.Update(ctx, updateRun); err != nil { + return fmt.Errorf("failed to update StagedUpdateRun %s", updateRunName) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update StagedUpdateRun %s to state %s", updateRunName, state) +} + +// ValidateAndApproveNamespacedApprovalRequests validates and approves the approval request for the given update run and stage, based on the stage task type.
+func ValidateAndApproveNamespacedApprovalRequests(ctx context.Context, hubClient client.Client, updateRunName, namespace, stageName, approvalRequestNameFmt, stageTaskType string) { + Eventually(func() error { + appReqList := &placementv1beta1.ApprovalRequestList{} + if err := hubClient.List(ctx, appReqList, client.InNamespace(namespace), client.MatchingLabels{ + placementv1beta1.TargetUpdatingStageNameLabel: stageName, + placementv1beta1.TargetUpdateRunLabel: updateRunName, + placementv1beta1.TaskTypeLabel: stageTaskType, + }); err != nil { + return fmt.Errorf("failed to list approval requests: %w", err) + } + + if len(appReqList.Items) != 1 { + return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) + } + appReq := &appReqList.Items[0] + approvalRequestName := fmt.Sprintf(approvalRequestNameFmt, updateRunName, stageName) + if appReq.Name != approvalRequestName { + return fmt.Errorf("got approval request %s, want %s", appReq.Name, approvalRequestName) + } + meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ + Status: metav1.ConditionTrue, + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + ObservedGeneration: appReq.GetGeneration(), + Reason: "lgtm", + }) + return hubClient.Status().Update(ctx, appReq) + }, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to get or approve approval request") +} diff --git a/test/scheduler/suite_test.go b/test/scheduler/suite_test.go index 829dfaef9..a435830a9 100644 --- a/test/scheduler/suite_test.go +++ b/test/scheduler/suite_test.go @@ -571,7 +571,7 @@ func beforeSuiteForProcess1() []byte { Expect(err).NotTo(HaveOccurred(), "Failed to create controller manager") // Spin up a scheduler work queue. - schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue() + schedulerWorkQueue := queue.NewSimplePlacementSchedulingQueue("", nil) // Build a custom cluster eligibility checker. clusterEligibilityChecker := clustereligibilitychecker.New( diff --git a/test/utils/actuals/actuals.go b/test/utils/actuals/actuals.go new file mode 100644 index 000000000..3fb3897d7 --- /dev/null +++ b/test/utils/actuals/actuals.go @@ -0,0 +1,44 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package actuals features common actuals used in Ginkgo/Gomega tests. +package actuals + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +func WorkObjectRemovedActual(ctx context.Context, hubClient client.Client, workName, workNamespace string) func() error { + // Wait for the removal of the Work object. 
+ return func() error { + work := &fleetv1beta1.Work{} + if err := hubClient.Get(ctx, client.ObjectKey{Name: workName, Namespace: workNamespace}, work); !errors.IsNotFound(err) && err != nil { + return fmt.Errorf("work object still exists or an unexpected error occurred: %w", err) + } + if controllerutil.ContainsFinalizer(work, fleetv1beta1.WorkFinalizer) { + // The Work object is being deleted, but the finalizer is still present. + return fmt.Errorf("work object is being deleted, but the finalizer is still present") + } + return nil + } +} diff --git a/test/utils/handler/handler.go b/test/utils/handler/handler.go new file mode 100644 index 000000000..67d382f48 --- /dev/null +++ b/test/utils/handler/handler.go @@ -0,0 +1,47 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package handler provides test utilities for Kubernetes event handlers. +package handler + +// TestHandler is a simple implementation of cache.ResourceEventHandler for testing. +// It allows tests to track when specific event handler methods are called. +type TestHandler struct { + OnAddFunc func() + OnUpdateFunc func() + OnDeleteFunc func() +} + +// OnAdd is called when an object is added. +func (h *TestHandler) OnAdd(obj interface{}, isInInitialList bool) { + if h.OnAddFunc != nil { + h.OnAddFunc() + } +} + +// OnUpdate is called when an object is updated. +func (h *TestHandler) OnUpdate(oldObj, newObj interface{}) { + if h.OnUpdateFunc != nil { + h.OnUpdateFunc() + } +} + +// OnDelete is called when an object is deleted. 
+func (h *TestHandler) OnDelete(obj interface{}) { + if h.OnDeleteFunc != nil { + h.OnDeleteFunc() + } +} diff --git a/test/utils/informer/manager.go b/test/utils/informer/manager.go index a54f5075f..d96fa1b91 100644 --- a/test/utils/informer/manager.go +++ b/test/utils/informer/manager.go @@ -160,6 +160,21 @@ func (m *FakeManager) GetNameSpaceScopedResources() []schema.GroupVersionResourc return m.NamespaceScopedResources } +func (m *FakeManager) GetAllResources() []schema.GroupVersionResource { + allResources := make([]schema.GroupVersionResource, 0, len(m.APIResources)) + for gvk := range m.APIResources { + // Return a GVR with the same Group/Version and Kind as Resource + // The actual resource name doesn't matter since IsInformerSynced ignores the GVR parameter + gvr := schema.GroupVersionResource{ + Group: gvk.Group, + Version: gvk.Version, + Resource: gvk.Kind, + } + allResources = append(allResources, gvr) + } + return allResources +} + func (m *FakeManager) IsClusterScopedResources(gvk schema.GroupVersionKind) bool { return m.APIResources[gvk] == m.IsClusterScopedResource } @@ -170,3 +185,10 @@ func (m *FakeManager) WaitForCacheSync() { func (m *FakeManager) GetClient() dynamic.Interface { return nil } +func (m *FakeManager) AddEventHandlerToInformer(_ schema.GroupVersionResource, _ cache.ResourceEventHandler) { + // No-op for testing +} + +func (m *FakeManager) CreateInformerForResource(_ informer.APIResourceMeta) { + // No-op for testing +} diff --git a/test/utils/resource/apiresource.go b/test/utils/resource/apiresource.go new file mode 100644 index 000000000..863a7d26c --- /dev/null +++ b/test/utils/resource/apiresource.go @@ -0,0 +1,322 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package resource + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/restmapper" +) + +// Common verbs for API resources +var ( + // VerbsAll includes all standard Kubernetes verbs + VerbsAll = []string{"list", "watch", "get", "create", "update", "patch", "delete"} + // VerbsReadOnly includes verbs for read-only access + VerbsReadOnly = []string{"list", "watch", "get"} + // VerbsNoWatch includes verbs without watch capability + VerbsNoWatch = []string{"get", "create", "update", "patch", "delete"} +) + +// APIGroupV1 returns a standard core v1 API group for testing +func APIGroupV1() metav1.APIGroup { + return metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + } +} + +// APIGroupAppsV1 returns a standard apps/v1 API group for testing +func APIGroupAppsV1() metav1.APIGroup { + return metav1.APIGroup{ + Name: "apps", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "apps/v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "apps/v1", + Version: "v1", + }, + } +} + +// APIGroupResourcesV1 returns APIGroupResources for core v1 with the provided resources +func APIGroupResourcesV1(resources ...metav1.APIResource) *restmapper.APIGroupResources { + return &restmapper.APIGroupResources{ + Group: APIGroupV1(), + VersionedResources: map[string][]metav1.APIResource{ + "v1": resources, + }, + } +} + +// APIGroupResourcesAppsV1 returns APIGroupResources for apps/v1 with the provided resources +func APIGroupResourcesAppsV1(resources ...metav1.APIResource) *restmapper.APIGroupResources { + return &restmapper.APIGroupResources{ + Group: APIGroupAppsV1(), + VersionedResources: map[string][]metav1.APIResource{ + "v1": resources, + }, + } +} + +// APIResourceConfigMap returns a standard ConfigMap APIResource for testing +func APIResourceConfigMap() metav1.APIResource { + return metav1.APIResource{ + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourceSecret returns a standard Secret APIResource for testing +func APIResourceSecret() metav1.APIResource { + return metav1.APIResource{ + Name: "secrets", + Kind: "Secret", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourcePod returns a standard Pod APIResource for testing +func APIResourcePod() metav1.APIResource { + return metav1.APIResource{ + Name: "pods", + Kind: "Pod", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourceService returns a standard Service APIResource for testing +func APIResourceService() metav1.APIResource { + return metav1.APIResource{ + Name: "services", + Kind: "Service", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourceNamespace returns a standard Namespace APIResource for testing +func APIResourceNamespace() metav1.APIResource { + return metav1.APIResource{ + Name: "namespaces", + Kind: "Namespace", + Namespaced: false, + Verbs: VerbsReadOnly, + } +} + +// APIResourceNode returns a standard Node APIResource for testing +func APIResourceNode() metav1.APIResource { + return metav1.APIResource{ + Name: "nodes", + Kind: "Node", + Namespaced: false, + Verbs: VerbsReadOnly, + } +} + +// APIResourceDeployment returns a standard Deployment APIResource for testing +func APIResourceDeployment() metav1.APIResource { + return metav1.APIResource{ 
+ Name: "deployments", + Kind: "Deployment", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourceStatefulSet returns a standard StatefulSet APIResource for testing +func APIResourceStatefulSet() metav1.APIResource { + return metav1.APIResource{ + Name: "statefulsets", + Kind: "StatefulSet", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourceDaemonSet returns a standard DaemonSet APIResource for testing +func APIResourceDaemonSet() metav1.APIResource { + return metav1.APIResource{ + Name: "daemonsets", + Kind: "DaemonSet", + Namespaced: true, + Verbs: VerbsReadOnly, + } +} + +// APIResourceClusterRole returns a standard ClusterRole APIResource for testing +func APIResourceClusterRole() metav1.APIResource { + return metav1.APIResource{ + Name: "clusterroles", + Kind: "ClusterRole", + Namespaced: false, + Verbs: VerbsReadOnly, + } +} + +// APIResourceListV1 returns a standard v1 APIResourceList for testing with common core resources +func APIResourceListV1() *metav1.APIResourceList { + return &metav1.APIResourceList{ + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + APIResourceConfigMap(), + APIResourceSecret(), + APIResourcePod(), + APIResourceService(), + APIResourceNamespace(), + APIResourceNode(), + }, + } +} + +// APIResourceListAppsV1 returns a standard apps/v1 APIResourceList for testing +func APIResourceListAppsV1() *metav1.APIResourceList { + return &metav1.APIResourceList{ + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + APIResourceDeployment(), + APIResourceStatefulSet(), + APIResourceDaemonSet(), + }, + } +} + +// APIResourceWithVerbs creates a custom APIResource with specified verbs for testing +func APIResourceWithVerbs(name, kind string, namespaced bool, verbs []string) metav1.APIResource { + return metav1.APIResource{ + Name: name, + Kind: kind, + Namespaced: namespaced, + Verbs: verbs, + } +} + +// GVK helpers - GroupVersionKind for common resources + +// GVKConfigMap returns the GroupVersionKind for ConfigMap +func GVKConfigMap() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "", Version: "v1", Kind: "ConfigMap"} +} + +// GVKSecret returns the GroupVersionKind for Secret +func GVKSecret() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Secret"} +} + +// GVKPod returns the GroupVersionKind for Pod +func GVKPod() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} +} + +// GVKService returns the GroupVersionKind for Service +func GVKService() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Service"} +} + +// GVKNamespace returns the GroupVersionKind for Namespace +func GVKNamespace() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Namespace"} +} + +// GVKNode returns the GroupVersionKind for Node +func GVKNode() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Node"} +} + +// GVKDeployment returns the GroupVersionKind for Deployment +func GVKDeployment() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "Deployment"} +} + +// GVKStatefulSet returns the GroupVersionKind for StatefulSet +func GVKStatefulSet() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "StatefulSet"} +} + +// GVKDaemonSet returns the GroupVersionKind for DaemonSet +func GVKDaemonSet() schema.GroupVersionKind { + return 
schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "DaemonSet"} +} + +// GVKClusterRole returns the GroupVersionKind for ClusterRole +func GVKClusterRole() schema.GroupVersionKind { + return schema.GroupVersionKind{Group: "rbac.authorization.k8s.io", Version: "v1", Kind: "ClusterRole"} +} + +// GVR helpers - GroupVersionResource for common resources + +// GVRConfigMap returns the GroupVersionResource for configmaps +func GVRConfigMap() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "", Version: "v1", Resource: "configmaps"} +} + +// GVRSecret returns the GroupVersionResource for secrets +func GVRSecret() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"} +} + +// GVRPod returns the GroupVersionResource for pods +func GVRPod() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} +} + +// GVRService returns the GroupVersionResource for services +func GVRService() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "", Version: "v1", Resource: "services"} +} + +// GVRNamespace returns the GroupVersionResource for namespaces +func GVRNamespace() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "", Version: "v1", Resource: "namespaces"} +} + +// GVRNode returns the GroupVersionResource for nodes +func GVRNode() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "", Version: "v1", Resource: "nodes"} +} + +// GVRDeployment returns the GroupVersionResource for deployments +func GVRDeployment() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"} +} + +// GVRStatefulSet returns the GroupVersionResource for statefulsets +func GVRStatefulSet() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "statefulsets"} +} + +// GVRDaemonSet returns the GroupVersionResource for daemonsets +func GVRDaemonSet() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"} +} + +// GVRClusterRole returns the GroupVersionResource for clusterroles +func GVRClusterRole() schema.GroupVersionResource { + return schema.GroupVersionResource{Group: "rbac.authorization.k8s.io", Version: "v1", Resource: "clusterroles"} +} diff --git a/test/utils/resource/resource.go b/test/utils/resource/resource.go index a181d2661..97e2badfb 100644 --- a/test/utils/resource/resource.go +++ b/test/utils/resource/resource.go @@ -19,6 +19,7 @@ package resource import ( "encoding/json" + "fmt" "testing" appsv1 "k8s.io/api/apps/v1" @@ -244,3 +245,57 @@ func CreateResourceContentForTest(t *testing.T, obj interface{}) *fleetv1beta1.R }, } } + +// MarshalRuntimeObjToJSON marshals a runtime.Object to JSON bytes. +func MarshalRuntimeObjToJSONForTest(obj runtime.Object) ([]byte, error) { + unstructuredObjMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj) + if err != nil { + return nil, fmt.Errorf("failed to convert runtime object to an unstructured object: %w", err) + } + unstructuredObj := &unstructured.Unstructured{Object: unstructuredObjMap} + json, err := unstructuredObj.MarshalJSON() + if err != nil { + return nil, fmt.Errorf("failed to marshal the unstructured object to JSON: %w", err) + } + return json, nil +} + +// WorkObjectForTest creates a Work object for testing. 
+func WorkObjectForTest( + workName, memberClusterReservedNSName, placementObjName, placementObjNSName string, + applyStrategy *fleetv1beta1.ApplyStrategy, + reportBackStrategy *fleetv1beta1.ReportBackStrategy, + rawManifestJSON ...[]byte, +) *fleetv1beta1.Work { + manifests := make([]fleetv1beta1.Manifest, len(rawManifestJSON)) + for idx := range rawManifestJSON { + manifests[idx] = fleetv1beta1.Manifest{ + RawExtension: runtime.RawExtension{ + Raw: rawManifestJSON[idx], + }, + } + } + + work := &fleetv1beta1.Work{ + ObjectMeta: metav1.ObjectMeta{ + Name: workName, + Namespace: memberClusterReservedNSName, + Labels: make(map[string]string), + }, + Spec: fleetv1beta1.WorkSpec{ + Workload: fleetv1beta1.WorkloadTemplate{ + Manifests: manifests, + }, + ApplyStrategy: applyStrategy, + ReportBackStrategy: reportBackStrategy, + }, + } + if len(placementObjName) > 0 { + work.Labels[fleetv1beta1.PlacementTrackingLabel] = placementObjName + } + if len(placementObjNSName) > 0 { + work.Labels[fleetv1beta1.ParentNamespaceLabel] = placementObjNSName + } + + return work +}