diff --git a/pkg/apis/tuned/v1/tuned_types.go b/pkg/apis/tuned/v1/tuned_types.go
index 38a4ca7fea..65d078ff97 100644
--- a/pkg/apis/tuned/v1/tuned_types.go
+++ b/pkg/apis/tuned/v1/tuned_types.go
@@ -29,6 +29,10 @@ const (
 	// TunedDeferredUpdate request the tuned daemons to defer the update of the rendered profile
 	// until the next restart.
 	TunedDeferredUpdate string = "tuned.openshift.io/deferred"
+
+	// StableGenerationAnnotationName is a tuned DaemonSet annotation used to determine the
+	// ClusterOperator Progressing status.
+	StableGenerationAnnotationName = "tuned.openshift.io/stable-generation"
 )
 
 /////////////////////////////////////////////////////////////////////////////////
diff --git a/pkg/operator/controller.go b/pkg/operator/controller.go
index c7d9214a69..9a01b95aa6 100644
--- a/pkg/operator/controller.go
+++ b/pkg/operator/controller.go
@@ -38,7 +38,6 @@ import (
 	tunedinformers "github.com/openshift/cluster-node-tuning-operator/pkg/generated/informers/externalversions"
 	ntomf "github.com/openshift/cluster-node-tuning-operator/pkg/manifests"
 	"github.com/openshift/cluster-node-tuning-operator/pkg/metrics"
-	tunedpkg "github.com/openshift/cluster-node-tuning-operator/pkg/tuned"
 	"github.com/openshift/cluster-node-tuning-operator/pkg/util"
 	"github.com/openshift/cluster-node-tuning-operator/version"
 
@@ -571,6 +570,10 @@ func (c *Controller) syncDaemonSet(tuned *tunedv1.Tuned) error {
 		update = true
 	}
 
+	// OCPBUGS-62632: do not indicate the ClusterOperator Progressing status on node scaling or reboot.
+	ds, change := c.storeLastStableGeneration(ds)
+	update = update || change
+
 	if update {
 		// Update the DaemonSet
 		ds = ds.DeepCopy() // never update the objects from cache
@@ -658,7 +661,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
 		profileMf.Spec.Config.Verbosity = computed.Operand.Verbosity
 		profileMf.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig
 		profileMf.Spec.Profile = computed.AllProfiles
-		profileMf.Status.Conditions = tunedpkg.InitializeStatusConditions()
 		_, err = c.clients.Tuned.TunedV1().Profiles(ntoconfig.WatchNamespace()).Create(context.TODO(), profileMf, metav1.CreateOptions{})
 		if err != nil {
 			return fmt.Errorf("failed to create Profile %s: %v", profileMf.Name, err)
@@ -737,7 +739,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
 		profile.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig
 		profile.Spec.Config.ProviderName = providerName
 		profile.Spec.Profile = computed.AllProfiles
-		profile.Status.Conditions = tunedpkg.InitializeStatusConditions()
 
 		delete(c.pc.state.bootcmdline, nodeName) // bootcmdline retrieved from node annotation is potentially stale, let it resync on node update
 		klog.V(2).Infof("syncProfile(): updating Profile %s [%s]", profile.Name, computed.TunedProfileName)
diff --git a/pkg/operator/status.go b/pkg/operator/status.go
index 03975cdb2f..8c873dd1ba 100644
--- a/pkg/operator/status.go
+++ b/pkg/operator/status.go
@@ -2,13 +2,14 @@ package operator
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"os"
+	"strconv"
 
 	configv1 "github.com/openshift/api/config/v1"
 	operatorv1 "github.com/openshift/api/operator/v1"
 	operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -24,10 +25,6 @@ import (
 	"github.com/openshift/cluster-node-tuning-operator/version"
 )
 
-const (
-	errGenerationMismatch = "generation mismatch"
-)
-
 // syncOperatorStatus computes the operator's current status and therefrom
 // creates or updates the ClusterOperator resource for the operator.
 func (c *Controller) syncOperatorStatus(tuned *tunedv1.Tuned) error {
@@ -44,10 +41,6 @@ func (c *Controller) syncOperatorStatus(tuned *tunedv1.Tuned) error {
 	oldConditions := co.Status.Conditions
 	co.Status.Conditions, operandReleaseVersion, err = c.computeStatus(tuned, oldConditions)
 	if err != nil {
-		if err.Error() == errGenerationMismatch {
-			// Tuned DaemonSet has not updated its status yet
-			return nil
-		}
 		return err
 	}
 	if len(operandReleaseVersion) > 0 {
@@ -98,6 +91,53 @@ func (c *Controller) getOrCreateOperatorStatus() (*configv1.ClusterOperator, err
 	return co, nil
 }
 
+// storeLastStableGeneration stores the last stable generation of the DaemonSet in its annotation.
+// It returns the updated DaemonSet and true if the annotation was updated.
+func (c *Controller) storeLastStableGeneration(daemonSet *appsv1.DaemonSet) (*appsv1.DaemonSet, bool) {
+	lastStableGeneration := daemonSet.Annotations[tunedv1.StableGenerationAnnotationName]
+	currentGeneration := strconv.FormatInt(daemonSet.Generation, 10)
+	if lastStableGeneration == currentGeneration {
+		return daemonSet, false
+	}
+
+	if progressing, _ := isProgressing(daemonSet); progressing {
+		return daemonSet, false
+	}
+
+	klog.V(2).Infof("DaemonSet %s/%s generation %d is stable", daemonSet.Namespace, daemonSet.Name, daemonSet.Generation)
+	daemonSet.Annotations[tunedv1.StableGenerationAnnotationName] = currentGeneration
+	return daemonSet, true
+}
+
+// Progressing means "[the component] is actively rolling out new code, propagating config
+// changes (e.g., a version change), or otherwise moving from one steady state to another."
+// This controller expects all "config changes" to result in an increased DaemonSet generation
+// (i.e. DaemonSet .spec changes).
+// The controller stores the last "stable" DS generation in an annotation.
+// Stable means that all DS replicas are available and at the latest version.
+// Any subsequently missing replicas must therefore be caused by a node being added, removed,
+// or rebooted, or by a pod being manually killed; none of these result in Progressing=true.
+//
+// This code is borrowed from library-go.
+func isProgressing(daemonSet *appsv1.DaemonSet) (bool, string) {
+	lastStableGeneration := daemonSet.Annotations[tunedv1.StableGenerationAnnotationName]
+	currentGeneration := strconv.FormatInt(daemonSet.Generation, 10)
+	if lastStableGeneration == currentGeneration {
+		// The previous reconfiguration has completed in the past.
+		return false, ""
+	}
+
+	switch {
+	case daemonSet.Generation != daemonSet.Status.ObservedGeneration:
+		return true, "Waiting for DaemonSet to act on changes"
+	case daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled:
+		return true, fmt.Sprintf("Waiting for DaemonSet to update %d node pods", daemonSet.Status.DesiredNumberScheduled)
+	case daemonSet.Status.NumberUnavailable > 0:
+		return true, "Waiting for DaemonSet to deploy node pods"
+	}
+	return false, ""
+}
+
 // profileApplied returns true if Tuned Profile 'profile' has been applied.
 func profileApplied(profile *tunedv1.Profile) bool {
 	if profile == nil || profile.Spec.Config.TunedProfile != profile.Status.TunedProfile {
@@ -178,7 +218,10 @@ func (c *Controller) numMCLabelsAcrossMCP(profileList []*tunedv1.Profile) int {
 	return n
 }
 
-// computeStatus computes the operator's current status.
+// computeStatus returns the operator's current status conditions, the operand's version, and an error, if any.
+// The operand's version is returned only if the operator is NOT progressing towards a new version,
+// i.e. it is stable. In all other cases (including when we fail to retrieve the DaemonSet), an empty
+// string is returned.
 func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.ClusterOperatorStatusCondition) ([]configv1.ClusterOperatorStatusCondition, string, error) {
 	const (
 		// maximum number of seconds for the operator to be Unavailable with a unique
@@ -253,11 +296,6 @@ func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.C
 			copyAvailableCondition()
 		}
 	} else {
-		if ds.Status.ObservedGeneration != ds.Generation {
-			// Do not base the conditions on stale information
-			return conditions, "", errors.New(errGenerationMismatch)
-		}
-
 		dsReleaseVersion := c.getDaemonSetReleaseVersion(ds)
 
 		if ds.Status.NumberAvailable > 0 {
@@ -278,17 +316,11 @@
 			klog.V(3).Infof("syncOperatorStatus(): %s", availableCondition.Message)
 		}
 
-		// The operator is actively making changes to the operand (is Progressing) when:
-		// the total number of Nodes that should be running the daemon Pod
-		// (including Nodes correctly running the daemon Pod) != the total number of
-		// Nodes that are running updated daemon Pod.
-		if ds.Status.DesiredNumberScheduled != ds.Status.UpdatedNumberScheduled ||
-			ds.Status.CurrentNumberScheduled != ds.Status.NumberReady ||
-			ds.Status.DesiredNumberScheduled == 0 {
+		if ok, msg := isProgressing(ds); ok {
 			klog.V(3).Infof("setting Progressing condition to true")
 			progressingCondition.Status = configv1.ConditionTrue
 			progressingCondition.Reason = "Reconciling"
-			progressingCondition.Message = fmt.Sprintf("Working towards %q", os.Getenv("RELEASE_VERSION"))
+			progressingCondition.Message = msg
 		} else {
 			progressingCondition.Status = configv1.ConditionFalse
 			progressingCondition.Reason = "AsExpected"
@@ -314,9 +346,8 @@
 	numProgressingProfiles, numDegradedProfiles := numProfilesProgressingDegraded(profileList)
 
 	if numProgressingProfiles > 0 {
-		progressingCondition.Status = configv1.ConditionTrue
-		progressingCondition.Reason = "ProfileProgressing"
-		progressingCondition.Message = fmt.Sprintf("Waiting for %v/%v Profiles to be applied", numProgressingProfiles, len(profileList))
+		availableCondition.Reason = "ProfileProgressing"
+		availableCondition.Message = fmt.Sprintf("Waiting for %v/%v Profiles to be applied", numProgressingProfiles, len(profileList))
 	}
 
 	if numDegradedProfiles > 0 {
diff --git a/pkg/operator/status_test.go b/pkg/operator/status_test.go
new file mode 100644
index 0000000000..de8e1bddeb
--- /dev/null
+++ b/pkg/operator/status_test.go
@@ -0,0 +1,627 @@
+// Assisted-by: Claude Code IDE; model: claude-4.5-sonnet
+
+package operator
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"strings"
+	"testing"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/yaml"
+
+	configv1 "github.com/openshift/api/config/v1"
+	operatorv1 "github.com/openshift/api/operator/v1"
+	tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
+	ntoclient "github.com/openshift/cluster-node-tuning-operator/pkg/client"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/labels"
+)
+
+const (
+	testDaemonSetName = "tuned"
+	testNamespace     = "openshift-cluster-node-tuning-operator"
+	testReleaseVer    = "4.20.0"
+)
+
+// makeFakeManifest returns a test DaemonSet manifest as bytes.
+func makeFakeManifest() []byte {
+	return []byte(`
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: tuned
+  namespace: openshift-cluster-node-tuning-operator
+  labels:
+    openshift-app: tuned
+  annotations:
+    tuned.openshift.io/stable-generation: "1"
+spec:
+  selector:
+    matchLabels:
+      openshift-app: tuned
+  updateStrategy:
+    rollingUpdate:
+      maxUnavailable: 10%
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        openshift-app: tuned
+      name: tuned
+    spec:
+      serviceAccountName: tuned
+      containers:
+        - name: tuned
+          image: ${CLUSTER_NODE_TUNED_IMAGE}
+          imagePullPolicy: IfNotPresent
+          command: ["/usr/bin/cluster-node-tuning-operator","ocp-tuned","--in-cluster"]
+          env:
+            - name: WATCH_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: OCP_NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+            - name: RELEASE_VERSION
+              value: ${RELEASE_VERSION}
+            - name: CLUSTER_NODE_TUNED_IMAGE
+              value: ${CLUSTER_NODE_TUNED_IMAGE}
+      hostNetwork: true
+      hostPID: true
+      nodeSelector:
+        kubernetes.io/os: linux
+      priorityClassName: "system-node-critical"
+      tolerations:
+        - operator: Exists
+`)
+}
+
+// Test case structure
+type testCase struct {
+	name                   string
+	tuned                  *tunedv1.Tuned
+	daemonSet              *appsv1.DaemonSet
+	profiles               []*tunedv1.Profile
+	bootcmdlineConflict    map[string]bool
+	mcLabelsAcrossMCP      map[string]bool
+	oldConditions          []configv1.ClusterOperatorStatusCondition
+	expectedConditions     []configv1.ClusterOperatorStatusCondition
+	expectedOperandVersion string
+	expectErr              bool
+}
+
+// Tuned CR modifiers
+type tunedModifier func(*tunedv1.Tuned) *tunedv1.Tuned
+
+func makeFakeTuned(modifiers ...tunedModifier) *tunedv1.Tuned {
+	tuned := &tunedv1.Tuned{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "default",
+			Namespace: testNamespace,
+		},
+		Spec: tunedv1.TunedSpec{
+			ManagementState: operatorv1.Managed,
+		},
+	}
+	for _, modifier := range modifiers {
+		tuned = modifier(tuned)
+	}
+	return tuned
+}
+
+func withManagementState(state operatorv1.ManagementState) tunedModifier {
+	return func(t *tunedv1.Tuned) *tunedv1.Tuned {
+		t.Spec.ManagementState = state
+		return t
+	}
+}
+
+type daemonSetModifier func(*appsv1.DaemonSet) *appsv1.DaemonSet
+
+// getDaemonSet creates a DaemonSet from the manifest and applies modifiers.
+func getDaemonSet(modifiers ...daemonSetModifier) *appsv1.DaemonSet {
+	manifest := makeFakeManifest()
+
+	// Decode the manifest
+	ds := &appsv1.DaemonSet{}
+	decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewReader(manifest), 4096)
+	if err := decoder.Decode(ds); err != nil {
+		panic(fmt.Sprintf("failed to decode DaemonSet manifest: %v", err))
+	}
+
+	// Replace placeholders in the manifest
+	for i := range ds.Spec.Template.Spec.Containers {
+		container := &ds.Spec.Template.Spec.Containers[i]
+		if strings.Contains(container.Image, "${CLUSTER_NODE_TUNED_IMAGE}") {
+			container.Image = "quay.io/openshift/origin-cluster-node-tuning-operator:latest"
+		}
+		for j := range container.Env {
+			env := &container.Env[j]
+			if env.Name == "RELEASE_VERSION" && strings.Contains(env.Value, "${RELEASE_VERSION}") {
+				env.Value = testReleaseVer
+			}
+			if env.Name == "CLUSTER_NODE_TUNED_IMAGE" && strings.Contains(env.Value, "${CLUSTER_NODE_TUNED_IMAGE}") {
+				env.Value = "quay.io/openshift/origin-cluster-node-tuning-operator:latest"
+			}
+		}
+	}
+
+	// Set default status values for testing
+	ds.Generation = 1
+	ds.Status = appsv1.DaemonSetStatus{
+		CurrentNumberScheduled: 1,
+		DesiredNumberScheduled: 1,
+		NumberAvailable:        1,
+		NumberReady:            1,
+		NumberUnavailable:      0,
+		ObservedGeneration:     1,
+		UpdatedNumberScheduled: 1,
+	}
+
+	// Apply modifiers
+	for _, modifier := range modifiers {
+		ds = modifier(ds)
+	}
+
+	return ds
+}
+
+func withDaemonSetStatus(numberReady, updatedNumber, numberAvailable, numberUnavailable int32) daemonSetModifier {
+	return func(instance *appsv1.DaemonSet) *appsv1.DaemonSet {
+		instance.Status.NumberReady = numberReady
+		instance.Status.NumberAvailable = numberAvailable
+		instance.Status.UpdatedNumberScheduled = updatedNumber
+		instance.Status.NumberUnavailable = numberUnavailable
+		return instance
+	}
+}
+
+func withDaemonSetGeneration(generations ...int64) daemonSetModifier {
+	return func(instance *appsv1.DaemonSet) *appsv1.DaemonSet {
+		instance.Generation = generations[0]
+		if len(generations) > 1 {
+			instance.Status.ObservedGeneration = generations[1]
+		}
+		return instance
+	}
+}
+
+func withDaemonSetAnnotation(annotation string, value string) daemonSetModifier {
+	return func(instance *appsv1.DaemonSet) *appsv1.DaemonSet {
+		if instance.Annotations == nil {
+			instance.Annotations = map[string]string{}
+		}
+		instance.Annotations[annotation] = value
+		return instance
+	}
+}
+
+// Tuned-operator specific modifiers
+func withDesiredNumberScheduled(desired int32) daemonSetModifier {
+	return func(instance *appsv1.DaemonSet) *appsv1.DaemonSet {
+		instance.Status.DesiredNumberScheduled = desired
+		instance.Status.CurrentNumberScheduled = instance.Status.NumberAvailable
+		return instance
+	}
+}
+
+// Profile modifiers
+type profileModifier func(*tunedv1.Profile) *tunedv1.Profile
+
+func makeFakeProfile(name, tunedProfile string, modifiers ...profileModifier) *tunedv1.Profile {
+	profile := &tunedv1.Profile{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: testNamespace,
+		},
+		Spec: tunedv1.ProfileSpec{
+			Config: tunedv1.ProfileConfig{
+				TunedProfile: tunedProfile,
+			},
+		},
+		Status: tunedv1.ProfileStatus{
+			TunedProfile: tunedProfile,
+			Conditions:   []tunedv1.StatusCondition{},
+		},
+	}
+	for _, modifier := range modifiers {
+		profile = modifier(profile)
+	}
+	return profile
+}
+
+func withProfileApplied() profileModifier {
+	return func(p *tunedv1.Profile) *tunedv1.Profile {
+		p.Status.Conditions = append(p.Status.Conditions, tunedv1.StatusCondition{
+			Type:   tunedv1.TunedProfileApplied,
+			Status: corev1.ConditionTrue,
+		})
+		return p
+	}
+}
+
+func withProfileDegraded() profileModifier {
+	return func(p *tunedv1.Profile) *tunedv1.Profile {
+		p.Status.Conditions = append(p.Status.Conditions, tunedv1.StatusCondition{
+			Type:   tunedv1.TunedDegraded,
+			Status: corev1.ConditionTrue,
+		})
+		return p
+	}
+}
+
+// Condition helpers
+func makeCondition(condType configv1.ClusterStatusConditionType, status configv1.ConditionStatus, reason string) configv1.ClusterOperatorStatusCondition {
+	return configv1.ClusterOperatorStatusCondition{
+		Type:   condType,
+		Status: status,
+		Reason: reason,
+	}
+}
+
+// Mock implementations
+type mockDaemonSetLister struct {
+	ds  *appsv1.DaemonSet
+	err error
+}
+
+func (m *mockDaemonSetLister) List(selector labels.Selector) ([]*appsv1.DaemonSet, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	if m.ds != nil {
+		return []*appsv1.DaemonSet{m.ds}, nil
+	}
+	return []*appsv1.DaemonSet{}, nil
+}
+
+func (m *mockDaemonSetLister) Get(name string) (*appsv1.DaemonSet, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	if m.ds != nil && m.ds.Name == name {
+		return m.ds, nil
+	}
+	return nil, &notFoundError{name: name}
+}
+
+type mockProfileLister struct {
+	profiles []*tunedv1.Profile
+	err      error
+}
+
+func (m *mockProfileLister) List(selector labels.Selector) ([]*tunedv1.Profile, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	return m.profiles, nil
+}
+
+func (m *mockProfileLister) Get(name string) (*tunedv1.Profile, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	for _, p := range m.profiles {
+		if p.Name == name {
+			return p, nil
+		}
+	}
+	return nil, &notFoundError{name: name}
+}
+
+type notFoundError struct {
+	name string
+}
+
+func (e *notFoundError) Error() string {
+	return "not found: " + e.name
+}
+
+func (e *notFoundError) Status() metav1.Status {
+	return metav1.Status{Reason: metav1.StatusReasonNotFound}
+}
+
+// Test context helper
+func newTestController(daemonSet *appsv1.DaemonSet, profiles []*tunedv1.Profile, bootcmdlineConflict, mcLabelsAcrossMCP map[string]bool) *Controller {
+	if bootcmdlineConflict == nil {
+		bootcmdlineConflict = make(map[string]bool)
+	}
+	if mcLabelsAcrossMCP == nil {
+		mcLabelsAcrossMCP = make(map[string]bool)
+	}
+
+	return &Controller{
+		listers: &ntoclient.Listers{
+			DaemonSets:    &mockDaemonSetLister{ds: daemonSet},
+			TunedProfiles: &mockProfileLister{profiles: profiles},
+		},
+		bootcmdlineConflict: bootcmdlineConflict,
+		mcLabelsAcrossMCP:   mcLabelsAcrossMCP,
+	}
+}
+
+// conditionsEqual returns true if and only if the provided slices of conditions
+// (ignoring LastTransitionTime and Message) are equal.
+func conditionsEqual(oldConditions, newConditions []configv1.ClusterOperatorStatusCondition) bool {
+	if len(newConditions) != len(oldConditions) {
+		return false
+	}
+
+	for _, conditionA := range oldConditions {
+		foundMatchingCondition := false
+
+		for _, conditionB := range newConditions {
+			// Compare every field except LastTransitionTime and Message.
+			if conditionA.Type == conditionB.Type &&
+				conditionA.Status == conditionB.Status &&
+				conditionA.Reason == conditionB.Reason {
+				foundMatchingCondition = true
+				break
+			}
+		}
+
+		if !foundMatchingCondition {
+			return false
+		}
+	}
+
+	return true
+}
+
+func TestComputeStatus(t *testing.T) {
+	testCases := []testCase{
+		{
+			// No previous operator deployment, new installation.
+			name:  "initial sync, new installation",
+			tuned: makeFakeTuned(),
+			// No daemonSet (nil), no profiles, no old conditions
+			expectedConditions: []configv1.ClusterOperatorStatusCondition{
+				makeCondition(configv1.OperatorAvailable, configv1.ConditionFalse, "TunedUnavailable"),
+				// OCPSTRAT-2484: Operators MUST go Progressing when transitioning between versions (fresh install here).
+				makeCondition(configv1.OperatorProgressing, configv1.ConditionTrue, "Reconciling"),
+				makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "Reconciling"),
+			},
+			expectedOperandVersion: "", // the DaemonSet is progressing, so an empty string is returned
+		},
+		{
+			// DaemonSet is fully deployed, its stable generation annotation is not yet updated.
+ name: "daemonset fully available, annotation not updated", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet( + withDaemonSetGeneration(2, 2), + withDaemonSetStatus(3, 3, 3, 0), + withDesiredNumberScheduled(3), + withDaemonSetAnnotation(tunedv1.StableGenerationAnnotationName, "1"), + ), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // DaemonSet is fully deployed, its stable generation annotation is updated. + name: "daemonset fully available, annotation updated", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet( + withDaemonSetGeneration(2, 2), + withDaemonSetStatus(3, 3, 3, 0), + withDesiredNumberScheduled(3), + withDaemonSetAnnotation(tunedv1.StableGenerationAnnotationName, "2"), + ), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // Operator upgrade, ClusterOperator must be progressing. + name: "daemonset/operator progressing, generation mismatch", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet( + withDaemonSetGeneration(2, 2), + withDaemonSetStatus(3, 1, 3, 0), // numberReady=3, updatedNumber=1, numberAvailable=3, numberUnavailable=0 + withDesiredNumberScheduled(3), + withDaemonSetAnnotation(tunedv1.StableGenerationAnnotationName, "1"), + ), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionTrue, "Reconciling"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: "", + }, + { + // DaemonSet is not fully updated, generation != observedGeneration + name: "daemonset generation vs observedGeneration mismatch", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet( + withDaemonSetGeneration(2, 1), + withDaemonSetStatus(3, 3, 3, 0), // numberReady=3, updatedNumber=3, numberAvailable=3, numberUnavailable=0 + withDesiredNumberScheduled(3), + withDaemonSetAnnotation(tunedv1.StableGenerationAnnotationName, "1"), + ), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionTrue, "Reconciling"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: "", + }, + { + // Upgrade finished, we have a stale ClusterOperator progressing condition. 
+ name: "progressed to available/stable (upgrade finished)", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet(), + oldConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionFalse, "TunedUnavailable"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionTrue, "Reconciling"), + }, + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // When DaemonSet has no available pods, we must report ClusterOperator unavailable condition. + name: "daemonset has no available pods", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet( + withDaemonSetStatus(3, 0, 0, 3), + withDesiredNumberScheduled(3), + withDaemonSetAnnotation(tunedv1.StableGenerationAnnotationName, "1"), + ), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionFalse, "TunedUnavailable"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // Profiles progressing do not cause ClusterOperator progressing=true condition. + name: "profile progressing", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet(), + profiles: []*tunedv1.Profile{ + makeFakeProfile("node1", "openshift-node", withProfileApplied()), + makeFakeProfile("node2", "openshift-node"), + makeFakeProfile("node3", "openshift-node"), + }, + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "ProfileProgressing"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // Profiles progressing do not cause ClusterOperator degraded=true condition. + name: "profiles degraded", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet(), + profiles: []*tunedv1.Profile{ + makeFakeProfile("node1", "openshift-node", withProfileApplied()), + makeFakeProfile("node2", "openshift-node", withProfileDegraded()), + }, + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "ProfileDegraded"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // bootcmdline conflict is a serious cluster mis-configuration, report ClusterOperator degraded=true condition. 
+ name: "bootcmdline conflict", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet(), + profiles: []*tunedv1.Profile{ + makeFakeProfile("node1", "openshift-node", withProfileApplied()), + makeFakeProfile("node2", "openshift-node", withProfileApplied()), + }, + bootcmdlineConflict: map[string]bool{"node1": true}, + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionTrue, "ProfileConflict"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + // MC labels across MCPs is a serious cluster mis-configuration, report ClusterOperator degraded=true condition. + name: "MC labels across MCPs", + tuned: makeFakeTuned(), + daemonSet: getDaemonSet(), + profiles: []*tunedv1.Profile{ + makeFakeProfile("node1", "openshift-node", withProfileApplied()), + makeFakeProfile("node2", "openshift-node", withProfileApplied()), + }, + mcLabelsAcrossMCP: map[string]bool{"node1": true}, + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "AsExpected"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "AsExpected"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionTrue, "MCLabelsAcrossMCPs"), + }, + expectedOperandVersion: testReleaseVer, + }, + { + name: "daemonset not found with existing conditions", + tuned: makeFakeTuned(), + // daemonSet is nil (not found) + oldConditions: []configv1.ClusterOperatorStatusCondition{ + { + Type: configv1.OperatorAvailable, + Status: configv1.ConditionFalse, + Reason: "TunedUnavailable", + }, + }, + expectErr: true, + }, + { + name: "management state unmanaged", + tuned: makeFakeTuned(withManagementState(operatorv1.Unmanaged)), + daemonSet: getDaemonSet(), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "Unmanaged"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "Unmanaged"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "Unmanaged"), + }, + expectedOperandVersion: "", + }, + { + name: "management state removed", + tuned: makeFakeTuned(withManagementState(operatorv1.Removed)), + daemonSet: getDaemonSet(), + expectedConditions: []configv1.ClusterOperatorStatusCondition{ + makeCondition(configv1.OperatorAvailable, configv1.ConditionTrue, "Removed"), + makeCondition(configv1.OperatorProgressing, configv1.ConditionFalse, "Removed"), + makeCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "Removed"), + }, + expectedOperandVersion: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + os.Setenv("RELEASE_VERSION", testReleaseVer) + defer os.Unsetenv("RELEASE_VERSION") + + c := newTestController(tc.daemonSet, tc.profiles, tc.bootcmdlineConflict, tc.mcLabelsAcrossMCP) + + conditions, operandVersion, err := c.computeStatus(tc.tuned, tc.oldConditions) + + if err != nil && !tc.expectErr { + t.Errorf("computeStatus() returned unexpected error: %v", err) + } + if err == nil && tc.expectErr { + t.Error("computeStatus() unexpectedly succeeded when error was expected") + } + if tc.expectErr { + // Skip further checks if error was expected + return + } + + // Assert - Check operand version + if operandVersion != tc.expectedOperandVersion { + t.Errorf("operandVersion 
mismatch: expected %q, got %q", tc.expectedOperandVersion, operandVersion) + } + + // Assert - Check conditions + if !conditionsEqual(tc.expectedConditions, conditions) { + t.Errorf("conditions mismatch:\nExpected: %+v\nGot: %+v", tc.expectedConditions, conditions) + } + }) + } +} diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index d2cfa46939..0e8170d0c3 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -1339,7 +1339,7 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change isApplied := (c.daemon.profileFingerprintUnpacked == c.daemon.profileFingerprintEffective) daemonStatus := c.daemon.status - klog.V(4).Infof("daemonStatus(): change: deferred=%v applied=%v nodeRestart=%v", wantsDeferred, isApplied, change.nodeRestart) + klog.V(4).Infof("updateTunedProfileStatus(): change: deferred=%v applied=%v nodeRestart=%v", wantsDeferred, isApplied, change.nodeRestart) if (wantsDeferred && !isApplied) && !change.nodeRestart { // avoid setting the flag on updates deferred -> immediate daemonStatus |= scDeferred recommendProfile, err := TunedRecommendFileRead() @@ -1357,7 +1357,8 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change c.daemon.status = daemonStatus if profile.Status.TunedProfile == activeProfile && - ConditionsEqual(profile.Status.Conditions, statusConditions) { + ConditionsEqual(profile.Status.Conditions, statusConditions) && + profile.Status.ObservedGeneration == profile.Generation { klog.V(2).Infof("updateTunedProfileStatus(): no need to update status of Profile %s", profile.Name) return nil }