Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/apis/tuned/v1/tuned_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ const (
// TunedDeferredUpdate request the tuned daemons to defer the update of the rendered profile
// until the next restart.
TunedDeferredUpdate string = "tuned.openshift.io/deferred"

// StableGenerationAnnotationName is the tuned DaemonSet annotation used to determine the
// ClusterOperator Progressing status.
StableGenerationAnnotationName = "tuned.openshift.io/stable-generation"
)

/////////////////////////////////////////////////////////////////////////////////
Expand Down
7 changes: 4 additions & 3 deletions pkg/operator/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
tunedinformers "github.com/openshift/cluster-node-tuning-operator/pkg/generated/informers/externalversions"
ntomf "github.com/openshift/cluster-node-tuning-operator/pkg/manifests"
"github.com/openshift/cluster-node-tuning-operator/pkg/metrics"
tunedpkg "github.com/openshift/cluster-node-tuning-operator/pkg/tuned"
"github.com/openshift/cluster-node-tuning-operator/pkg/util"
"github.com/openshift/cluster-node-tuning-operator/version"

Expand Down Expand Up @@ -571,6 +570,10 @@ func (c *Controller) syncDaemonSet(tuned *tunedv1.Tuned) error {
update = true
}

// OCPBUGS-62632: do not indicate ClusterOperator status Progressing on node scaling or reboot
ds, change := c.storeLastStableGeneration(ds)
update = update || change

if update {
// Update the DaemonSet
ds = ds.DeepCopy() // never update the objects from cache
Expand Down Expand Up @@ -658,7 +661,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
profileMf.Spec.Config.Verbosity = computed.Operand.Verbosity
profileMf.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig
profileMf.Spec.Profile = computed.AllProfiles
profileMf.Status.Conditions = tunedpkg.InitializeStatusConditions()
_, err = c.clients.Tuned.TunedV1().Profiles(ntoconfig.WatchNamespace()).Create(context.TODO(), profileMf, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("failed to create Profile %s: %v", profileMf.Name, err)
Expand Down Expand Up @@ -737,7 +739,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
profile.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig
profile.Spec.Config.ProviderName = providerName
profile.Spec.Profile = computed.AllProfiles
profile.Status.Conditions = tunedpkg.InitializeStatusConditions()
delete(c.pc.state.bootcmdline, nodeName) // bootcmdline retrieved from node annotation is potentially stale, let it resync on node update

klog.V(2).Infof("syncProfile(): updating Profile %s [%s]", profile.Name, computed.TunedProfileName)
Expand Down
83 changes: 57 additions & 26 deletions pkg/operator/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ package operator

import (
"context"
"errors"
"fmt"
"os"
"strconv"

configv1 "github.com/openshift/api/config/v1"
operatorv1 "github.com/openshift/api/operator/v1"
operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -24,10 +25,6 @@ import (
"github.com/openshift/cluster-node-tuning-operator/version"
)

const (
errGenerationMismatch = "generation mismatch"
)

// syncOperatorStatus computes the operator's current status and therefrom
// creates or updates the ClusterOperator resource for the operator.
func (c *Controller) syncOperatorStatus(tuned *tunedv1.Tuned) error {
Expand All @@ -44,10 +41,6 @@ func (c *Controller) syncOperatorStatus(tuned *tunedv1.Tuned) error {
oldConditions := co.Status.Conditions
co.Status.Conditions, operandReleaseVersion, err = c.computeStatus(tuned, oldConditions)
if err != nil {
if err.Error() == errGenerationMismatch {
// Tuned DaemonSet has not updated its status yet
return nil
}
return err
}
if len(operandReleaseVersion) > 0 {
Expand Down Expand Up @@ -98,6 +91,53 @@ func (c *Controller) getOrCreateOperatorStatus() (*configv1.ClusterOperator, err
return co, nil
}

// storeLastStableGeneration records the DaemonSet's current generation in the
// tunedv1.StableGenerationAnnotationName annotation once the DaemonSet has
// finished rolling out (i.e. it is no longer progressing).
// It returns the (possibly mutated) DaemonSet and true when the annotation
// was updated, false when no change was needed.
func (c *Controller) storeLastStableGeneration(daemonSet *appsv1.DaemonSet) (*appsv1.DaemonSet, bool) {
	lastStableGeneration := daemonSet.Annotations[tunedv1.StableGenerationAnnotationName]
	currentGeneration := strconv.FormatInt(daemonSet.Generation, 10)
	if lastStableGeneration == currentGeneration {
		// This generation was already recorded as stable; nothing to do.
		return daemonSet, false
	}

	// Do not record the generation as stable while a rollout is still in flight.
	// NOTE: renamed the result variable to avoid shadowing the isProgressing function.
	if progressing, _ := isProgressing(daemonSet); progressing {
		return daemonSet, false
	}

	klog.V(2).Infof("DaemonSet %s/%s generation %d is stable", daemonSet.Namespace, daemonSet.Name, daemonSet.Generation)
	// Writing to a nil map panics; the annotations map may be unset on a
	// freshly-built manifest object.
	if daemonSet.Annotations == nil {
		daemonSet.Annotations = map[string]string{}
	}
	daemonSet.Annotations[tunedv1.StableGenerationAnnotationName] = currentGeneration
	return daemonSet, true
}

// isProgressing reports whether the DaemonSet is moving from one steady state
// to another, returning true plus a human-readable reason while a rollout is
// in flight, and (false, "") once the rollout has settled.
//
// "Progressing" follows the ClusterOperator convention: the component is
// actively rolling out new code or propagating config changes (e.g., a
// version change). This controller expects every config change to bump the
// DaemonSet generation (a .spec change), and it stamps the last "stable"
// generation — all replicas available at the latest version — into an
// annotation. Replicas that later go missing because a node was added,
// removed, or rebooted, or a pod was manually killed, therefore do not flip
// Progressing back to true.
//
// This code is borrowed from library-go.
func isProgressing(daemonSet *appsv1.DaemonSet) (bool, string) {
	stableGen := daemonSet.Annotations[tunedv1.StableGenerationAnnotationName]
	curGen := strconv.FormatInt(daemonSet.Generation, 10)
	if stableGen == curGen {
		// The previous reconfiguration has completed in the past.
		return false, ""
	}

	if daemonSet.Generation != daemonSet.Status.ObservedGeneration {
		return true, "Waiting for DaemonSet to act on changes"
	}
	if daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled {
		return true, fmt.Sprintf("Waiting for DaemonSet to update %d node pods", daemonSet.Status.DesiredNumberScheduled)
	}
	if daemonSet.Status.NumberUnavailable > 0 {
		return true, "Waiting for DaemonSet to deploy node pods"
	}
	return false, ""
}

// profileApplied returns true if Tuned Profile 'profile' has been applied.
func profileApplied(profile *tunedv1.Profile) bool {
if profile == nil || profile.Spec.Config.TunedProfile != profile.Status.TunedProfile {
Expand Down Expand Up @@ -178,7 +218,10 @@ func (c *Controller) numMCLabelsAcrossMCP(profileList []*tunedv1.Profile) int {
return n
}

// computeStatus computes the operator's current status.
// computeStatus returns the operator's current status conditions, operand's version and error if any.
// Operand's version is only returned if the operator is NOT progressing towards a new version,
// i.e. is stable. In all other cases (including when we fail to retrieve the daemonset), an empty string
// is returned.
func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.ClusterOperatorStatusCondition) ([]configv1.ClusterOperatorStatusCondition, string, error) {
const (
// maximum number of seconds for the operator to be Unavailable with a unique
Expand Down Expand Up @@ -253,11 +296,6 @@ func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.C
copyAvailableCondition()
}
} else {
if ds.Status.ObservedGeneration != ds.Generation {
// Do not base the conditions on stale information
return conditions, "", errors.New(errGenerationMismatch)
}

dsReleaseVersion := c.getDaemonSetReleaseVersion(ds)

if ds.Status.NumberAvailable > 0 {
Expand All @@ -278,17 +316,11 @@ func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.C
klog.V(3).Infof("syncOperatorStatus(): %s", availableCondition.Message)
}

// The operator is actively making changes to the operand (is Progressing) when:
// the total number of Nodes that should be running the daemon Pod
// (including Nodes correctly running the daemon Pod) != the total number of
// Nodes that are running updated daemon Pod.
if ds.Status.DesiredNumberScheduled != ds.Status.UpdatedNumberScheduled ||
ds.Status.CurrentNumberScheduled != ds.Status.NumberReady ||
ds.Status.DesiredNumberScheduled == 0 {
if ok, msg := isProgressing(ds); ok {
klog.V(3).Infof("setting Progressing condition to true")
progressingCondition.Status = configv1.ConditionTrue
progressingCondition.Reason = "Reconciling"
progressingCondition.Message = fmt.Sprintf("Working towards %q", os.Getenv("RELEASE_VERSION"))
progressingCondition.Message = msg
} else {
progressingCondition.Status = configv1.ConditionFalse
progressingCondition.Reason = "AsExpected"
Expand All @@ -314,9 +346,8 @@ func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.C
numProgressingProfiles, numDegradedProfiles := numProfilesProgressingDegraded(profileList)

if numProgressingProfiles > 0 {
progressingCondition.Status = configv1.ConditionTrue
progressingCondition.Reason = "ProfileProgressing"
progressingCondition.Message = fmt.Sprintf("Waiting for %v/%v Profiles to be applied", numProgressingProfiles, len(profileList))
availableCondition.Reason = "ProfileProgressing"
availableCondition.Message = fmt.Sprintf("Waiting for %v/%v Profiles to be applied", numProgressingProfiles, len(profileList))
}

if numDegradedProfiles > 0 {
Expand Down
Loading