From d94517a4254362ffcd31f05c4d664cc00dfaeb0c Mon Sep 17 00:00:00 2001 From: Talor Itzhak Date: Tue, 4 Nov 2025 14:27:08 +0200 Subject: [PATCH 1/5] api: introduce DRAResourceManagement annotation This annotation signals NTO that compute resources will be managed by DRA plugins. Signed-off-by: Talor Itzhak --- pkg/apis/performanceprofile/v2/performanceprofile_types.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/apis/performanceprofile/v2/performanceprofile_types.go b/pkg/apis/performanceprofile/v2/performanceprofile_types.go index 98e8e6c4e..d5912d61c 100644 --- a/pkg/apis/performanceprofile/v2/performanceprofile_types.go +++ b/pkg/apis/performanceprofile/v2/performanceprofile_types.go @@ -34,6 +34,11 @@ const PerformanceProfileEnablePhysicalRpsAnnotation = "performance.openshift.io/ // Valid values: "true", "enable" (to enable), "false", "disable" (to disable). const PerformanceProfileEnableRpsAnnotation = "performance.openshift.io/enable-rps" +// PerformanceProfileDRAResourceManagementAnnotation signals the operator to disable KubeletConfig +// topology managers (CPU Manager, Memory Manager) configurations +// that conflict with the DRA feature, and stop reconciling the PerformanceProfile. +const PerformanceProfileDRAResourceManagementAnnotation = "performance.openshift.io/dra-resource-management" + // PerformanceProfileSpec defines the desired state of PerformanceProfile. type PerformanceProfileSpec struct { // CPU defines a set of CPU related parameters. From ff6ed3b114992e65d300978882f516347a1841bd Mon Sep 17 00:00:00 2001 From: Talor Itzhak Date: Tue, 4 Nov 2025 14:29:13 +0200 Subject: [PATCH 2/5] kubeletconfig: disable compute managers Disable CPU and memory managers via kubeletconfig when DRAResourceManagement is enabled. 
Signed-off-by: Talor Itzhak --- .../components/components.go | 2 + .../components/kubeletconfig/kubeletconfig.go | 107 ++++++++++++------ .../kubeletconfig/kubeletconfig_test.go | 14 +++ .../components/manifestset/manifestset.go | 1 + 4 files changed, 90 insertions(+), 34 deletions(-) diff --git a/pkg/performanceprofile/controller/performanceprofile/components/components.go b/pkg/performanceprofile/controller/performanceprofile/components/components.go index c4243b209..0ec412e99 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/components.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/components.go @@ -38,6 +38,7 @@ type Options struct { ProfileMCP *mcov1.MachineConfigPool MachineConfig MachineConfigOptions MixedCPUsFeatureGateEnabled bool + DRAResourceManagement bool } type MachineConfigOptions struct { @@ -48,4 +49,5 @@ type MachineConfigOptions struct { type KubeletConfigOptions struct { MachineConfigPoolSelector map[string]string MixedCPUsEnabled bool + DRAResourceManagement bool } diff --git a/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig.go b/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig.go index e93704dd2..a78be62ae 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig.go @@ -2,6 +2,7 @@ package kubeletconfig import ( "encoding/json" + "fmt" "time" corev1 "k8s.io/api/core/v1" @@ -43,8 +44,12 @@ const ( evictionHardNodefsInodesFree = "nodefs.inodesFree" ) -// New returns new KubeletConfig object for performance sensetive workflows +// New returns new KubeletConfig object for performance sensitive workflows func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConfigOptions) (*machineconfigv1.KubeletConfig, error) { + if err := 
validateOptions(opts); err != nil { + return nil, fmt.Errorf("KubeletConfig options validation failed: %w", err) + } + name := components.GetComponentName(profile.Name, components.ComponentNamePrefix) kubeletConfig := &kubeletconfigv1beta1.KubeletConfiguration{} if v, ok := profile.Annotations[experimentalKubeletSnippetAnnotation]; ok { @@ -58,6 +63,61 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf Kind: "KubeletConfiguration", } + // when DRA resource management is enabled, all kubeletconfig settings should be disabled. + // this is because the DRA plugin will manage the resource allocation. + // if the kubeletconfig CPU and Memory Manager settings are not disabled, it will conflict with the DRA. + if opts.DRAResourceManagement { + if err := setKubeletConfigForDRAManagement(kubeletConfig, opts); err != nil { + return nil, err + } + } else { + if err := setKubeletConfigForCPUAndMemoryManagers(profile, kubeletConfig, opts); err != nil { + return nil, err + } + } + + raw, err := json.Marshal(kubeletConfig) + if err != nil { + return nil, err + } + + return &machineconfigv1.KubeletConfig{ + TypeMeta: metav1.TypeMeta{ + APIVersion: machineconfigv1.GroupVersion.String(), + Kind: "KubeletConfig", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: machineconfigv1.KubeletConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: opts.MachineConfigPoolSelector, + }, + KubeletConfig: &runtime.RawExtension{ + Raw: raw, + }, + }, + }, nil +} + +func addStringToQuantity(q *resource.Quantity, value string) error { + v, err := resource.ParseQuantity(value) + if err != nil { + return err + } + q.Add(v) + return nil +} + +func setKubeletConfigForDRAManagement(kubeletConfig *kubeletconfigv1beta1.KubeletConfiguration, opts *components.KubeletConfigOptions) error { + kubeletConfig.CPUManagerPolicy = "none" + kubeletConfig.CPUManagerPolicyOptions = map[string]string{} + kubeletConfig.TopologyManagerPolicy = 
kubeletconfigv1beta1.NoneTopologyManagerPolicy + kubeletConfig.MemoryManagerPolicy = kubeletconfigv1beta1.NoneMemoryManagerPolicy + return nil +} + +func setKubeletConfigForCPUAndMemoryManagers(profile *performancev2.PerformanceProfile, kubeletConfig *kubeletconfigv1beta1.KubeletConfiguration, opts *components.KubeletConfigOptions) error { kubeletConfig.CPUManagerPolicy = cpuManagerPolicyStatic kubeletConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 5 * time.Second} kubeletConfig.TopologyManagerPolicy = kubeletconfigv1beta1.BestEffortTopologyManagerPolicy @@ -102,11 +162,11 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf if opts.MixedCPUsEnabled { sharedCPUs, err := cpuset.Parse(string(*profile.Spec.CPU.Shared)) if err != nil { - return nil, err + return err } reservedCPUs, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved)) if err != nil { - return nil, err + return err } kubeletConfig.ReservedSystemCPUs = reservedCPUs.Union(sharedCPUs).String() } @@ -125,13 +185,13 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf if kubeletConfig.ReservedMemory == nil { reservedMemory := resource.NewQuantity(0, resource.DecimalSI) if err := addStringToQuantity(reservedMemory, kubeletConfig.KubeReserved[string(corev1.ResourceMemory)]); err != nil { - return nil, err + return err } if err := addStringToQuantity(reservedMemory, kubeletConfig.SystemReserved[string(corev1.ResourceMemory)]); err != nil { - return nil, err + return err } if err := addStringToQuantity(reservedMemory, kubeletConfig.EvictionHard[evictionHardMemoryAvailable]); err != nil { - return nil, err + return err } kubeletConfig.ReservedMemory = []kubeletconfigv1beta1.MemoryReservation{ @@ -159,37 +219,16 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf } } } - - raw, err := json.Marshal(kubeletConfig) - if err != nil { - return nil, err - } - - return &machineconfigv1.KubeletConfig{ - TypeMeta: 
metav1.TypeMeta{ - APIVersion: machineconfigv1.GroupVersion.String(), - Kind: "KubeletConfig", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: name, - }, - Spec: machineconfigv1.KubeletConfigSpec{ - MachineConfigPoolSelector: &metav1.LabelSelector{ - MatchLabels: opts.MachineConfigPoolSelector, - }, - KubeletConfig: &runtime.RawExtension{ - Raw: raw, - }, - }, - }, nil + return nil } +func validateOptions(opts *components.KubeletConfigOptions) error { + if opts == nil { + return nil + } -func addStringToQuantity(q *resource.Quantity, value string) error { - v, err := resource.ParseQuantity(value) - if err != nil { - return err + if opts.MixedCPUsEnabled && opts.DRAResourceManagement { + return fmt.Errorf("invalid configuration: mixed CPUs mode and DRA resource management features are mutually exclusive. please disable one of the features before continuing") } - q.Add(v) return nil } diff --git a/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig_test.go b/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig_test.go index aac2c798a..364603412 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig_test.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/kubeletconfig/kubeletconfig_test.go @@ -206,4 +206,18 @@ var _ = Describe("Kubelet Config", func() { }) }) + + Context("with mutually exclusive options", func() { + It("should return an error when both MixedCPUs and DRA resource management are enabled", func() { + profile := testutils.NewPerformanceProfile("test") + selectorKey, selectorValue := components.GetFirstKeyAndValue(profile.Spec.MachineConfigPoolSelector) + _, err := New(profile, &components.KubeletConfigOptions{ + MachineConfigPoolSelector: map[string]string{selectorKey: selectorValue}, + MixedCPUsEnabled: true, + DRAResourceManagement: true, + }) + Expect(err).To(HaveOccurred()) + 
Expect(err.Error()).To(ContainSubstring("mixed CPUs mode and DRA resource management features are mutually exclusive")) + }) + }) }) diff --git a/pkg/performanceprofile/controller/performanceprofile/components/manifestset/manifestset.go b/pkg/performanceprofile/controller/performanceprofile/components/manifestset/manifestset.go index 6942c5c38..a7937313e 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/manifestset/manifestset.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/manifestset/manifestset.go @@ -62,6 +62,7 @@ func GetNewComponents(profile *performancev2.PerformanceProfile, opts *component &components.KubeletConfigOptions{ MachineConfigPoolSelector: machineConfigPoolSelector, MixedCPUsEnabled: opts.MachineConfig.MixedCPUsEnabled, + DRAResourceManagement: opts.DRAResourceManagement, }) if err != nil { return nil, err From 2bd7267d278355697905e5b44e41f9729bfcfb52 Mon Sep 17 00:00:00 2001 From: Talor Itzhak Date: Tue, 4 Nov 2025 14:29:57 +0200 Subject: [PATCH 3/5] profile: add IsDRAManaged function common place to check if DRA management enabled. 
Signed-off-by: Talor Itzhak --- .../components/profile/profile.go | 22 ++++++++++ .../components/profile/profile_test.go | 40 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go b/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go index d120889f6..7bc66c63a 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go @@ -1,6 +1,10 @@ package profile import ( + "strconv" + + "k8s.io/klog/v2" + performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2" "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components" @@ -94,3 +98,21 @@ func IsMixedCPUsEnabled(profile *performancev2.PerformanceProfile) bool { } return *profile.Spec.WorkloadHints.MixedCpus } + +func IsDRAManaged(profile *performancev2.PerformanceProfile) bool { + if profile.Annotations == nil { + return false + } + + v, ok := profile.Annotations[performancev2.PerformanceProfileDRAResourceManagementAnnotation] + if !ok { + return false + } + + parsed, err := strconv.ParseBool(v) + if err != nil { + klog.ErrorS(err, "failed to parse annotation as bool", "annotation", performancev2.PerformanceProfileDRAResourceManagementAnnotation) + return false + } + return parsed +} diff --git a/pkg/performanceprofile/controller/performanceprofile/components/profile/profile_test.go b/pkg/performanceprofile/controller/performanceprofile/components/profile/profile_test.go index 33a5e38a3..86a90e773 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/profile/profile_test.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/profile/profile_test.go @@ -108,6 +108,46 @@ var _ = Describe("PerformanceProfile", func() { }) }) }) + + Describe("DRA 
Resource Management", func() { + Context("IsDRAManaged", func() { + It("should return false when annotations are nil", func() { + profile.Annotations = nil + result := IsDRAManaged(profile) + Expect(result).To(BeFalse()) + }) + + It("should return false when annotation is not present", func() { + profile.Annotations = map[string]string{} + result := IsDRAManaged(profile) + Expect(result).To(BeFalse()) + }) + + It("should return true when annotation is 'true'", func() { + profile.Annotations = map[string]string{ + performancev2.PerformanceProfileDRAResourceManagementAnnotation: "true", + } + result := IsDRAManaged(profile) + Expect(result).To(BeTrue()) + }) + + It("should return false when annotation is 'false'", func() { + profile.Annotations = map[string]string{ + performancev2.PerformanceProfileDRAResourceManagementAnnotation: "false", + } + result := IsDRAManaged(profile) + Expect(result).To(BeFalse()) + }) + + It("should return false when annotation has invalid value", func() { + profile.Annotations = map[string]string{ + performancev2.PerformanceProfileDRAResourceManagementAnnotation: "invalid", + } + result := IsDRAManaged(profile) + Expect(result).To(BeFalse()) + }) + }) + }) }) func setValidNodeSelector(profile *performancev2.PerformanceProfile) { From 5c318e1928fbc2269d7910cea9ead7853f159f0e Mon Sep 17 00:00:00 2001 From: Talor Itzhak Date: Thu, 11 Dec 2025 11:35:39 +0200 Subject: [PATCH 4/5] handlers: wire the new logic to the code Wire and inject the new options in the code flow. 
Signed-off-by: Talor Itzhak --- .../controller/performanceprofile/components/handler/handler.go | 1 + .../performanceprofile/hypershift/components/handler.go | 1 + 2 files changed, 2 insertions(+) diff --git a/pkg/performanceprofile/controller/performanceprofile/components/handler/handler.go b/pkg/performanceprofile/controller/performanceprofile/components/handler/handler.go index 75df6b6ee..870db949d 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/handler/handler.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/handler/handler.go @@ -44,6 +44,7 @@ func (h *handler) Apply(ctx context.Context, obj client.Object, recorder record. } // set missing options opts.MachineConfig.MixedCPUsEnabled = opts.MixedCPUsFeatureGateEnabled && profileutil.IsMixedCPUsEnabled(profile) + opts.DRAResourceManagement = profileutil.IsDRAManaged(profile) components, err := manifestset.GetNewComponents(profile, opts) if err != nil { diff --git a/pkg/performanceprofile/controller/performanceprofile/hypershift/components/handler.go b/pkg/performanceprofile/controller/performanceprofile/hypershift/components/handler.go index 1137d9e8e..702d6df0e 100644 --- a/pkg/performanceprofile/controller/performanceprofile/hypershift/components/handler.go +++ b/pkg/performanceprofile/controller/performanceprofile/hypershift/components/handler.go @@ -93,6 +93,7 @@ func (h *handler) Apply(ctx context.Context, obj client.Object, recorder record. 
} // set missing options options.MachineConfig.MixedCPUsEnabled = options.MixedCPUsFeatureGateEnabled && profileutil.IsMixedCPUsEnabled(profile) + options.DRAResourceManagement = profileutil.IsDRAManaged(profile) mfs, err := manifestset.GetNewComponents(profile, options) if err != nil { From ac51808cc862e4fc2608426af26cc1f6072ea38d Mon Sep 17 00:00:00 2001 From: Talor Itzhak Date: Thu, 11 Dec 2025 12:37:40 +0200 Subject: [PATCH 5/5] e2e: DRA manage resources test The test apply DRA management annotation and checks that NTO disables all managers. After that, it reverts changes in the profile and checks that NTO applies the managers configuration back to their original configuration. Signed-off-by: Talor Itzhak --- .../1_performance/topology_manager.go | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/test/e2e/performanceprofile/functests/1_performance/topology_manager.go b/test/e2e/performanceprofile/functests/1_performance/topology_manager.go index 76a356589..2cde8d603 100644 --- a/test/e2e/performanceprofile/functests/1_performance/topology_manager.go +++ b/test/e2e/performanceprofile/functests/1_performance/topology_manager.go @@ -2,6 +2,8 @@ package __performance import ( "context" + "fmt" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -9,7 +11,9 @@ import ( testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils" "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery" "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname" "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate" corev1 "k8s.io/api/core/v1" kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" @@ -45,4 +49,88 @@ var _ = Describe("[rfe_id:27350][performance]Topology Manager", Ordered, func() Expect(kubeletConfig.TopologyManagerPolicy).To(Equal(kubeletconfigv1beta1.BestEffortTopologyManagerPolicy), "Topology Manager policy mismatch got %q expected %q", kubeletconfigv1beta1.BestEffortTopologyManagerPolicy) } }) + + Context("Deactivating topology and resource managers", func() { + var initialProfile *performancev2.PerformanceProfile + var targetNode *corev1.Node + + BeforeEach(func() { + var err error + profile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels) + Expect(err).ToNot(HaveOccurred()) + + workerRTNodes, err := nodes.GetByLabels(testutils.NodeSelectorLabels) + Expect(err).ToNot(HaveOccurred()) + + workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes) + Expect(err).ToNot(HaveOccurred(), "Error looking for the optional selector: %v", err) + Expect(workerRTNodes).ToNot(BeEmpty(), "No RT worker node found!") + targetNode = &workerRTNodes[0] + + initialProfile = profile.DeepCopy() + + By("Adding the DRA resource management annotation to the profile") + if profile.Annotations == nil { + profile.Annotations = make(map[string]string) + } + 
profile.Annotations[performancev2.PerformanceProfileDRAResourceManagementAnnotation] = "true" + + By("Updating the performance profile") + profiles.UpdateWithRetry(profile) + + poolName := poolname.GetByProfile(context.TODO(), profile) + By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName)) + profilesupdate.WaitForTuningUpdating(context.TODO(), profile) + + By(fmt.Sprintf("Waiting when %s finishes updates", poolName)) + profilesupdate.WaitForTuningUpdated(context.TODO(), profile) + + DeferCleanup(func() { + By("Reverting the performance profile") + profiles.UpdateWithRetry(initialProfile) + + poolName := poolname.GetByProfile(context.TODO(), initialProfile) + By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName)) + profilesupdate.WaitForTuningUpdating(context.TODO(), initialProfile) + + By(fmt.Sprintf("Waiting when %s finishes updates", poolName)) + profilesupdate.WaitForTuningUpdated(context.TODO(), initialProfile) + + By("Verifying managers are restored to their original configuration") + kubeletConfig, err := nodes.GetKubeletConfig(context.TODO(), targetNode) + Expect(err).ToNot(HaveOccurred()) + + By("Verifying CPU manager policy is restored to static") + Expect(kubeletConfig.CPUManagerPolicy).To(Equal("static"), "CPUManagerPolicy should be 'static' after revert, got %q", kubeletConfig.CPUManagerPolicy) + + By("Verifying Topology manager policy is restored") + expectedTopologyPolicy := kubeletconfigv1beta1.BestEffortTopologyManagerPolicy + if initialProfile.Spec.NUMA != nil && initialProfile.Spec.NUMA.TopologyPolicy != nil { + expectedTopologyPolicy = *initialProfile.Spec.NUMA.TopologyPolicy + } + Expect(kubeletConfig.TopologyManagerPolicy).To(Equal(expectedTopologyPolicy), "TopologyManagerPolicy should be %q after revert, got %q", expectedTopologyPolicy, kubeletConfig.TopologyManagerPolicy) + + By("Verifying Memory manager policy is restored") + // 
Memory manager is set to Static only for restricted or single-numa-node topology policies + if expectedTopologyPolicy == kubeletconfigv1beta1.RestrictedTopologyManagerPolicy || + expectedTopologyPolicy == kubeletconfigv1beta1.SingleNumaNodeTopologyManagerPolicy { + Expect(kubeletConfig.MemoryManagerPolicy).To(Equal("Static"), "MemoryManagerPolicy should be 'Static' after revert, got %q", kubeletConfig.MemoryManagerPolicy) + } + }) + }) + + It("should disable CPU, Memory, and Topology managers when DRA annotation is set", func() { + kubeletConfig, err := nodes.GetKubeletConfig(context.TODO(), targetNode) + Expect(err).ToNot(HaveOccurred()) + + By("Verifying CPU manager policy is set to none") + Expect(kubeletConfig.CPUManagerPolicy).To(Equal("none"), "CPUManagerPolicy should be 'none' when DRA is enabled, got %q", kubeletConfig.CPUManagerPolicy) + + By("Verifying Topology manager policy is set to none") + Expect(kubeletConfig.TopologyManagerPolicy).To(Equal(kubeletconfigv1beta1.NoneTopologyManagerPolicy), "TopologyManagerPolicy should be 'none' when DRA is enabled, got %q", kubeletConfig.TopologyManagerPolicy) + + By("Verifying Memory manager policy is set to None") + Expect(kubeletConfig.MemoryManagerPolicy).To(Equal(kubeletconfigv1beta1.NoneMemoryManagerPolicy), "MemoryManagerPolicy should be 'None' when DRA is enabled, got %q", kubeletConfig.MemoryManagerPolicy) + }) + }) })