From 60fb80a7edd3311c752bbda837f3fa8d611e84e6 Mon Sep 17 00:00:00 2001
From: Sargun Narula
Date: Thu, 4 Dec 2025 16:52:06 +0530
Subject: [PATCH] e2e: Add IRQ test to verify housekeeping CPU updates with
 node reboot

Signed-off-by: Sargun Narula
---
 .../2_performance_update/updating_profile.go  | 200 ++++++++++++++++++
 .../utils/deployments/deployments.go          |   1 +
 .../functests/utils/nodes/nodes.go            |  18 ++
 .../functests/utils/pods/pods.go              |  19 ++
 4 files changed, 238 insertions(+)

diff --git a/test/e2e/performanceprofile/functests/2_performance_update/updating_profile.go b/test/e2e/performanceprofile/functests/2_performance_update/updating_profile.go
index 856788b764..bddfbc0e8f 100644
--- a/test/e2e/performanceprofile/functests/2_performance_update/updating_profile.go
+++ b/test/e2e/performanceprofile/functests/2_performance_update/updating_profile.go
@@ -12,8 +12,10 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/types"
@@ -27,11 +29,13 @@ import (
 	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
 	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
 	profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
+	componenttuned "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/tuned"
 	manifestsutil "github.com/openshift/cluster-node-tuning-operator/pkg/util"
 	testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cgroup/runtime"
 	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cluster"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/deployments"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/hypershift"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
@@ -996,6 +1000,155 @@ var _ = Describe("[rfe_id:28761][performance] Updating parameters in performance
 		})
 	})
 
+	Context("Verify IRQ housekeeping updates", Ordered, Label(string(label.Tier2)), func() {
+		var targetNode *corev1.Node
+		var isolatedCPUSet cpuset.CPUSet
+
+		testutils.CustomBeforeAll(func() {
+			initialProfile = profile.DeepCopy()
+		})
+
+		It("[test_id:99999] should update housekeeping CPUs when performance profile is modified", func() {
+
+			if componenttuned.IsIRQBalancingGloballyDisabled(profile) {
+				Skip("this test needs IRQ balancing (GloballyDisableIrqLoadBalancing=false)")
+			}
+
+			ctx := context.TODO()
+
+			// Get the current profile CPU configuration
+			Expect(profile.Spec.CPU.Reserved).ToNot(BeNil(), "expected reserved CPUs, found none")
+			Expect(profile.Spec.CPU.Isolated).ToNot(BeNil(), "expected isolated CPUs, found none")
+
+			reservedCPUSet, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved))
+			Expect(err).ToNot(HaveOccurred(), "failed to parse reserved CPUs")
+
+			isolatedCPUSet, err = cpuset.Parse(string(*profile.Spec.CPU.Isolated))
+			Expect(err).ToNot(HaveOccurred(), "failed to parse isolated CPUs")
+
+			targetNodeIdx := nodes.PickNodeIdx(workerRTNodes)
+			targetNode = &workerRTNodes[targetNodeIdx]
+			Expect(targetNode).ToNot(BeNil(), "missing target node")
+			By(fmt.Sprintf("Using target worker node %q", targetNode.Name))
+
+			// Ensure we have enough isolated CPUs for the test;
+			// 2 is the minimum request that avoids an SMT-alignment error
+			cpuRequest := 2
+			if cpuRequest >= isolatedCPUSet.Size() {
+				Skip(fmt.Sprintf("CPU request %d is not smaller than the number of available isolated CPUs %d", cpuRequest, isolatedCPUSet.Size()))
+			}
+
+			By("Creating a Deployment with a guaranteed pod that has the irq-load-balancing.crio.io: housekeeping annotation")
+			annotations := map[string]string{
+				"irq-load-balancing.crio.io": "housekeeping",
+			}
+			podTemplate := getTestPodWithProfileAndAnnotations(profile, annotations, cpuRequest)
+
+			dp := deployments.Make("irq-housekeeping-dp", testutils.NamespaceTesting,
+				deployments.WithPodTemplate(podTemplate),
+				deployments.WithNodeSelector(map[string]string{testutils.LabelHostname: targetNode.Name}),
+			)
+
+			err = testclient.DataPlaneClient.Create(ctx, dp)
+			Expect(err).ToNot(HaveOccurred(), "failed to create test deployment")
+			defer func() {
+				By("Cleaning up: deleting deployment")
+				testclient.DataPlaneClient.Delete(ctx, dp)
+			}()
+
+			By("Waiting for the deployment to be ready")
+			desiredStatus := appsv1.DeploymentStatus{
+				Replicas:          1,
+				AvailableReplicas: 1,
+			}
+			err = deployments.WaitForDesiredDeploymentStatus(ctx, dp, testclient.DataPlaneClient, dp.Namespace, dp.Name, desiredStatus)
+			Expect(err).ToNot(HaveOccurred(), "deployment did not reach desired status")
+
+			By("Getting the pod from the deployment")
+			podList := &corev1.PodList{}
+			listOptions := &client.ListOptions{
+				Namespace:     dp.Namespace,
+				LabelSelector: labels.SelectorFromSet(dp.Spec.Selector.MatchLabels),
+			}
+			err = testclient.DataPlaneClient.List(ctx, podList, listOptions)
+			Expect(err).ToNot(HaveOccurred(), "failed to list pods from deployment")
+			Expect(len(podList.Items)).To(Equal(1), "expected exactly one pod in deployment")
+			testpod := &podList.Items[0]
+			Expect(testpod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed), "test pod does not have QoS class of Guaranteed")
+
+			By("Verifying the OPENSHIFT_HOUSEKEEPING_CPUS environment variable is set")
+			initialHousekeepingCPUSet, err := getHousekeepingCPUsFromEnv(testpod)
+			Expect(err).ToNot(HaveOccurred(), "failed to get OPENSHIFT_HOUSEKEEPING_CPUS from pod")
+			Expect(initialHousekeepingCPUSet.Size()).ToNot(BeZero(), "OPENSHIFT_HOUSEKEEPING_CPUS should not be empty")
+
+			By("Verifying the initial IRQ affinity includes the housekeeping CPUs")
+			smpAffinitySet, err := nodes.GetDefaultSmpAffinitySet(ctx, targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to get default smp affinity")
+			onlineCPUsSet, err := nodes.GetOnlineCPUsSet(ctx, targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to get online CPUs")
+			smpAffinitySet = smpAffinitySet.Intersection(onlineCPUsSet)
+
+			Expect(initialHousekeepingCPUSet.IsSubsetOf(smpAffinitySet)).To(BeTrue(),
+				"housekeeping CPUs %s should be a subset of IRQ affinity %s", initialHousekeepingCPUSet.String(), smpAffinitySet.String())
+
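+			// Changing the reserved/isolated split below rolls out a new tuned
+			// configuration (with a node reboot); the Deployment then recreates
+			// the pod, which should come up with a refreshed
+			// OPENSHIFT_HOUSEKEEPING_CPUS value.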
+			By("Modifying the performance profile to change the reserved and isolated CPUs")
+
+			// Move one isolated CPU to reserved to trigger a housekeeping CPUs update
+			cpuToMove := cpuset.New(isolatedCPUSet.List()[0])
+			newReservedSet := reservedCPUSet.Union(cpuToMove)
+			newIsolatedSet := isolatedCPUSet.Difference(cpuToMove)
+
+			profile.Spec.CPU.Reserved = ptr.To(performancev2.CPUSet(newReservedSet.String()))
+			profile.Spec.CPU.Isolated = ptr.To(performancev2.CPUSet(newIsolatedSet.String()))
+
+			By("Updating the performance profile")
+			profiles.UpdateWithRetry(profile)
+
+			By("Waiting for tuning to start updating")
+			profilesupdate.WaitForTuningUpdating(ctx, profile)
+
+			By("Waiting for tuning to complete")
+			profilesupdate.WaitForTuningUpdated(ctx, profile)
+
+			By("Waiting for the deployment to be ready again after the profile update and node reboot")
+			Eventually(func() error {
+				return deployments.WaitForDesiredDeploymentStatus(ctx, dp, testclient.DataPlaneClient, dp.Namespace, dp.Name, desiredStatus)
+			}).WithTimeout(20*time.Minute).WithPolling(30*time.Second).Should(Succeed(), "deployment did not become ready after profile update")
+
+			By("Getting the updated pod from the deployment")
+			err = testclient.DataPlaneClient.List(ctx, podList, listOptions)
+			Expect(err).ToNot(HaveOccurred(), "failed to list pods from deployment after update")
+			Expect(len(podList.Items)).To(Equal(1), "expected exactly one pod in deployment after update")
+			testpod = &podList.Items[0]
+
+			By("Verifying OPENSHIFT_HOUSEKEEPING_CPUS is updated after the profile modification")
+			updatedHousekeepingCPUSet, err := getHousekeepingCPUsFromEnv(testpod)
+			Expect(err).ToNot(HaveOccurred(), "failed to get updated OPENSHIFT_HOUSEKEEPING_CPUS from pod")
+			Expect(updatedHousekeepingCPUSet.Size()).ToNot(BeZero(), "updated OPENSHIFT_HOUSEKEEPING_CPUS should not be empty")
+
+			By("Verifying the updated IRQ affinity includes the housekeeping CPUs")
+			updatedSmpAffinitySet, err := nodes.GetDefaultSmpAffinitySet(ctx, targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to get updated default smp affinity")
+			updatedOnlineCPUsSet, err := nodes.GetOnlineCPUsSet(ctx, targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to get updated online CPUs")
+			updatedSmpAffinitySet = updatedSmpAffinitySet.Intersection(updatedOnlineCPUsSet)
+
+			Expect(updatedHousekeepingCPUSet.IsSubsetOf(updatedSmpAffinitySet)).To(BeTrue(),
+				"updated housekeeping CPUs %s should be a subset of IRQ affinity %s", updatedHousekeepingCPUSet.String(), updatedSmpAffinitySet.String())
+		})
+
+		AfterAll(func() {
+			By("Reverting the profile to its initial state")
+			profiles.UpdateWithRetry(initialProfile)
+
+			By(fmt.Sprintf("Applying changes to the performance profile and waiting until %s starts updating", poolName))
+			profilesupdate.WaitForTuningUpdating(context.TODO(), profile)
+
+			By(fmt.Sprintf("Waiting until %s finishes updating", poolName))
+			profilesupdate.WaitForTuningUpdated(context.TODO(), profile)
+		})
+	})
+
 	Context("[rfe_id:54374][rps_mask] Network Stack Pinning", Label(string(label.RPSMask), string(label.Tier1)), func() {
 
 		BeforeEach(func() {
@@ -1435,3 +1588,50 @@ func copyNumaCoreSiblings(src map[int]map[int][]int) map[int]map[int][]int {
 	}
 	return dst
 }
+
+// getHousekeepingCPUsFromEnv extracts the OPENSHIFT_HOUSEKEEPING_CPUS environment variable from the pod and returns it as a CPUSet.
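+// The test expects the container runtime to expose this variable in containers
+// of pods annotated with irq-load-balancing.crio.io: housekeeping.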
+func getHousekeepingCPUsFromEnv(pod *corev1.Pod) (cpuset.CPUSet, error) {
+	const housekeepingCpusEnv = "OPENSHIFT_HOUSEKEEPING_CPUS"
+
+	cmd := []string{"printenv", housekeepingCpusEnv}
+	output, err := pods.ExecCommandOnPod(testclient.K8sClient, pod, "", cmd)
+	if err != nil {
+		return cpuset.New(), fmt.Errorf("failed to get %s from pod %s/%s: %v", housekeepingCpusEnv, pod.Namespace, pod.Name, err)
+	}
+
+	value := strings.TrimSpace(string(output))
+	if value == "" {
+		return cpuset.New(), fmt.Errorf("%s environment variable not found or empty in pod %s/%s", housekeepingCpusEnv, pod.Namespace, pod.Name)
+	}
+
+	cpuSet, err := cpuset.Parse(value)
+	if err != nil {
+		return cpuset.New(), fmt.Errorf("failed to parse %s value %q from pod %s/%s: %v", housekeepingCpusEnv, value, pod.Namespace, pod.Name, err)
+	}
+
+	return cpuSet, nil
+}
+
+// getTestPodWithProfileAndAnnotations creates a guaranteed test pod with the given annotations, CPU count, and the runtime class derived from the profile
+func getTestPodWithProfileAndAnnotations(perfProf *performancev2.PerformanceProfile, annotations map[string]string, cpus int) *corev1.Pod {
+	testpod := pods.GetTestPod()
+	if len(annotations) > 0 {
+		testpod.Annotations = annotations
+	}
+	testpod.Namespace = testutils.NamespaceTesting
+
+	cpuCount := fmt.Sprintf("%d", cpus)
+	resCpu := resource.MustParse(cpuCount)
+	resMem := resource.MustParse("256Mi")
+	// Setting only limits lets requests default to the same values,
+	// which yields a Guaranteed QoS pod
+	testpod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+		Limits: corev1.ResourceList{
+			corev1.ResourceCPU:    resCpu,
+			corev1.ResourceMemory: resMem,
+		},
+	}
+	if perfProf != nil {
+		runtimeClassName := components.GetComponentName(perfProf.Name, components.ComponentNamePrefix)
+		testpod.Spec.RuntimeClassName = &runtimeClassName
+	}
+	return testpod
+}
diff --git a/test/e2e/performanceprofile/functests/utils/deployments/deployments.go b/test/e2e/performanceprofile/functests/utils/deployments/deployments.go
index b45b2a18e6..6227774bae 100644
--- a/test/e2e/performanceprofile/functests/utils/deployments/deployments.go
+++ b/test/e2e/performanceprofile/functests/utils/deployments/deployments.go
@@ -76,6 +76,7 @@ func WithPodTemplate(podTemplate *corev1.Pod) func(dp *appsv1.Deployment) {
 	return func(dp *appsv1.Deployment) {
 		dp.Spec.Template.Spec = podTemplate.Spec
 		dp.Spec.Template.Labels = podTemplate.Labels
+		dp.Spec.Template.Annotations = podTemplate.Annotations
 		dp.Spec.Selector.MatchLabels = podTemplate.Labels
 	}
 }
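The one-line WithPodTemplate change above is what carries the test's annotations into the Deployment's pod template; without it, the irq-load-balancing.crio.io annotation would never reach the generated pods. A minimal sketch of the intended flow (not part of the patch, reusing only helpers from this change):

	podTemplate := pods.GetTestPod()
	podTemplate.Annotations = map[string]string{
		"irq-load-balancing.crio.io": "housekeeping",
	}
	dp := deployments.Make("irq-housekeeping-dp", testutils.NamespaceTesting,
		deployments.WithPodTemplate(podTemplate),
	)
	// With the fix, the annotation survives the template copy and lands on the pods:
	_ = dp.Spec.Template.Annotations["irq-load-balancing.crio.io"] // "housekeeping"
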
diff --git a/test/e2e/performanceprofile/functests/utils/nodes/nodes.go b/test/e2e/performanceprofile/functests/utils/nodes/nodes.go
index eec13b39be..d49af38494 100644
--- a/test/e2e/performanceprofile/functests/utils/nodes/nodes.go
+++ b/test/e2e/performanceprofile/functests/utils/nodes/nodes.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"os"
 	"path"
 	"sort"
 	"strconv"
@@ -586,3 +587,20 @@ func GetL3SharedCPUs(node *corev1.Node) func(cpuId int) (cpuset.CPUSet, error) {
 		return cpuSet, err
 	}
 }
+
+// PickNodeIdx selects a node index based on the E2E_PAO_TARGET_NODE environment variable.
+// If the environment variable is not set or the named node is not found, it returns 0.
+func PickNodeIdx(nodes []corev1.Node) int {
+	name, ok := os.LookupEnv("E2E_PAO_TARGET_NODE")
+	if !ok {
+		return 0 // arbitrary default: the first candidate
+	}
+	for idx := range nodes {
+		if nodes[idx].Name == name {
+			testlog.Infof("node %q found among candidates, picking", name)
+			return idx
+		}
+	}
+	testlog.Infof("node %q not found among candidates, falling back to the first one", name)
+	return 0 // safe default
+}
diff --git a/test/e2e/performanceprofile/functests/utils/pods/pods.go b/test/e2e/performanceprofile/functests/utils/pods/pods.go
index 8e3f93d200..ced7e137ca 100644
--- a/test/e2e/performanceprofile/functests/utils/pods/pods.go
+++ b/test/e2e/performanceprofile/functests/utils/pods/pods.go
@@ -20,6 +20,7 @@ import (
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/client-go/tools/remotecommand"
+	"k8s.io/utils/cpuset"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
@@ -272,3 +273,21 @@ func CheckPODSchedulingFailed(c client.Client, pod *corev1.Pod) (bool, error) {
 	}
 	return false, nil
 }
+
+// GetPodCPUs returns the CPUs assigned to the pod by reading the container's Cpus_allowed_list
+func GetPodCPUs(ctx context.Context, c *kubernetes.Clientset, pod *corev1.Pod) (cpuset.CPUSet, error) {
+	// Read the CPUs allowed for the container from /proc/self/status
+	cmd := []string{"/bin/bash", "-c", "grep Cpus_allowed_list /proc/self/status | awk '{print $2}'"}
+	output, err := ExecCommandOnPod(c, pod, "", cmd)
+	if err != nil {
+		return cpuset.New(), fmt.Errorf("failed to get Cpus_allowed_list from pod %s/%s: %v", pod.Namespace, pod.Name, err)
+	}
+
+	cpuList := strings.TrimSpace(string(output))
+	podCPUs, err := cpuset.Parse(cpuList)
+	if err != nil {
+		return cpuset.New(), fmt.Errorf("failed to parse CPU list %q: %v", cpuList, err)
+	}
+
+	return podCPUs, nil
+}
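For completeness, a sketch of how the new helpers compose (not part of the patch; checkHousekeepingIRQAffinity is a hypothetical name, and the function assumes it lives in the same functests package as the test so it can call getHousekeepingCPUsFromEnv):

	func checkHousekeepingIRQAffinity(ctx context.Context, candidates []corev1.Node, testpod *corev1.Pod) error {
		// Honors E2E_PAO_TARGET_NODE, falling back to the first candidate.
		node := &candidates[nodes.PickNodeIdx(candidates)]

		// OPENSHIFT_HOUSEKEEPING_CPUS as seen inside the container.
		hkCPUs, err := getHousekeepingCPUsFromEnv(testpod)
		if err != nil {
			return err
		}

		// Default IRQ affinity, restricted to online CPUs, as in the test.
		affinity, err := nodes.GetDefaultSmpAffinitySet(ctx, node)
		if err != nil {
			return err
		}
		online, err := nodes.GetOnlineCPUsSet(ctx, node)
		if err != nil {
			return err
		}
		affinity = affinity.Intersection(online)

		if !hkCPUs.IsSubsetOf(affinity) {
			return fmt.Errorf("housekeeping CPUs %s are not a subset of IRQ affinity %s", hkCPUs.String(), affinity.String())
		}
		return nil
	}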