-
Notifications
You must be signed in to change notification settings - Fork 118
e2e: Test to verify housekeeping CPU updates with node reboot. #1444
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
SargunNarula
wants to merge
1
commit into
openshift:main
Choose a base branch
from
SargunNarula:irq_housekeeping
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+238
−0
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,8 +12,10 @@ import ( | |
|
|
||
| . "github.com/onsi/ginkgo/v2" | ||
| . "github.com/onsi/gomega" | ||
| appsv1 "k8s.io/api/apps/v1" | ||
| corev1 "k8s.io/api/core/v1" | ||
| "k8s.io/apimachinery/pkg/api/errors" | ||
| "k8s.io/apimachinery/pkg/api/resource" | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| "k8s.io/apimachinery/pkg/labels" | ||
| "k8s.io/apimachinery/pkg/types" | ||
|
|
@@ -27,11 +29,13 @@ import ( | |
| performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2" | ||
| "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components" | ||
| profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile" | ||
| componenttuned "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/tuned" | ||
| manifestsutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" | ||
| testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils" | ||
| "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cgroup/runtime" | ||
| testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client" | ||
| "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cluster" | ||
| "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/deployments" | ||
| "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery" | ||
| "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/hypershift" | ||
| "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label" | ||
|
|
@@ -996,6 +1000,155 @@ var _ = Describe("[rfe_id:28761][performance] Updating parameters in performance | |
| }) | ||
| }) | ||
|
|
||
// Verifies that the OPENSHIFT_HOUSEKEEPING_CPUS environment variable injected
// into a guaranteed pod annotated with "irq-load-balancing.crio.io: housekeeping"
// stays consistent with the node's default IRQ SMP affinity, both initially and
// after the performance profile's reserved/isolated CPU split is changed (which
// triggers a tuning update and node reboot).
Context("Verify IRQ housekeeping updates", Ordered, Label(string(label.Tier2)), func() {
	var targetNode *corev1.Node
	var isolatedCPUSet cpuset.CPUSet

	// Snapshot the profile once so AfterAll can restore the original cluster state.
	testutils.CustomBeforeAll(func() {
		initialProfile = profile.DeepCopy()
	})

	// NOTE(review): 99999 is a placeholder test ID — a valid test case ID should
	// be assigned before merging (raised in PR review).
	It("[test_id:99999] should update housekeeping CPUs when performance profile is modified", func() {

		// Housekeeping IRQ pinning is meaningless when IRQ load balancing is
		// globally disabled by the profile, so skip in that configuration.
		if componenttuned.IsIRQBalancingGloballyDisabled(profile) {
			Skip("this test needs IRQ balancing (GloballyDisableIrqLoadBalancing=false)")
		}

		ctx := context.TODO()

		// Get current profile CPU configuration; both pools must be set for the
		// reserved/isolated move performed later in the test.
		Expect(profile.Spec.CPU.Reserved).ToNot(BeNil(), "expected reserved CPUs, found none")
		Expect(profile.Spec.CPU.Isolated).ToNot(BeNil(), "expected isolated CPUs, found none")

		reservedCPUSet, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved))
		Expect(err).ToNot(HaveOccurred(), "failed to parse reserved CPUs")

		isolatedCPUSet, err = cpuset.Parse(string(*profile.Spec.CPU.Isolated))
		Expect(err).ToNot(HaveOccurred(), "failed to parse isolated CPUs")

		// NOTE(review): PickNodeIdx returns index 0 unless E2E_PAO_TARGET_NODE
		// selects a specific node — confirm whether pinning to a single node is
		// actually required here, or whether any worker-cnf node would do
		// (question raised in PR review).
		targetNodeIdx := nodes.PickNodeIdx(workerRTNodes)
		targetNode = &workerRTNodes[targetNodeIdx]
		Expect(targetNode).ToNot(BeNil(), "missing target node")
		By(fmt.Sprintf("Using target worker node %q", targetNode.Name))

		// Ensure we have enough isolated CPUs for the test
		// minimum amount to avoid SMT-alignment error
		cpuRequest := 2
		if cpuRequest >= isolatedCPUSet.Size() {
			Skip(fmt.Sprintf("cpus request %d is greater than the available isolated cpus %d", cpuRequest, isolatedCPUSet.Size()))
		}

		By("Creating a Deployment with guaranteed pod that has irq-load-balancing.crio.io: housekeeping annotation")
		annotations := map[string]string{
			"irq-load-balancing.crio.io": "housekeeping",
		}
		podTemplate := getTestPodWithProfileAndAnnotations(profile, annotations, cpuRequest)

		// A Deployment (rather than a bare pod) is used so the pod is recreated
		// automatically after the reboot caused by the profile update.
		dp := deployments.Make("irq-housekeeping-dp", testutils.NamespaceTesting,
			deployments.WithPodTemplate(podTemplate),
			deployments.WithNodeSelector(map[string]string{testutils.LabelHostname: targetNode.Name}),
		)

		err = testclient.DataPlaneClient.Create(ctx, dp)
		Expect(err).ToNot(HaveOccurred(), "failed to create test deployment")
		defer func() {
			By("Cleaning up: deleting deployment")
			// Best-effort cleanup; the Delete error is intentionally ignored.
			testclient.DataPlaneClient.Delete(ctx, dp)
		}()

		By("Waiting for the deployment to be ready")
		desiredStatus := appsv1.DeploymentStatus{
			Replicas:          1,
			AvailableReplicas: 1,
		}
		err = deployments.WaitForDesiredDeploymentStatus(ctx, dp, testclient.DataPlaneClient, dp.Namespace, dp.Name, desiredStatus)
		Expect(err).ToNot(HaveOccurred(), "deployment did not reach desired status")

		By("Getting the pod from the deployment")
		podList := &corev1.PodList{}
		listOptions := &client.ListOptions{
			Namespace:     dp.Namespace,
			LabelSelector: labels.SelectorFromSet(dp.Spec.Selector.MatchLabels),
		}
		err = testclient.DataPlaneClient.List(ctx, podList, listOptions)
		Expect(err).ToNot(HaveOccurred(), "failed to list pods from deployment")
		Expect(len(podList.Items)).To(Equal(1), "expected exactly one pod in deployment")
		testpod := &podList.Items[0]
		// The housekeeping annotation only takes effect for Guaranteed pods.
		Expect(testpod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed), "Test pod does not have QoS class of Guaranteed")

		By("Verifying OPENSHIFT_HOUSEKEEPING_CPUS environment variable is set")
		initialHousekeepingCPUSet, err := getHousekeepingCPUsFromEnv(testpod)
		Expect(err).ToNot(HaveOccurred(), "failed to get OPENSHIFT_HOUSEKEEPING_CPUS from pod")
		Expect(initialHousekeepingCPUSet.Size()).ToNot(BeZero(), "OPENSHIFT_HOUSEKEEPING_CPUS should not be empty")

		By("Verifying initial IRQ affinity includes housekeeping CPUs")
		smpAffinitySet, err := nodes.GetDefaultSmpAffinitySet(ctx, targetNode)
		Expect(err).ToNot(HaveOccurred(), "failed to get default smp affinity")
		onlineCPUsSet, err := nodes.GetOnlineCPUsSet(ctx, targetNode)
		Expect(err).ToNot(HaveOccurred(), "failed to get online CPUs")
		// The default affinity mask may reference offline CPUs; restrict the
		// comparison to CPUs that are actually online.
		smpAffinitySet = smpAffinitySet.Intersection(onlineCPUsSet)

		Expect(initialHousekeepingCPUSet.IsSubsetOf(smpAffinitySet)).To(BeTrue(),
			"Housekeeping CPUs %s should be subset of IRQ affinity %s", initialHousekeepingCPUSet.String(), smpAffinitySet.String())

		By("Modifying the performance profile to change reserved and isolated CPUs")

		// Move one isolated CPU to reserved to trigger housekeeping CPUs update
		cpuToMove := cpuset.New(isolatedCPUSet.List()[0])
		newReservedSet := reservedCPUSet.Union(cpuToMove)
		newIsolatedSet := isolatedCPUSet.Difference(cpuToMove)

		profile.Spec.CPU.Reserved = ptr.To(performancev2.CPUSet(newReservedSet.String()))
		profile.Spec.CPU.Isolated = ptr.To(performancev2.CPUSet(newIsolatedSet.String()))

		By("Updating the performance profile")
		profiles.UpdateWithRetry(profile)

		By("Waiting for tuning to start updating")
		profilesupdate.WaitForTuningUpdating(ctx, profile)

		By("Waiting for tuning to complete")
		profilesupdate.WaitForTuningUpdated(ctx, profile)

		// The profile update reboots the node; give the replacement pod a
		// generous window to be rescheduled and become available again.
		By("Waiting for the deployment to be ready again after profile update and node reboot")
		Eventually(func() error {
			return deployments.WaitForDesiredDeploymentStatus(ctx, dp, testclient.DataPlaneClient, dp.Namespace, dp.Name, desiredStatus)
		}).WithTimeout(20*time.Minute).WithPolling(30*time.Second).Should(Succeed(), "deployment did not become ready after profile update")

		By("Getting the updated pod from the deployment")
		// Re-list: the reboot replaces the pod, so the earlier object is stale.
		err = testclient.DataPlaneClient.List(ctx, podList, listOptions)
		Expect(err).ToNot(HaveOccurred(), "failed to list pods from deployment after update")
		Expect(len(podList.Items)).To(Equal(1), "expected exactly one pod in deployment after update")
		testpod = &podList.Items[0]

		By("Verifying OPENSHIFT_HOUSEKEEPING_CPUS is updated after profile modification")
		updatedHousekeepingCPUSet, err := getHousekeepingCPUsFromEnv(testpod)
		Expect(err).ToNot(HaveOccurred(), "failed to get updated OPENSHIFT_HOUSEKEEPING_CPUS from pod")
		Expect(updatedHousekeepingCPUSet.Size()).ToNot(BeZero(), "updated OPENSHIFT_HOUSEKEEPING_CPUS should not be empty")

		By("Verifying updated IRQ affinity includes housekeeping CPUs")
		updatedSmpAffinitySet, err := nodes.GetDefaultSmpAffinitySet(ctx, targetNode)
		Expect(err).ToNot(HaveOccurred(), "failed to get updated default smp affinity")
		updatedOnlineCPUsSet, err := nodes.GetOnlineCPUsSet(ctx, targetNode)
		Expect(err).ToNot(HaveOccurred(), "failed to get updated online CPUs")
		updatedSmpAffinitySet = updatedSmpAffinitySet.Intersection(updatedOnlineCPUsSet)

		Expect(updatedHousekeepingCPUSet.IsSubsetOf(updatedSmpAffinitySet)).To(BeTrue(),
			"Updated housekeeping CPUs %s should be subset of IRQ affinity %s", updatedHousekeepingCPUSet.String(), updatedSmpAffinitySet.String())
	})

	// Restore the profile captured in CustomBeforeAll and wait for the
	// resulting tuning rollout to finish, leaving the cluster as found.
	AfterAll(func() {
		By("Reverting the profile to its initial state")
		profiles.UpdateWithRetry(initialProfile)

		By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
		profilesupdate.WaitForTuningUpdating(context.TODO(), profile)

		By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
		profilesupdate.WaitForTuningUpdated(context.TODO(), profile)
	})
})
|
|
||
| Context("[rfe_id:54374][rps_mask] Network Stack Pinning", Label(string(label.RPSMask), string(label.Tier1)), func() { | ||
|
|
||
| BeforeEach(func() { | ||
|
|
@@ -1435,3 +1588,50 @@ func copyNumaCoreSiblings(src map[int]map[int][]int) map[int]map[int][]int { | |
| } | ||
| return dst | ||
| } | ||
|
|
||
| // getHousekeepingCPUsFromEnv extracts the OPENSHIFT_HOUSEKEEPING_CPUS environment variable from the pod and returns it as a CPUSet. | ||
| func getHousekeepingCPUsFromEnv(pod *corev1.Pod) (cpuset.CPUSet, error) { | ||
| const housekeepingCpusEnv = "OPENSHIFT_HOUSEKEEPING_CPUS" | ||
|
|
||
| cmd := []string{"printenv", housekeepingCpusEnv} | ||
| output, err := pods.ExecCommandOnPod(testclient.K8sClient, pod, "", cmd) | ||
| if err != nil { | ||
| return cpuset.New(), fmt.Errorf("failed to get %s from pod %s/%s: %v", housekeepingCpusEnv, pod.Namespace, pod.Name, err) | ||
| } | ||
|
|
||
| value := strings.TrimSpace(string(output)) | ||
| if value == "" { | ||
| return cpuset.New(), fmt.Errorf("%s environment variable not found or empty in pod %s/%s", housekeepingCpusEnv, pod.Namespace, pod.Name) | ||
| } | ||
|
|
||
| cpuSet, err := cpuset.Parse(value) | ||
| if err != nil { | ||
| return cpuset.New(), fmt.Errorf("failed to parse %s value %q from pod %s/%s: %v", housekeepingCpusEnv, value, pod.Namespace, pod.Name, err) | ||
| } | ||
|
|
||
| return cpuSet, nil | ||
| } | ||
|
|
||
| // getTestPodWithProfileAndAnnotations creates a test pod with specified profile and annotations | ||
| func getTestPodWithProfileAndAnnotations(perfProf *performancev2.PerformanceProfile, annotations map[string]string, cpus int) *corev1.Pod { | ||
| testpod := pods.GetTestPod() | ||
| if len(annotations) > 0 { | ||
| testpod.Annotations = annotations | ||
| } | ||
| testpod.Namespace = testutils.NamespaceTesting | ||
|
|
||
| cpuCount := fmt.Sprintf("%d", cpus) | ||
| resCpu := resource.MustParse(cpuCount) | ||
| resMem := resource.MustParse("256Mi") | ||
| testpod.Spec.Containers[0].Resources = corev1.ResourceRequirements{ | ||
| Limits: corev1.ResourceList{ | ||
| corev1.ResourceCPU: resCpu, | ||
| corev1.ResourceMemory: resMem, | ||
| }, | ||
| } | ||
| if perfProf != nil { | ||
| runtimeClassName := components.GetComponentName(perfProf.Name, components.ComponentNamePrefix) | ||
| testpod.Spec.RuntimeClassName = &runtimeClassName | ||
| } | ||
| return testpod | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ import ( | |
| "context" | ||
| "encoding/json" | ||
| "fmt" | ||
| "os" | ||
| "path" | ||
| "sort" | ||
| "strconv" | ||
|
|
@@ -586,3 +587,20 @@ func GetL3SharedCPUs(node *corev1.Node) func(cpuId int) (cpuset.CPUSet, error) { | |
| return cpuSet, err | ||
| } | ||
| } | ||
|
|
||
// PickNodeIdx selects a node index based on environment variable E2E_PAO_TARGET_NODE.
// If the environment variable is not set or the named node is not found among
// the candidates, it returns 0 — i.e. the fallback is deterministic (always the
// first candidate), not random, despite the wording in the log message below.
//
// NOTE(review): callers must ensure `nodes` is non-empty; index 0 is returned
// unconditionally and would be out of range for an empty slice.
func PickNodeIdx(nodes []corev1.Node) int {
	name, ok := os.LookupEnv("E2E_PAO_TARGET_NODE")
	if !ok {
		return 0 // no node requested: deterministically pick the first candidate
	}
	for idx := range nodes {
		if nodes[idx].Name == name {
			testlog.Infof("node %q found among candidates, picking", name)
			return idx
		}
	}
	// Requested node absent: fall back to the first candidate (the log message
	// says "random" but the choice is always index 0).
	testlog.Infof("node %q not found among candidates, fall back to random one", name)
	return 0 // "safe" default
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a valid test case ID?