From 319b61be6b2a904cfb65a6a841927f5a8640434d Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Fri, 11 Jul 2025 17:03:23 +0530 Subject: [PATCH 1/3] verify Guaranteed pod is running after kubelet restart This PR addresses an issue where we verify that the cpu manager state file is the same after a kubelet restart, but we do not check that the Guaranteed pod started before the kubelet restart is also still running. Refer: https://issues.redhat.com/browse/OCPBUGS-43280 Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index e439411d98..f8962d19d3 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -313,6 +313,11 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { cpuManagerCpusetBeforeRestart, err := nodes.CpuManagerCpuSet(ctx, workerRTNode) Expect(err).ToNot(HaveOccurred()) testlog.Infof("pre kubelet restart default cpuset: %v", cpuManagerCpusetBeforeRestart.String()) + + By("capturing test pod state before restart") + originalPodUID := testpod.UID + testlog.Infof("pre kubelet restart pod UID: %v", originalPodUID) + kubeletRestartCmd := []string{ "chroot", "/rootfs", @@ -329,6 +334,34 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { testlog.Infof("post restart: finished cooldown time: %v", restartCooldownTime) + By("verify test pod comes back after kubelet restart") + Eventually(func() error { + updatedPod := &corev1.Pod{} + err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), updatedPod) + if err != nil { + return fmt.Errorf("failed to get pod after restart: %v", err) + } + + // Verify it's the same pod (same 
UID) + if updatedPod.UID != originalPodUID { + return fmt.Errorf("pod UID changed after restart: original=%v, current=%v", originalPodUID, updatedPod.UID) + } + + // Verify pod is ready + if updatedPod.Status.Phase != corev1.PodRunning { + return fmt.Errorf("pod is not running after restart: phase=%v", updatedPod.Status.Phase) + } + // Check pod ready condition + for _, condition := range updatedPod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + testlog.Infof("post kubelet restart pod is ready with UID: %v", updatedPod.UID) + return nil + } + } + + return fmt.Errorf("pod ready condition not found or not true") + }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "test pod should come back after kubelet restart") + By("fetch Default cpuset from cpu manager state after restart") cpuManagerCpusetAfterRestart, err := nodes.CpuManagerCpuSet(ctx, workerRTNode) Expect(cpuManagerCpusetBeforeRestart).To(Equal(cpuManagerCpusetAfterRestart)) From 17e210c1ae23bf49348eef99958bf34403be8175 Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Tue, 15 Jul 2025 17:09:24 +0530 Subject: [PATCH 2/3] avoid using indirect references and test negative pod status condition Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index f8962d19d3..9944e462cf 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -336,8 +336,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { By("verify test pod comes back after kubelet restart") Eventually(func() error { - updatedPod := &corev1.Pod{} - err := testclient.DataPlaneClient.Get(ctx, 
client.ObjectKeyFromObject(testpod), updatedPod) + var updatedPod corev1.Pod + err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), &updatedPod) if err != nil { return fmt.Errorf("failed to get pod after restart: %v", err) } @@ -353,13 +353,11 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { } // Check pod ready condition for _, condition := range updatedPod.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - testlog.Infof("post kubelet restart pod is ready with UID: %v", updatedPod.UID) - return nil + if condition.Type == corev1.PodReady && condition.Status != corev1.ConditionTrue { + return fmt.Errorf("Pod ondition is not in Ready state after kubelet restart: condition: %v", updatedPod.Status.Conditions) } } - - return fmt.Errorf("pod ready condition not found or not true") + return nil }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "test pod should come back after kubelet restart") By("fetch Default cpuset from cpu manager state after restart") From 51793b9add37f8624393d86461586cc6ca9aaca3 Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Tue, 15 Jul 2025 18:09:59 +0530 Subject: [PATCH 3/3] typo fix and add more debug when pod fails Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 9944e462cf..29ca6bcca6 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -354,7 +354,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { // Check pod ready condition for _, condition := range updatedPod.Status.Conditions { if condition.Type == 
corev1.PodReady && condition.Status != corev1.ConditionTrue { - return fmt.Errorf("Pod ondition is not in Ready state after kubelet restart: condition: %v", updatedPod.Status.Conditions) + return fmt.Errorf("Pod condition is not in Ready state after kubelet restart: reason: %v, message: %v", condition.Reason, condition.Message) + } } return nil