From a13df0122884f67c2a332e88f86a7e3f91a12bfa Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Tue, 2 Dec 2025 17:43:26 +0100 Subject: [PATCH 1/3] test: ensure readiness of MonitoringStack Signed-off-by: Simon Pasquier --- test/e2e/framework/assertions.go | 57 ++++++++++++++++++-- test/e2e/monitoring_stack_controller_test.go | 14 +++-- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/test/e2e/framework/assertions.go b/test/e2e/framework/assertions.go index da2e8f3fe..be5cf18b4 100644 --- a/test/e2e/framework/assertions.go +++ b/test/e2e/framework/assertions.go @@ -150,9 +150,9 @@ func (f *Framework) AssertStatefulsetReady(name, namespace string, fns ...Option t.Helper() key := types.NamespacedName{Name: name, Namespace: namespace} if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { - pod := &appsv1.StatefulSet{} - err := f.K8sClient.Get(context.Background(), key, pod) - return err == nil && pod.Status.ReadyReplicas == *pod.Spec.Replicas, nil + ss := &appsv1.StatefulSet{} + err := f.K8sClient.Get(context.Background(), key, ss) + return err == nil && ss.Status.ReadyReplicas == *ss.Spec.Replicas, nil }); err != nil { t.Fatal(fmt.Errorf("statefulset %s was never ready with %v", name, err)) } @@ -208,6 +208,57 @@ func (f *Framework) AssertStatefulSetContainerHasArg(t *testing.T, name, namespa } } +// AssertMonitoringStackReady asserts that a monitoring stack is successfully deployed. 
+func (f *Framework) AssertMonitoringStackReady(name, namespace string, fns ...OptionFn) func(t *testing.T) { + option := AssertOption{ + PollInterval: 5 * time.Second, + WaitTimeout: DefaultTestTimeout, + } + for _, fn := range fns { + fn(&option) + } + + return func(t *testing.T) { + t.Helper() + key := types.NamespacedName{Name: name, Namespace: namespace} + var loopErr error + if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { + ms := &v1alpha1.MonitoringStack{} + if err := f.K8sClient.Get(context.Background(), key, ms); err != nil { + loopErr = err + return false, nil + } + + var foundAvailable bool + for _, cond := range ms.Status.Conditions { + if cond.Type == v1alpha1.ResourceDiscoveryCondition { + continue + } + + if ms.Generation != cond.ObservedGeneration { + loopErr = fmt.Errorf("%s condition observed generation %d != generation %d", cond.Type, cond.ObservedGeneration, ms.Generation) + return false, nil + } + + foundAvailable = foundAvailable || cond.Type == v1alpha1.AvailableCondition + if cond.Status != v1alpha1.ConditionTrue { + loopErr = fmt.Errorf("expected %s condition to be True, got %s", cond.Type, cond.Status) + return false, nil + } + } + + if !foundAvailable { + loopErr = fmt.Errorf("%s condition not found", v1alpha1.AvailableCondition) + return false, nil + } + + return true, nil + }); err != nil { + t.Fatal(fmt.Errorf("monitoringStack %s/%s was never ready: %w: %w", namespace, name, err, loopErr)) + } + } +} + // AssertDeploymentReady asserts that a deployment has the desired number of pods running func (f *Framework) AssertDeploymentReady(name, namespace string, fns ...OptionFn) func(t *testing.T) { option := AssertOption{ diff --git a/test/e2e/monitoring_stack_controller_test.go b/test/e2e/monitoring_stack_controller_test.go index 717cb5f90..dbe2a9720 100644 --- a/test/e2e/monitoring_stack_controller_test.go +++ 
b/test/e2e/monitoring_stack_controller_test.go @@ -21,6 +21,7 @@ import ( policyv1 "k8s.io/api/policy/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" @@ -810,7 +811,13 @@ func assertPrometheusManagedFields(t *testing.T) { Replicas: &numOfRep, ScrapeInterval: &scrapeInterval, PersistentVolumeClaim: &corev1.PersistentVolumeClaimSpec{ - VolumeName: "prom-store", + // We need to define the storage size request for the pods to come + // up. + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, }, RemoteWrite: []monv1.RemoteWriteSpec{ { @@ -846,6 +853,8 @@ func assertPrometheusManagedFields(t *testing.T) { err = f.K8sClient.Create(context.Background(), ms) assert.NilError(t, err, "failed to create a monitoring stack") + f.AssertMonitoringStackReady(ms.Name, ms.Namespace, framework.WithTimeout(2*time.Minute))(t) + prom := monv1.Prometheus{} f.GetResourceWithRetry(t, ms.Name, ms.Namespace, &prom) @@ -1081,8 +1090,7 @@ const oboManagedFieldsJson = ` "f:volumeClaimTemplate": { "f:metadata": {}, "f:spec": { - "f:resources": {}, - "f:volumeName": {} + "f:resources": {"f:requests": {"f:storage": {}}} }, "f:status": {} } From 0767cad89d76da1a955a93cd773fd57aea81553e Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Wed, 3 Dec 2025 15:32:08 +0100 Subject: [PATCH 2/3] chore: refactor MonitoringStack conditions --- .../monitoring/monitoring-stack/conditions.go | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/pkg/controllers/monitoring/monitoring-stack/conditions.go b/pkg/controllers/monitoring/monitoring-stack/conditions.go index e4db554f6..5c646ac98 100644 --- a/pkg/controllers/monitoring/monitoring-stack/conditions.go +++ 
b/pkg/controllers/monitoring/monitoring-stack/conditions.go @@ -33,13 +33,20 @@ func updateConditions(ms *v1alpha1.MonitoringStack, prom monv1.Prometheus, recEr } } -func getMSCondition(conditions []v1alpha1.Condition, t v1alpha1.ConditionType) (v1alpha1.Condition, error) { +// getMSCondition returns the matching condition from the conditions slice. +// If no item matches, it returns a condition initialized to unknown status. +func getMSCondition(conditions []v1alpha1.Condition, t v1alpha1.ConditionType) v1alpha1.Condition { for _, c := range conditions { if c.Type == t { - return c, nil + return c } } - return v1alpha1.Condition{}, fmt.Errorf("condition type %v not found", t) + return v1alpha1.Condition{ + Type: t, + Status: v1alpha1.ConditionUnknown, + Reason: NoReason, + LastTransitionTime: metav1.Now(), + } } // updateResourceDiscovery updates the ResourceDiscoveryCondition based on the @@ -71,76 +78,67 @@ func updateResourceDiscovery(ms *v1alpha1.MonitoringStack) v1alpha1.Condition { // updateAvailable gets existing "Available" condition and updates its parameters // based on the Prometheus "Available" condition func updateAvailable(conditions []v1alpha1.Condition, prom monv1.Prometheus, generation int64) v1alpha1.Condition { - ac, err := getMSCondition(conditions, v1alpha1.AvailableCondition) - if err != nil { - ac = v1alpha1.Condition{ - Type: v1alpha1.AvailableCondition, - Status: v1alpha1.ConditionUnknown, - Reason: NoReason, - LastTransitionTime: metav1.Now(), - } - } + ac := getMSCondition(conditions, v1alpha1.AvailableCondition) prometheusAvailable, err := getPrometheusCondition(prom.Status.Conditions, monv1.Available) - if err != nil { ac.Status = v1alpha1.ConditionUnknown ac.Reason = PrometheusNotAvailable ac.Message = CannotReadPrometheusConditions + ac.ObservedGeneration = generation ac.LastTransitionTime = metav1.Now() return ac } - // MonitoringStack status will not be updated if there is a difference between the Prometheus generation - and 
the Prometheus ObservedGeneration. This can occur, for example, in the case of an invalid Prometheus configuration. + + // MonitoringStack status should not be updated if there is a difference + // between the Prometheus generation and the Prometheus ObservedGeneration. + // This can occur, for example, in the case of the Prometheus operator + // being down and not reconciling the resource. if prometheusAvailable.ObservedGeneration != prom.Generation { return ac } - if prometheusAvailable.Status != monv1.ConditionTrue { - ac.Status = prometheusStatusToMSStatus(prometheusAvailable.Status) - if prometheusAvailable.Status == monv1.ConditionDegraded { - ac.Reason = PrometheusDegraded - } else { - ac.Reason = PrometheusNotAvailable - } - ac.Message = prometheusAvailable.Message - ac.LastTransitionTime = metav1.Now() - return ac - } - ac.Status = v1alpha1.ConditionTrue - ac.Reason = AvailableReason - ac.Message = AvailableMessage ac.ObservedGeneration = generation ac.LastTransitionTime = metav1.Now() + if prometheusAvailable.Status == monv1.ConditionTrue { + ac.Status = v1alpha1.ConditionTrue + ac.Reason = AvailableReason + ac.Message = AvailableMessage + return ac + } + + ac.Status = prometheusStatusToMSStatus(prometheusAvailable.Status) + ac.Reason = PrometheusNotAvailable + if prometheusAvailable.Status == monv1.ConditionDegraded { + ac.Reason = PrometheusDegraded + } + ac.Message = prometheusAvailable.Message + return ac + } // updateReconciled updates "Reconciled" conditions based on the provided error value and // Prometheus "Reconciled" condition func updateReconciled(conditions []v1alpha1.Condition, prom monv1.Prometheus, generation int64, reconcileErr error) v1alpha1.Condition { - rc, cErr := getMSCondition(conditions, v1alpha1.ReconciledCondition) - if cErr != nil { - rc = v1alpha1.Condition{ - Type: v1alpha1.ReconciledCondition, - Status: v1alpha1.ConditionUnknown, - Reason: NoReason, - LastTransitionTime: metav1.Now(), - } - } + rc := 
getMSCondition(conditions, v1alpha1.ReconciledCondition) + if reconcileErr != nil { rc.Status = v1alpha1.ConditionFalse rc.Message = reconcileErr.Error() rc.Reason = FailedToReconcileReason rc.LastTransitionTime = metav1.Now() + rc.ObservedGeneration = generation return rc } - prometheusReconciled, reconcileErr := getPrometheusCondition(prom.Status.Conditions, monv1.Reconciled) - if reconcileErr != nil { + prometheusReconciled, err := getPrometheusCondition(prom.Status.Conditions, monv1.Reconciled) + if err != nil { rc.Status = v1alpha1.ConditionUnknown rc.Reason = PrometheusNotReconciled rc.Message = CannotReadPrometheusConditions rc.LastTransitionTime = metav1.Now() + rc.ObservedGeneration = generation return rc } @@ -148,6 +146,8 @@ func updateReconciled(conditions []v1alpha1.Condition, prom monv1.Prometheus, ge return rc } + rc.ObservedGeneration = generation + rc.LastTransitionTime = metav1.Now() if prometheusReconciled.Status != monv1.ConditionTrue { rc.Status = prometheusStatusToMSStatus(prometheusReconciled.Status) rc.Reason = PrometheusNotReconciled @@ -155,6 +155,7 @@ func updateReconciled(conditions []v1alpha1.Condition, prom monv1.Prometheus, ge rc.LastTransitionTime = metav1.Now() return rc } + rc.Status = v1alpha1.ConditionTrue rc.Reason = ReconciledReason rc.Message = SuccessfullyReconciledMessage @@ -174,8 +175,8 @@ func getPrometheusCondition(prometheusConditions []monv1.Condition, t monv1.Cond func prometheusStatusToMSStatus(ps monv1.ConditionStatus) v1alpha1.ConditionStatus { switch ps { - // Prometheus "Available" condition with status "Degraded" is reported as "Available" condition - // with status false + // Prometheus "Available" condition with status "Degraded" is reported as + // "Available" condition with status false. 
case monv1.ConditionDegraded: return v1alpha1.ConditionFalse case monv1.ConditionTrue: From 04c89bd2fe037333f4f2006d09a26cca38ab5bd3 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Fri, 12 Dec 2025 15:12:28 +0100 Subject: [PATCH 3/3] Reduce code duplication in test/e2e --- test/e2e/framework/assertions.go | 95 +++++++------------- test/e2e/monitoring_stack_controller_test.go | 40 +++------ 2 files changed, 44 insertions(+), 91 deletions(-) diff --git a/test/e2e/framework/assertions.go b/test/e2e/framework/assertions.go index be5cf18b4..fc5b6281d 100644 --- a/test/e2e/framework/assertions.go +++ b/test/e2e/framework/assertions.go @@ -210,51 +210,10 @@ func (f *Framework) AssertStatefulSetContainerHasArg(t *testing.T, name, namespa // AssertMonitoringStackReady asserts that a monitoring stack is successfully deployed. func (f *Framework) AssertMonitoringStackReady(name, namespace string, fns ...OptionFn) func(t *testing.T) { - option := AssertOption{ - PollInterval: 5 * time.Second, - WaitTimeout: DefaultTestTimeout, - } - for _, fn := range fns { - fn(&option) - } - return func(t *testing.T) { t.Helper() - key := types.NamespacedName{Name: name, Namespace: namespace} - var loopErr error - if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) { - ms := &v1alpha1.MonitoringStack{} - if err := f.K8sClient.Get(context.Background(), key, ms); err != nil { - loopErr = err - return false, nil - } - - var foundAvailable bool - for _, cond := range ms.Status.Conditions { - if cond.Type == v1alpha1.ResourceDiscoveryCondition { - continue - } - - if ms.Generation != cond.ObservedGeneration { - loopErr = fmt.Errorf("%s condition observed generation %d != generation %d", cond.Type, cond.ObservedGeneration, ms.Generation) - return false, nil - } - - foundAvailable = foundAvailable || cond.Type == v1alpha1.AvailableCondition - if cond.Status != v1alpha1.ConditionTrue { - loopErr = 
fmt.Errorf("expected %s condition to be True, got %s", cond.Type, cond.Status) - return false, nil - } - } - - if !foundAvailable { - loopErr = fmt.Errorf("%s condition not found", v1alpha1.AvailableCondition) - return false, nil - } - - return true, nil - }); err != nil { - t.Fatal(fmt.Errorf("monitoringStack %s/%s was never ready: %w: %w", namespace, name, err, loopErr)) + if _, err := f.GetMonitoringStackWhenReady(name, namespace); err != nil { + t.Fatal(err) } } } @@ -694,7 +653,7 @@ func (f *Framework) AssertNoEventWithReason(t *testing.T, reason string) { } } -func (f *Framework) GetStackWhenAvailable(t *testing.T, name, namespace string) v1alpha1.MonitoringStack { +func (f *Framework) GetMonitoringStackWhenReady(name, namespace string) (*v1alpha1.MonitoringStack, error) { var ms v1alpha1.MonitoringStack key := types.NamespacedName{ Name: name, @@ -702,24 +661,43 @@ func (f *Framework) GetStackWhenAvailable(t *testing.T, name, namespace string) } var lastErr error - err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, DefaultTestTimeout*2, true, func(ctx context.Context) (bool, error) { - lastErr = nil + if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, DefaultTestTimeout*2, true, func(_ context.Context) (bool, error) { err := f.K8sClient.Get(context.Background(), key, &ms) if err != nil { lastErr = err return false, nil } - availableC := getConditionByType(ms.Status.Conditions, v1alpha1.AvailableCondition) - if availableC != nil && availableC.Status == v1alpha1.ConditionTrue { - return true, nil + + var foundAvailable bool + for _, cond := range ms.Status.Conditions { + if cond.Type == v1alpha1.ResourceDiscoveryCondition { + continue + } + + if ms.Generation != cond.ObservedGeneration { + lastErr = fmt.Errorf("%s condition observed generation %d != generation %d", cond.Type, cond.ObservedGeneration, ms.Generation) + return false, nil + } + + foundAvailable = foundAvailable || cond.Type == 
v1alpha1.AvailableCondition + if cond.Status != v1alpha1.ConditionTrue { + lastErr = fmt.Errorf("expected %s condition to be True, got %s", cond.Type, cond.Status) + return false, nil + } } - return false, nil - }) - if wait.Interrupted(err) { - t.Fatal(fmt.Errorf("MonitoringStack %s/%s was not available - err: %w | %v", namespace, name, lastErr, ms.Status.Conditions)) + if !foundAvailable { + lastErr = fmt.Errorf("%s condition not found", v1alpha1.AvailableCondition) + return false, nil + } + + return true, nil + + }); err != nil { + return nil, fmt.Errorf("MonitoringStack %s/%s: %w: %w", namespace, name, err, lastErr) } - return ms + + return &ms, nil } func (f *Framework) AssertAlertmanagerAbsent(t *testing.T, name, namespace string) { @@ -740,15 +718,6 @@ func (f *Framework) AssertAlertmanagerAbsent(t *testing.T, name, namespace strin } } -func getConditionByType(conditions []v1alpha1.Condition, ctype v1alpha1.ConditionType) *v1alpha1.Condition { - for _, c := range conditions { - if c.Type == ctype { - return &c - } - } - return nil -} - // AssertPrometheusReplicaStatus asserts that prometheus is scaled correctly duration a time period of customForeverTestTimeout func (f *Framework) AssertPrometheusReplicaStatus(name, namespace string, expectedReplicas int32, fns ...OptionFn) func(t *testing.T) { option := AssertOption{ diff --git a/test/e2e/monitoring_stack_controller_test.go b/test/e2e/monitoring_stack_controller_test.go index dbe2a9720..0dfcc1bc7 100644 --- a/test/e2e/monitoring_stack_controller_test.go +++ b/test/e2e/monitoring_stack_controller_test.go @@ -32,7 +32,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1" - monitoringstack "github.com/rhobs/observability-operator/pkg/controllers/monitoring/monitoring-stack" "github.com/rhobs/observability-operator/pkg/controllers/util" "github.com/rhobs/observability-operator/test/e2e/framework" ) @@ -274,26 +273,8 @@ func 
reconcileStack(t *testing.T) { assert.Equal(t, expected.LogLevel, generated.Spec.LogLevel) assert.Equal(t, expected.Retention, generated.Spec.Retention) - availableMs := f.GetStackWhenAvailable(t, ms.Name, ms.Namespace) - availableC := getConditionByType(availableMs.Status.Conditions, stack.AvailableCondition) - assertCondition(t, availableC, monitoringstack.AvailableReason, stack.AvailableCondition, availableMs) - reconciledC := getConditionByType(availableMs.Status.Conditions, stack.ReconciledCondition) - assertCondition(t, reconciledC, monitoringstack.ReconciledReason, stack.ReconciledCondition, availableMs) -} - -func assertCondition(t *testing.T, c *stack.Condition, reason string, ctype stack.ConditionType, ms stack.MonitoringStack) { - assert.Check(t, c != nil, "failed to find %s status condition for %s monitoring stack", ctype, ms.Name) - assert.Check(t, c.Status == stack.ConditionTrue, "unexpected %s condition status", ctype) - assert.Check(t, c.Reason == reason, "unexpected %s condition reason", ctype) -} - -func getConditionByType(conditions []stack.Condition, ctype stack.ConditionType) *stack.Condition { - for _, c := range conditions { - if c.Type == ctype { - return &c - } - } - return nil + _, err = f.GetMonitoringStackWhenReady(ms.Name, ms.Namespace) + assert.NilError(t, err) } func reconcileRevertsManualChanges(t *testing.T) { @@ -548,21 +529,24 @@ func assertAlertmanagerNotDeployed(t *testing.T) { if err := f.K8sClient.Create(context.Background(), ms); err != nil { t.Fatal(err) } - _ = f.GetStackWhenAvailable(t, ms.Name, ms.Namespace) + f.AssertMonitoringStackReady(ms.Name, ms.Namespace)(t) f.AssertAlertmanagerAbsent(t, ms.Name, ms.Namespace) } func assertAlertmanagerDeployedAndRemoved(t *testing.T) { ms := newMonitoringStack(t, "alertmanager-deployed-and-removed") - if err := f.K8sClient.Create(context.Background(), ms); err != nil { - t.Fatal(err) - } - updatedMS := f.GetStackWhenAvailable(t, ms.Name, ms.Namespace) + err := 
f.K8sClient.Create(context.Background(), ms) + assert.NilError(t, err) + + updatedMS, err := f.GetMonitoringStackWhenReady(ms.Name, ms.Namespace) + assert.NilError(t, err) + var am monv1.Alertmanager key := types.NamespacedName{Name: ms.Name, Namespace: ms.Namespace} - err := f.K8sClient.Get(context.Background(), key, &am) + err = f.K8sClient.Get(context.Background(), key, &am) assert.NilError(t, err) - err = f.UpdateWithRetry(t, &updatedMS, framework.SetAlertmanagerDisabled(true)) + + err = f.UpdateWithRetry(t, updatedMS, framework.SetAlertmanagerDisabled(true)) assert.NilError(t, err, "failed to update monitoring stack to disable alertmanager") f.AssertAlertmanagerAbsent(t, updatedMS.Name, updatedMS.Namespace)