Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 43 additions & 42 deletions pkg/controllers/monitoring/monitoring-stack/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,20 @@ func updateConditions(ms *v1alpha1.MonitoringStack, prom monv1.Prometheus, recEr
}
}

func getMSCondition(conditions []v1alpha1.Condition, t v1alpha1.ConditionType) (v1alpha1.Condition, error) {
// getMSCondition returns the matching condition from the conditions slice.
// If no item matches, it returns a condition initialized to unknown status.
func getMSCondition(conditions []v1alpha1.Condition, t v1alpha1.ConditionType) v1alpha1.Condition {
for _, c := range conditions {
if c.Type == t {
return c, nil
return c
}
}
return v1alpha1.Condition{}, fmt.Errorf("condition type %v not found", t)
return v1alpha1.Condition{
Type: t,
Status: v1alpha1.ConditionUnknown,
Reason: NoReason,
LastTransitionTime: metav1.Now(),
}
}

// updateResourceDiscovery updates the ResourceDiscoveryCondition based on the
Expand Down Expand Up @@ -71,90 +78,84 @@ func updateResourceDiscovery(ms *v1alpha1.MonitoringStack) v1alpha1.Condition {
// updateAvailable gets existing "Available" condition and updates its parameters
// based on the Prometheus "Available" condition
func updateAvailable(conditions []v1alpha1.Condition, prom monv1.Prometheus, generation int64) v1alpha1.Condition {
ac, err := getMSCondition(conditions, v1alpha1.AvailableCondition)
if err != nil {
ac = v1alpha1.Condition{
Type: v1alpha1.AvailableCondition,
Status: v1alpha1.ConditionUnknown,
Reason: NoReason,
LastTransitionTime: metav1.Now(),
}
}
ac := getMSCondition(conditions, v1alpha1.AvailableCondition)

prometheusAvailable, err := getPrometheusCondition(prom.Status.Conditions, monv1.Available)

if err != nil {
ac.Status = v1alpha1.ConditionUnknown
ac.Reason = PrometheusNotAvailable
ac.Message = CannotReadPrometheusConditions
ac.ObservedGeneration = generation
ac.LastTransitionTime = metav1.Now()
return ac
}
// MonitoringStack status will not be updated if there is a difference between the Prometheus generation
// and the Prometheus ObservedGeneration. This can occur, for example, in the case of an invalid Prometheus configuration.

// MonitoringStack status should not be updated if there is a difference
// between the Prometheus generation and the Prometheus ObservedGeneration.
// This can occur, for example, in the case of the Prometheus operator
// being down and not reconciling the resource.
if prometheusAvailable.ObservedGeneration != prom.Generation {
return ac
}

if prometheusAvailable.Status != monv1.ConditionTrue {
ac.Status = prometheusStatusToMSStatus(prometheusAvailable.Status)
if prometheusAvailable.Status == monv1.ConditionDegraded {
ac.Reason = PrometheusDegraded
} else {
ac.Reason = PrometheusNotAvailable
}
ac.Message = prometheusAvailable.Message
ac.LastTransitionTime = metav1.Now()
return ac
}
ac.Status = v1alpha1.ConditionTrue
ac.Reason = AvailableReason
ac.Message = AvailableMessage
ac.ObservedGeneration = generation
ac.LastTransitionTime = metav1.Now()
if prometheusAvailable.Status == monv1.ConditionTrue {
ac.Status = v1alpha1.ConditionTrue
ac.Reason = AvailableReason
ac.Message = AvailableMessage
return ac
}

ac.Status = prometheusStatusToMSStatus(prometheusAvailable.Status)
ac.Reason = PrometheusNotAvailable
if prometheusAvailable.Status == monv1.ConditionDegraded {
ac.Reason = PrometheusDegraded
}
ac.Message = prometheusAvailable.Message

return ac

}

// updateReconciled updates "Reconciled" conditions based on the provided error value and
// Prometheus "Reconciled" condition
func updateReconciled(conditions []v1alpha1.Condition, prom monv1.Prometheus, generation int64, reconcileErr error) v1alpha1.Condition {
rc, cErr := getMSCondition(conditions, v1alpha1.ReconciledCondition)
if cErr != nil {
rc = v1alpha1.Condition{
Type: v1alpha1.ReconciledCondition,
Status: v1alpha1.ConditionUnknown,
Reason: NoReason,
LastTransitionTime: metav1.Now(),
}
}
rc := getMSCondition(conditions, v1alpha1.ReconciledCondition)

if reconcileErr != nil {
rc.Status = v1alpha1.ConditionFalse
rc.Message = reconcileErr.Error()
rc.Reason = FailedToReconcileReason
rc.LastTransitionTime = metav1.Now()
rc.ObservedGeneration = generation

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we could have a

    defer func() {
        rc.ObservedGeneration = generation
        rc.LastTransitionTime = metav1.Now()
    }()

so we don't have to worry about those.

(given that it seems to always be set to metav1.Now(), I think LastTransitionTime could be set in one place just before publishing the conditions, but that's out of scope)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the issue is that we return the current condition at L146 but I'll think about a better code flow because the current one is indeed complicated and error-prone.

return rc
}
prometheusReconciled, reconcileErr := getPrometheusCondition(prom.Status.Conditions, monv1.Reconciled)

if reconcileErr != nil {
prometheusReconciled, err := getPrometheusCondition(prom.Status.Conditions, monv1.Reconciled)
if err != nil {
rc.Status = v1alpha1.ConditionUnknown
rc.Reason = PrometheusNotReconciled
rc.Message = CannotReadPrometheusConditions
rc.LastTransitionTime = metav1.Now()
rc.ObservedGeneration = generation
return rc
}

if prometheusReconciled.ObservedGeneration != prom.Generation {
return rc
}

rc.ObservedGeneration = generation
rc.LastTransitionTime = metav1.Now()
Comment on lines +149 to +150

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see we already re-set some of those below, maybe a defer https://github.com/rhobs/observability-operator/pull/951/files#r2606644482?

if prometheusReconciled.Status != monv1.ConditionTrue {
rc.Status = prometheusStatusToMSStatus(prometheusReconciled.Status)
rc.Reason = PrometheusNotReconciled
rc.Message = prometheusReconciled.Message
rc.LastTransitionTime = metav1.Now()
return rc
}

rc.Status = v1alpha1.ConditionTrue
rc.Reason = ReconciledReason
rc.Message = SuccessfullyReconciledMessage
Expand All @@ -174,8 +175,8 @@ func getPrometheusCondition(prometheusConditions []monv1.Condition, t monv1.Cond

func prometheusStatusToMSStatus(ps monv1.ConditionStatus) v1alpha1.ConditionStatus {
switch ps {
// Prometheus "Available" condition with status "Degraded" is reported as "Available" condition
// with status false
// Prometheus "Available" condition with status "Degraded" is reported as
// "Available" condition with status false.
case monv1.ConditionDegraded:
return v1alpha1.ConditionFalse
case monv1.ConditionTrue:
Expand Down
66 changes: 43 additions & 23 deletions test/e2e/framework/assertions.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ func (f *Framework) AssertStatefulsetReady(name, namespace string, fns ...Option
t.Helper()
key := types.NamespacedName{Name: name, Namespace: namespace}
if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(ctx context.Context) (bool, error) {
pod := &appsv1.StatefulSet{}
err := f.K8sClient.Get(context.Background(), key, pod)
return err == nil && pod.Status.ReadyReplicas == *pod.Spec.Replicas, nil
ss := &appsv1.StatefulSet{}
err := f.K8sClient.Get(context.Background(), key, ss)
return err == nil && ss.Status.ReadyReplicas == *ss.Spec.Replicas, nil
}); err != nil {
t.Fatal(fmt.Errorf("statefulset %s was never ready with %v", name, err))
}
Expand Down Expand Up @@ -208,6 +208,16 @@ func (f *Framework) AssertStatefulSetContainerHasArg(t *testing.T, name, namespa
}
}

// AssertMonitoringStackReady asserts that a monitoring stack is successfully deployed.
func (f *Framework) AssertMonitoringStackReady(name, namespace string, fns ...OptionFn) func(t *testing.T) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see there a GetStackWhenAvailable

func (f *Framework) GetStackWhenAvailable(t *testing.T, name, namespace string) v1alpha1.MonitoringStack {
, maybe we can merge the two, to return the MS but with your more strict conditions?

And with that we can also simplify

availableMs := f.GetStackWhenAvailable(t, ms.Name, ms.Namespace)
availableC := getConditionByType(availableMs.Status.Conditions, stack.AvailableCondition)
assertCondition(t, availableC, monitoringstack.AvailableReason, stack.AvailableCondition, availableMs)
reconciledC := getConditionByType(availableMs.Status.Conditions, stack.ReconciledCondition)
assertCondition(t, reconciledC, monitoringstack.ReconciledReason, stack.ReconciledCondition, availableMs)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea!

return func(t *testing.T) {
t.Helper()
if _, err := f.GetMonitoringStackWhenReady(name, namespace); err != nil {
t.Fatal(err)
}
}
}

// AssertDeploymentReady asserts that a deployment has the desired number of pods running
func (f *Framework) AssertDeploymentReady(name, namespace string, fns ...OptionFn) func(t *testing.T) {
option := AssertOption{
Expand Down Expand Up @@ -643,32 +653,51 @@ func (f *Framework) AssertNoEventWithReason(t *testing.T, reason string) {
}
}

func (f *Framework) GetStackWhenAvailable(t *testing.T, name, namespace string) v1alpha1.MonitoringStack {
func (f *Framework) GetMonitoringStackWhenReady(name, namespace string) (*v1alpha1.MonitoringStack, error) {
var ms v1alpha1.MonitoringStack
key := types.NamespacedName{
Name: name,
Namespace: namespace,
}
var lastErr error

err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, DefaultTestTimeout*2, true, func(ctx context.Context) (bool, error) {
lastErr = nil
if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, DefaultTestTimeout*2, true, func(_ context.Context) (bool, error) {
err := f.K8sClient.Get(context.Background(), key, &ms)
if err != nil {
lastErr = err
return false, nil
}
availableC := getConditionByType(ms.Status.Conditions, v1alpha1.AvailableCondition)
if availableC != nil && availableC.Status == v1alpha1.ConditionTrue {
return true, nil

var foundAvailable bool
for _, cond := range ms.Status.Conditions {
if cond.Type == v1alpha1.ResourceDiscoveryCondition {
continue
}

if ms.Generation != cond.ObservedGeneration {
lastErr = fmt.Errorf("%s condition observed generation %d != generation %d", cond.Type, cond.ObservedGeneration, ms.Generation)
return false, nil
}

foundAvailable = foundAvailable || cond.Type == v1alpha1.AvailableCondition
if cond.Status != v1alpha1.ConditionTrue {
lastErr = fmt.Errorf("expected %s condition to be True, got %s", cond.Type, cond.Status)
return false, nil
}
}
return false, nil
})

if wait.Interrupted(err) {
t.Fatal(fmt.Errorf("MonitoringStack %s/%s was not available - err: %w | %v", namespace, name, lastErr, ms.Status.Conditions))
if !foundAvailable {
lastErr = fmt.Errorf("%s condition not found", v1alpha1.AvailableCondition)
return false, nil
}

return true, nil

}); err != nil {
return nil, fmt.Errorf("MonitoringStack %s/%s: %w: %w", namespace, name, err, lastErr)
}
return ms

return &ms, nil
}

func (f *Framework) AssertAlertmanagerAbsent(t *testing.T, name, namespace string) {
Expand All @@ -689,15 +718,6 @@ func (f *Framework) AssertAlertmanagerAbsent(t *testing.T, name, namespace strin
}
}

// getConditionByType scans conditions for the entry whose Type equals ctype
// and returns a pointer to it. It returns nil when no entry matches.
func getConditionByType(conditions []v1alpha1.Condition, ctype v1alpha1.ConditionType) *v1alpha1.Condition {
	for i := range conditions {
		if conditions[i].Type == ctype {
			return &conditions[i]
		}
	}
	return nil
}

// AssertPrometheusReplicaStatus asserts that prometheus is scaled correctly duration a time period of customForeverTestTimeout
func (f *Framework) AssertPrometheusReplicaStatus(name, namespace string, expectedReplicas int32, fns ...OptionFn) func(t *testing.T) {
option := AssertOption{
Expand Down
54 changes: 23 additions & 31 deletions test/e2e/monitoring_stack_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
policyv1 "k8s.io/api/policy/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
Expand All @@ -31,7 +32,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
monitoringstack "github.com/rhobs/observability-operator/pkg/controllers/monitoring/monitoring-stack"
"github.com/rhobs/observability-operator/pkg/controllers/util"
"github.com/rhobs/observability-operator/test/e2e/framework"
)
Expand Down Expand Up @@ -273,26 +273,8 @@ func reconcileStack(t *testing.T) {
assert.Equal(t, expected.LogLevel, generated.Spec.LogLevel)
assert.Equal(t, expected.Retention, generated.Spec.Retention)

availableMs := f.GetStackWhenAvailable(t, ms.Name, ms.Namespace)
availableC := getConditionByType(availableMs.Status.Conditions, stack.AvailableCondition)
assertCondition(t, availableC, monitoringstack.AvailableReason, stack.AvailableCondition, availableMs)
reconciledC := getConditionByType(availableMs.Status.Conditions, stack.ReconciledCondition)
assertCondition(t, reconciledC, monitoringstack.ReconciledReason, stack.ReconciledCondition, availableMs)
}

// assertCondition verifies that the given monitoring-stack condition exists,
// has status True, and carries the expected reason. Failures are reported as
// non-fatal checks so that all mismatches show up in one test run.
func assertCondition(t *testing.T, c *stack.Condition, reason string, ctype stack.ConditionType, ms stack.MonitoringStack) {
	t.Helper()
	// assert.Check marks the test as failed but does NOT stop execution, so a
	// nil condition must short-circuit here; otherwise the dereferences below
	// would panic instead of producing a readable test failure.
	if !assert.Check(t, c != nil, "failed to find %s status condition for %s monitoring stack", ctype, ms.Name) {
		return
	}
	assert.Check(t, c.Status == stack.ConditionTrue, "unexpected %s condition status", ctype)
	assert.Check(t, c.Reason == reason, "unexpected %s condition reason", ctype)
}

func getConditionByType(conditions []stack.Condition, ctype stack.ConditionType) *stack.Condition {
for _, c := range conditions {
if c.Type == ctype {
return &c
}
}
return nil
_, err = f.GetMonitoringStackWhenReady(ms.Name, ms.Namespace)
assert.NilError(t, err)
}

func reconcileRevertsManualChanges(t *testing.T) {
Expand Down Expand Up @@ -547,21 +529,24 @@ func assertAlertmanagerNotDeployed(t *testing.T) {
if err := f.K8sClient.Create(context.Background(), ms); err != nil {
t.Fatal(err)
}
_ = f.GetStackWhenAvailable(t, ms.Name, ms.Namespace)
f.AssertMonitoringStackReady(ms.Name, ms.Namespace)(t)
f.AssertAlertmanagerAbsent(t, ms.Name, ms.Namespace)
}

func assertAlertmanagerDeployedAndRemoved(t *testing.T) {
ms := newMonitoringStack(t, "alertmanager-deployed-and-removed")
if err := f.K8sClient.Create(context.Background(), ms); err != nil {
t.Fatal(err)
}
updatedMS := f.GetStackWhenAvailable(t, ms.Name, ms.Namespace)
err := f.K8sClient.Create(context.Background(), ms)
assert.NilError(t, err)

updatedMS, err := f.GetMonitoringStackWhenReady(ms.Name, ms.Namespace)
assert.NilError(t, err)

var am monv1.Alertmanager
key := types.NamespacedName{Name: ms.Name, Namespace: ms.Namespace}
err := f.K8sClient.Get(context.Background(), key, &am)
err = f.K8sClient.Get(context.Background(), key, &am)
assert.NilError(t, err)
err = f.UpdateWithRetry(t, &updatedMS, framework.SetAlertmanagerDisabled(true))

err = f.UpdateWithRetry(t, updatedMS, framework.SetAlertmanagerDisabled(true))
assert.NilError(t, err, "failed to update monitoring stack to disable alertmanager")

f.AssertAlertmanagerAbsent(t, updatedMS.Name, updatedMS.Namespace)
Expand Down Expand Up @@ -810,7 +795,13 @@ func assertPrometheusManagedFields(t *testing.T) {
Replicas: &numOfRep,
ScrapeInterval: &scrapeInterval,
PersistentVolumeClaim: &corev1.PersistentVolumeClaimSpec{
VolumeName: "prom-store",
// We need to define the storage size request for the pods to come
// up.
Resources: corev1.VolumeResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceStorage: resource.MustParse("1Gi"),
},
},
},
RemoteWrite: []monv1.RemoteWriteSpec{
{
Expand Down Expand Up @@ -846,6 +837,8 @@ func assertPrometheusManagedFields(t *testing.T) {
err = f.K8sClient.Create(context.Background(), ms)
assert.NilError(t, err, "failed to create a monitoring stack")

f.AssertMonitoringStackReady(ms.Name, ms.Namespace, framework.WithTimeout(2*time.Minute))(t)

prom := monv1.Prometheus{}
f.GetResourceWithRetry(t, ms.Name, ms.Namespace, &prom)

Expand Down Expand Up @@ -1081,8 +1074,7 @@ const oboManagedFieldsJson = `
"f:volumeClaimTemplate": {
"f:metadata": {},
"f:spec": {
"f:resources": {},
"f:volumeName": {}
"f:resources": {"f:requests": {"f:storage": {}}}
},
"f:status": {}
}
Expand Down
Loading