Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions internal/controller/appserver/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,15 +357,19 @@ func GenerateOLSDeployment(r reconciler.Reconciler, cr *olsv1alpha1.OLSConfig) (
// Get ResourceVersions for tracking - these resources should already exist
// If they don't exist, we'll get empty strings which is fine for initial creation
configMapResourceVersion, _ := utils.GetConfigMapResourceVersion(r, ctx, utils.OLSConfigCmName)
secretResourceVersion, _ := utils.GetSecretResourceVersion(r, ctx, utils.PostgresSecretName)

annotations := map[string]string{
utils.OLSConfigMapResourceVersionAnnotation: configMapResourceVersion,
utils.PostgresSecretResourceVersionAnnotation: secretResourceVersion,
}

deployment := appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: utils.OLSAppServerDeploymentName,
Namespace: r.GetNamespace(),
Labels: utils.GenerateAppServerSelectorLabels(),
Annotations: map[string]string{
utils.OLSConfigMapResourceVersionAnnotation: configMapResourceVersion,
},
Name: utils.OLSAppServerDeploymentName,
Namespace: r.GetNamespace(),
Labels: utils.GenerateAppServerSelectorLabels(),
Annotations: annotations,
},
Spec: appsv1.DeploymentSpec{
Selector: &metav1.LabelSelector{
Expand Down
45 changes: 44 additions & 1 deletion internal/controller/postgres/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,18 +171,27 @@ func GeneratePostgresDeployment(r reconciler.Reconciler, ctx context.Context, cr
// Get ResourceVersions for tracking - these resources should already exist
// If they don't exist, we'll get empty strings which is fine for initial creation
configMapResourceVersion, _ := utils.GetConfigMapResourceVersion(r, ctx, utils.PostgresConfigMap)
secretResourceVersion, _ := utils.GetSecretResourceVersion(r, ctx, utils.PostgresSecretName)
postgresCertsSecretVersion, _ := utils.GetSecretResourceVersion(r, ctx, utils.PostgresCertsSecretName)

deployment := appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: utils.PostgresDeploymentName,
Namespace: r.GetNamespace(),
Labels: utils.GeneratePostgresSelectorLabels(),
Annotations: map[string]string{
utils.PostgresConfigMapResourceVersionAnnotation: configMapResourceVersion,
utils.PostgresConfigMapResourceVersionAnnotation: configMapResourceVersion,
utils.PostgresSecretResourceVersionAnnotation: secretResourceVersion,
utils.PostgresCertsSecretResourceVersionAnnotation: postgresCertsSecretVersion,
},
},
Spec: appsv1.DeploymentSpec{
Replicas: &cacheReplicas,
// Use Recreate strategy for PostgreSQL since it uses ReadWriteOnce PVC
// This prevents Multi-Attach errors during rolling updates
Strategy: appsv1.DeploymentStrategy{
Type: appsv1.RecreateDeploymentStrategyType,
},
Selector: &metav1.LabelSelector{
MatchLabels: utils.GeneratePostgresSelectorLabels(),
},
Expand Down Expand Up @@ -272,6 +281,35 @@ func UpdatePostgresDeployment(r reconciler.Reconciler, ctx context.Context, cr *
}
}

// Check if Secret ResourceVersion has changed
currentSecretVersion, err := utils.GetSecretResourceVersion(r, ctx, utils.PostgresSecretName)
if err != nil {
r.GetLogger().Info("failed to get Secret ResourceVersion", "error", err)
changed = true
} else {
storedSecretVersion := existingDeployment.Annotations[utils.PostgresSecretResourceVersionAnnotation]
if storedSecretVersion != currentSecretVersion {
changed = true
}
}

// Check if Postgres TLS Certificate Secret ResourceVersion has changed
// This enables smooth rotation when service-ca-operator regenerates certificates
currentPostgresCertsVersion, err := utils.GetSecretResourceVersion(r, ctx, utils.PostgresCertsSecretName)
if err != nil {
r.GetLogger().Info("failed to get Postgres TLS Certificate Secret ResourceVersion", "error", err)
// Don't set changed = true here - TLS cert secret might not exist yet (service-ca-operator creates it)
// We'll track it when it becomes available
} else {
storedPostgresCertsVersion := existingDeployment.Annotations[utils.PostgresCertsSecretResourceVersionAnnotation]
if storedPostgresCertsVersion != currentPostgresCertsVersion {
r.GetLogger().Info("Postgres TLS certificate secret ResourceVersion changed, triggering restart",
"oldVersion", storedPostgresCertsVersion,
"newVersion", currentPostgresCertsVersion)
changed = true
}
}

// If nothing changed, skip update
if !changed {
return nil
Expand All @@ -280,6 +318,11 @@ func UpdatePostgresDeployment(r reconciler.Reconciler, ctx context.Context, cr *
// Apply changes - always update spec and annotations since something changed
existingDeployment.Spec = desiredDeployment.Spec
existingDeployment.Annotations[utils.PostgresConfigMapResourceVersionAnnotation] = desiredDeployment.Annotations[utils.PostgresConfigMapResourceVersionAnnotation]
existingDeployment.Annotations[utils.PostgresSecretResourceVersionAnnotation] = desiredDeployment.Annotations[utils.PostgresSecretResourceVersionAnnotation]
// Update TLS cert secret ResourceVersion if it exists in desired deployment
if postgresCertsVersion, ok := desiredDeployment.Annotations[utils.PostgresCertsSecretResourceVersionAnnotation]; ok {
existingDeployment.Annotations[utils.PostgresCertsSecretResourceVersionAnnotation] = postgresCertsVersion
}

r.GetLogger().Info("updating OLS postgres deployment", "name", existingDeployment.Name)

Expand Down
6 changes: 6 additions & 0 deletions internal/controller/utils/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ const (
// PostgresSecretResourceVersionAnnotation is the annotation key for tracking Secret ResourceVersion
//nolint:gosec // G101: This is an annotation key name, not a credential
PostgresSecretResourceVersionAnnotation = "ols.openshift.io/postgres-secret-version"
// PostgresCertsSecretResourceVersionAnnotation is the annotation key for tracking Postgres TLS certificate Secret ResourceVersion
//nolint:gosec // not a credential, just an annotation key
PostgresCertsSecretResourceVersionAnnotation = "ols.openshift.io/postgres-certs-secret-version"
// ConsoleUIServiceCertSecretResourceVersionAnnotation is the annotation key for tracking Console UI TLS certificate Secret ResourceVersion
//nolint:gosec // not a credential, just an annotation key
ConsoleUIServiceCertSecretResourceVersionAnnotation = "ols.openshift.io/console-ui-certs-secret-version"
// PostgresServiceName is the name of OLS application Postgres server service
PostgresServiceName = "lightspeed-postgres-server"
// PostgresSecretName is the name of OLS application Postgres secret
Expand Down
185 changes: 177 additions & 8 deletions internal/controller/watchers/watchers.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ package watchers
import (
"context"
"fmt"
"time"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
Expand Down Expand Up @@ -155,7 +158,24 @@ func (h *ConfigMapUpdateHandler) Create(ctx context.Context, evt event.CreateEve
}
}

// Fetch the OLSConfig CR to check if this configmap should be watched
// FIRST: Check if this is a system ConfigMap (before checking CR reference)
// This handles cases where system ConfigMaps like openshift-service-ca.crt are recreated
watcherConfig, _ := h.Reconciler.GetWatcherConfig().(*utils.WatcherConfig)
if watcherConfig != nil {
for _, systemCM := range watcherConfig.ConfigMaps.SystemResources {
if cm.GetNamespace() == systemCM.Namespace && cm.GetName() == systemCM.Name {
// This is a system ConfigMap - trigger restart directly
h.Reconciler.GetLogger().Info("Detected system configmap recreation",
"configmap", systemCM.Name,
"namespace", systemCM.Namespace,
"description", systemCM.Description)
ConfigMapWatcherFilter(h.Reconciler, ctx, obj)
return
}
}
}

// SECOND: Check if this configmap is referenced in the CR (for user-provided configmaps)
cr := &olsv1alpha1.OLSConfig{}
err := h.Reconciler.Get(ctx, types.NamespacedName{Name: utils.OLSConfigName}, cr)
if err != nil {
Expand Down Expand Up @@ -368,20 +388,170 @@ var restartFuncs = map[string]RestartFunc{
utils.ConsoleUIDeploymentName: console.RestartConsoleUI,
}

// restart corresponding deployment
func restartDeployment(r reconciler.Reconciler, ctx context.Context, affectedDeployments []string, namespace string, name string, useLCore bool) {
// deploymentExistsAndReady checks if a deployment exists and is in a stable state (not being created or updated)
// Returns true if deployment exists and the controller has observed the current generation
func deploymentExistsAndReady(r reconciler.Reconciler, ctx context.Context, deploymentName string) bool {
deployment := &appsv1.Deployment{}
err := r.Get(ctx, client.ObjectKey{Name: deploymentName, Namespace: r.GetNamespace()}, deployment)
if err != nil || errors.IsNotFound(err) {
return false
}
// Check if deployment is in a stable state (not being created or updated)
// Deployment is considered ready if the controller has observed the current generation
// ObservedGeneration == Generation means the deployment is stable
return deployment.Status.ObservedGeneration == deployment.Generation
}

// waitForDeploymentReady waits for a deployment to become ready after restart
func waitForDeploymentReady(r reconciler.Reconciler, ctx context.Context, deploymentName string, timeout time.Duration) error {
return wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
deployment := &appsv1.Deployment{}
err := r.Get(ctx, client.ObjectKey{Name: deploymentName, Namespace: r.GetNamespace()}, deployment)
if err != nil {
return false, err
}

// Check if deployment is ready
if deployment.Status.ReadyReplicas == *deployment.Spec.Replicas &&
deployment.Status.UpdatedReplicas == *deployment.Spec.Replicas &&
deployment.Status.UnavailableReplicas == 0 {
// Check Available condition
for _, condition := range deployment.Status.Conditions {
if condition.Type == appsv1.DeploymentAvailable && condition.Status == v1.ConditionTrue {
return true, nil
}
}
}
return false, nil
})
}

// restartDeployment restarts affected deployments. When conversationCache uses postgres,
// it ensures Postgres restarts first and waits for it to be ready before restarting app server.
func restartDeployment(r reconciler.Reconciler, ctx context.Context, affectedDeployments []string, namespace string, name string, useLCore bool) {
// Resolve ACTIVE_BACKEND to actual deployment name and build deployment list
resolvedDeployments := make([]string, 0, len(affectedDeployments))
for _, depName := range affectedDeployments {
// Resolve ACTIVE_BACKEND to actual deployment name
if depName == "ACTIVE_BACKEND" {
if useLCore {
depName = utils.LCoreDeploymentName
resolvedDeployments = append(resolvedDeployments, utils.LCoreDeploymentName)
} else {
depName = utils.OLSAppServerDeploymentName
resolvedDeployments = append(resolvedDeployments, utils.OLSAppServerDeploymentName)
}
} else {
resolvedDeployments = append(resolvedDeployments, depName)
}
}

// Check if conversationCache uses postgres to determine restart strategy
cr := &olsv1alpha1.OLSConfig{}
usesPostgresCache := false
getErr := r.Get(ctx, types.NamespacedName{Name: utils.OLSConfigName}, cr)
if getErr == nil {
usesPostgresCache = cr.Spec.OLSConfig.ConversationCache.Type == olsv1alpha1.Postgres
}

// Separate deployments into categories
var postgresDeployments []string
var appServerDeployments []string
var otherDeployments []string

for _, depName := range resolvedDeployments {
switch depName {
case utils.PostgresDeploymentName:
postgresDeployments = append(postgresDeployments, depName)
case utils.OLSAppServerDeploymentName, utils.LCoreDeploymentName:
appServerDeployments = append(appServerDeployments, depName)
default:
otherDeployments = append(otherDeployments, depName)
}
}

// If conversationCache uses postgres and both postgres and app server need restart,
// handle postgres first, then wait, then app server
if usesPostgresCache && len(postgresDeployments) > 0 && len(appServerDeployments) > 0 {
// Check if deployments exist and are in a stable state before trying to restart them
// During initial setup, deployments might not exist yet or might still be being created
postgresReady := deploymentExistsAndReady(r, ctx, utils.PostgresDeploymentName)
appServerReady := false
for _, depName := range appServerDeployments {
if deploymentExistsAndReady(r, ctx, depName) {
appServerReady = true
break
}
}

if !postgresReady || !appServerReady {
r.GetLogger().Info("skipping restart - deployments not ready yet",
"postgresReady", postgresReady,
"appServerReady", appServerReady,
"resource", name, "namespace", namespace)
return
}

// Step 1: Restart Postgres first
for _, pgDepName := range postgresDeployments {
err := postgres.RestartPostgres(r, ctx)
if err != nil {
r.GetLogger().Error(err, "failed to restart postgres deployment",
"deployment", pgDepName, "resource", name, "namespace", namespace)
return
}
}

// Step 2: Wait for Postgres to be ready before restarting app server
waitErr := waitForDeploymentReady(r, ctx, utils.PostgresDeploymentName, 5*time.Minute)
if waitErr != nil {
r.GetLogger().Error(waitErr, "postgres deployment did not become ready in time",
"resource", name, "namespace", namespace)
return
}

// Step 3: Restart app server deployments after postgres is ready
for _, appDepName := range appServerDeployments {
var err error
switch appDepName {
case utils.OLSAppServerDeploymentName:
err = appserver.RestartAppServer(r, ctx)
case utils.LCoreDeploymentName:
err = lcore.RestartLCore(r, ctx)
default:
r.GetLogger().Info("unknown app server deployment name", "deployment", appDepName)
continue
}

if err != nil {
r.GetLogger().Error(err, "failed to restart deployment",
"deployment", appDepName, "resource", name, "namespace", namespace)
} else {
r.GetLogger().Info("restarted deployment",
"deployment", appDepName, "resource", name, "namespace", namespace)
}
}

// Step 4: Restart other deployments (e.g., ConsoleUI)
for _, otherDepName := range otherDeployments {
restartFunc, exists := restartFuncs[otherDepName]
if !exists {
r.GetLogger().Info("unknown deployment name", "deployment", otherDepName)
continue
}

err := restartFunc(r, ctx)
if err != nil {
r.GetLogger().Error(err, "failed to restart deployment",
"deployment", otherDepName, "resource", name, "namespace", namespace)
} else {
r.GetLogger().Info("restarted deployment",
"deployment", otherDepName, "resource", name, "namespace", namespace)
}
}

return
}

// Restart the deployment using the appropriate function
// Normal case: restart all deployments without postgres dependency
for _, depName := range resolvedDeployments {
restartFunc, exists := restartFuncs[depName]
if !exists {
r.GetLogger().Info("unknown deployment name", "deployment", depName)
Expand All @@ -392,7 +562,6 @@ func restartDeployment(r reconciler.Reconciler, ctx context.Context, affectedDep
if err != nil {
r.GetLogger().Error(err, "failed to restart deployment",
"deployment", depName, "resource", name, "namespace", namespace)
// Continue with other deployments
} else {
r.GetLogger().Info("restarted deployment",
"deployment", depName, "resource", name, "namespace", namespace)
Expand Down
Loading