From 1c1419a24b98c87735dc71bd17c0b9131f9cb9a5 Mon Sep 17 00:00:00 2001 From: wenting Date: Mon, 12 Jan 2026 21:16:33 -0500 Subject: [PATCH 01/16] documentdb extension Signed-off-by: wenting --- .github/actions/setup-test-environment/action.yml | 2 +- operator/src/internal/cnpg/cnpg_cluster.go | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index d5ab4c6c..64b3d967 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -578,7 +578,7 @@ runs: spec: nodeCount: ${{ inputs.node-count }} instancesPerNode: ${{ inputs.instances-per-node }} - documentDBImage: ghcr.io/microsoft/documentdb/documentdb-local:16 + documentDBImage: ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.110.0 gatewayImage: ghcr.io/microsoft/documentdb/documentdb-local:16 resource: storage: diff --git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go index 5761160b..f5d010ca 100644 --- a/operator/src/internal/cnpg/cnpg_cluster.go +++ b/operator/src/internal/cnpg/cnpg_cluster.go @@ -8,6 +8,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" @@ -55,7 +56,7 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu Spec: func() cnpgv1.ClusterSpec { spec := cnpgv1.ClusterSpec{ Instances: documentdb.Spec.InstancesPerNode, - ImageName: documentdb_image, + ImageName: "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm ", // TODO: Update to other pg images as needed StorageConfiguration: cnpgv1.StorageConfiguration{ StorageClass: storageClassPointer, // Use configured storage class or default Size: documentdb.Spec.Resource.Storage.PvcSize, @@ -76,6 +77,14 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu PostgresUID: 105, PostgresGID: 108, PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: documentdb_image, + }, + }, + }, AdditionalLibraries: []string{"pg_cron", "pg_documentdb_core", "pg_documentdb"}, Parameters: map[string]string{ "cron.database_name": "postgres", From 1189a7084a40b39182fda2b3b6e37f9e54b0871e Mon Sep 17 00:00:00 2001 From: wenting Date: Tue, 13 Jan 2026 21:10:24 -0500 Subject: [PATCH 02/16] Add configurable PostgresImage field to DocumentDB CRD Signed-off-by: wenting --- operator/src/api/preview/documentdb_types.go | 6 ++++++ operator/src/internal/cnpg/cnpg_cluster.go | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/operator/src/api/preview/documentdb_types.go b/operator/src/api/preview/documentdb_types.go index 2ed37a0e..53215c89 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -38,6 +38,12 @@ type DocumentDBSpec struct { // If not specified, defaults to a version that matches the DocumentDB operator version. GatewayImage string `json:"gatewayImage,omitempty"` + // PostgresImage is the container image to use for the PostgreSQL server. + // If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm". + // +kubebuilder:default="ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm" + // +optional + PostgresImage string `json:"postgresImage,omitempty"` + // DocumentDbCredentialSecret is the name of the Kubernetes Secret containing credentials // for the DocumentDB gateway (expects keys `username` and `password`). If omitted, // a default secret name `documentdb-credentials` is used. diff --git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go index f5d010ca..babf1b97 100644 --- a/operator/src/internal/cnpg/cnpg_cluster.go +++ b/operator/src/internal/cnpg/cnpg_cluster.go @@ -17,7 +17,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" ) -func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, documentdb_image, serviceAccountName, storageClass string, isPrimaryRegion bool, log logr.Logger) *cnpgv1.Cluster { +func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, documentdbImage, serviceAccountName, storageClass string, isPrimaryRegion bool, log logr.Logger) *cnpgv1.Cluster { sidecarPluginName := documentdb.Spec.SidecarInjectorPluginName if sidecarPluginName == "" { sidecarPluginName = util.DEFAULT_SIDECAR_INJECTOR_PLUGIN @@ -56,7 +56,7 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu Spec: func() cnpgv1.ClusterSpec { spec := cnpgv1.ClusterSpec{ Instances: documentdb.Spec.InstancesPerNode, - ImageName: "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm ", // TODO: Update to other pg images as needed + ImageName: documentdb.Spec.PostgresImage, StorageConfiguration: cnpgv1.StorageConfiguration{ StorageClass: storageClassPointer, // Use configured storage class or default Size: documentdb.Spec.Resource.Storage.PvcSize, @@ -81,7 +81,7 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu { Name: "documentdb", ImageVolumeSource: corev1.ImageVolumeSource{ - Reference: documentdb_image, + Reference: documentdbImage, }, }, }, From 5afecf55ee72eccac3ff2989fbe2a39c70e4affd Mon Sep 17 00:00:00 2001 From: wenting Date: Thu, 15 Jan 2026 08:35:28 -0500 Subject: [PATCH 03/16] update crds Signed-off-by: wenting --- operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml | 6 ++++++ operator/src/config/crd/bases/documentdb.io_dbs.yaml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml index b5e41e80..ab82529a 100644 --- a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml +++ b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml @@ -192,6 +192,12 @@ spec: maximum: 1 minimum: 1 type: integer + postgresImage: + default: ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm + description: |- + PostgresImage is the container image to use for the PostgreSQL server. + If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm". + type: string resource: description: Resource specifies the storage resources for DocumentDB. properties: diff --git a/operator/src/config/crd/bases/documentdb.io_dbs.yaml b/operator/src/config/crd/bases/documentdb.io_dbs.yaml index b5e41e80..ab82529a 100644 --- a/operator/src/config/crd/bases/documentdb.io_dbs.yaml +++ b/operator/src/config/crd/bases/documentdb.io_dbs.yaml @@ -192,6 +192,12 @@ spec: maximum: 1 minimum: 1 type: integer + postgresImage: + default: ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm + description: |- + PostgresImage is the container image to use for the PostgreSQL server. + If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm". + type: string resource: description: Resource specifies the storage resources for DocumentDB. properties: From 11aecd92bd4939379aa046b771f8f6cdddc7df2d Mon Sep 17 00:00:00 2001 From: wenting Date: Thu, 15 Jan 2026 21:15:27 -0500 Subject: [PATCH 04/16] Upgrade CNPG to support ImageVolume extensions Signed-off-by: wenting --- operator/documentdb-helm-chart/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/documentdb-helm-chart/Chart.yaml b/operator/documentdb-helm-chart/Chart.yaml index 65959429..9bca1e09 100644 --- a/operator/documentdb-helm-chart/Chart.yaml +++ b/operator/documentdb-helm-chart/Chart.yaml @@ -6,5 +6,5 @@ description: A Helm chart for deploying the DocumentDB operator appVersion: "0.1.3" dependencies: - name: cloudnative-pg - version: "0.26.1" + version: "0.27.0" repository: "https://cloudnative-pg.github.io/charts/" \ No newline at end of file From 5fdd82a4b0115cd96df264bf83eabc3d064eb814 Mon Sep 17 00:00:00 2001 From: wenting Date: Fri, 16 Jan 2026 20:49:49 -0500 Subject: [PATCH 05/16] need to set ld_library_path Signed-off-by: wenting --- operator/src/internal/cnpg/cnpg_cluster.go | 1 + 1 file changed, 1 insertion(+) diff --git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go index babf1b97..908bf42e 100644 --- a/operator/src/internal/cnpg/cnpg_cluster.go +++ b/operator/src/internal/cnpg/cnpg_cluster.go @@ -83,6 +83,7 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu ImageVolumeSource: corev1.ImageVolumeSource{ Reference: documentdbImage, }, + LdLibraryPath: []string{"lib"}, }, }, AdditionalLibraries: []string{"pg_cron", "pg_documentdb_core", "pg_documentdb"}, From 602258c694a1ffdc01d4c0689bbf02b34e0e798a Mon Sep 17 00:00:00 2001 From: wenting Date: Fri, 16 Jan 2026 21:27:18 -0500 Subject: [PATCH 06/16] upgrade extension Signed-off-by: wenting --- .../controller/documentdb_controller.go | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go index c0e9b126..8510de3b 100644 --- a/operator/src/internal/controller/documentdb_controller.go +++ b/operator/src/internal/controller/documentdb_controller.go @@ -6,6 +6,7 @@ package controller import ( "bytes" "context" + "encoding/json" "fmt" "slices" "strings" @@ -257,6 +258,22 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } + // Check if documentdb extension image needs to be updated in CNPG cluster + if err := r.Client.Get(ctx, types.NamespacedName{Name: desiredCnpgCluster.Name, Namespace: req.Namespace}, currentCnpgCluster); err == nil { + if err := r.updateDocumentDBExtensionImageIfNeeded(ctx, currentCnpgCluster, desiredCnpgCluster, documentdbImage); err != nil { + logger.Error(err, "Failed to update DocumentDB extension image") + return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil + } + } + + // Check if documentdb extension needs to be updated + if slices.Contains(currentCnpgCluster.Status.InstancesStatus[cnpgv1.PodHealthy], currentCnpgCluster.Status.CurrentPrimary) { + if err := r.updateDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster); err != nil { + logger.Error(err, "Failed to update DocumentDB extension") + return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil + } + } + // Don't reque again unless there is a change return ctrl.Result{}, nil } @@ -469,3 +486,136 @@ func (r *DocumentDBReconciler) executeSQLCommand(ctx context.Context, cluster *c return stdout.String(), nil } + +// updateDocumentDBExtensionImageIfNeeded checks if the CNPG cluster's extension image differs from the desired one +// and updates it using JSON patch if needed +func (r *DocumentDBReconciler) updateDocumentDBExtensionImageIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster, desiredImage string) error { + logger := log.FromContext(ctx) + + // Get current documentdb extension image + var currentImage string + for _, ext := range currentCluster.Spec.PostgresConfiguration.Extensions { + if ext.Name == "documentdb" { + currentImage = ext.ImageVolumeSource.Reference + break + } + } + + // Get desired documentdb extension image + var desiredExtImage string + for _, ext := range desiredCluster.Spec.PostgresConfiguration.Extensions { + if ext.Name == "documentdb" { + desiredExtImage = ext.ImageVolumeSource.Reference + break + } + } + + // If images are the same, no update needed + if currentImage == desiredExtImage { + return nil + } + + logger.Info("Updating DocumentDB extension image in CNPG cluster", + "currentImage", currentImage, + "desiredImage", desiredExtImage, + "clusterName", currentCluster.Name) + + // Find the index of the documentdb extension + extIndex := -1 + for i, ext := range currentCluster.Spec.PostgresConfiguration.Extensions { + if ext.Name == "documentdb" { + extIndex = i + break + } + } + + if extIndex == -1 { + return fmt.Errorf("documentdb extension not found in CNPG cluster spec") + } + + // Use JSON patch to update the extension image + patch := []map[string]interface{}{ + { + "op": "replace", + "path": fmt.Sprintf("/spec/postgresql/extensions/%d/image/reference", extIndex), + "value": desiredExtImage, + }, + } + + patchBytes, err := json.Marshal(patch) + if err != nil { + return fmt.Errorf("failed to marshal patch: %w", err) + } + + if err := r.Client.Patch(ctx, currentCluster, client.RawPatch(types.JSONPatchType, patchBytes)); err != nil { + return fmt.Errorf("failed to patch CNPG cluster with new extension image: %w", err) + } + + logger.Info("Successfully updated DocumentDB extension image in CNPG cluster") + return nil +} + +// updateDocumentDBExtensionIfNeeded checks if the installed documentdb extension version differs from the default version +// and runs ALTER EXTENSION documentdb UPDATE if needed +func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Context, cluster *cnpgv1.Cluster) error { + logger := log.FromContext(ctx) + + // Query the extension versions + checkVersionSQL := "SELECT default_version, installed_version FROM pg_available_extensions WHERE name = 'documentdb'" + output, err := r.executeSQLCommand(ctx, cluster, checkVersionSQL) + if err != nil { + return fmt.Errorf("failed to check documentdb extension versions: %w", err) + } + + // Parse the output to get default_version and installed_version + // Expected output format: + // default_version | installed_version + // -----------------+------------------- + // 0.110-0 | 0.110-0 + lines := strings.Split(strings.TrimSpace(output), "\n") + if len(lines) < 3 { + logger.Info("DocumentDB extension not found or not installed yet", "output", output) + return nil + } + + // Parse the data row (3rd line, index 2) + dataLine := strings.TrimSpace(lines[2]) + parts := strings.Split(dataLine, "|") + if len(parts) != 2 { + logger.Info("Unexpected format from pg_available_extensions query", "output", output) + return nil + } + + defaultVersion := strings.TrimSpace(parts[0]) + installedVersion := strings.TrimSpace(parts[1]) + + // If installed_version is empty, extension is not installed + if installedVersion == "" { + logger.Info("DocumentDB extension is not installed yet") + return nil + } + + // If versions match, no update needed + if defaultVersion == installedVersion { + logger.V(1).Info("DocumentDB extension is up to date", + "version", installedVersion) + return nil + } + + logger.Info("DocumentDB extension version mismatch, updating extension", + "defaultVersion", defaultVersion, + "installedVersion", installedVersion) + + // Run ALTER EXTENSION to update + updateSQL := "ALTER EXTENSION documentdb UPDATE" + _, err = r.executeSQLCommand(ctx, cluster, updateSQL) + if err != nil { + return fmt.Errorf("failed to update documentdb extension: %w", err) + } + + logger.Info("Successfully updated DocumentDB extension", + "fromVersion", installedVersion, + "toVersion", defaultVersion) + + return nil +} From b9982688ba1ba040a7389b4932b42ecd0eb4a306 Mon Sep 17 00:00:00 2001 From: wenting Date: Sat, 17 Jan 2026 21:59:18 -0500 Subject: [PATCH 07/16] unit tests Signed-off-by: wenting --- .../controller/certificate_controller_test.go | 143 +++ .../controller/documentdb_controller.go | 57 +- .../controller/documentdb_controller_test.go | 869 +++++++++++++++--- 3 files changed, 924 insertions(+), 145 deletions(-) create mode 100644 operator/src/internal/controller/certificate_controller_test.go diff --git a/operator/src/internal/controller/certificate_controller_test.go b/operator/src/internal/controller/certificate_controller_test.go new file mode 100644 index 00000000..96620a3a --- /dev/null +++ b/operator/src/internal/controller/certificate_controller_test.go @@ -0,0 +1,143 @@ +package controller + +import ( + "context" + "testing" + "time" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + dbpreview "github.com/documentdb/documentdb-operator/api/preview" + util "github.com/documentdb/documentdb-operator/internal/utils" +) + +// helper to build TLS reconciler with objects +func buildCertificateReconciler(t *testing.T, objs ...runtime.Object) *CertificateReconciler { + scheme := runtime.NewScheme() + require.NoError(t, dbpreview.AddToScheme(scheme)) + require.NoError(t, cmapi.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + builder := fake.NewClientBuilder().WithScheme(scheme) + if len(objs) > 0 { + builder = builder.WithRuntimeObjects(objs...) + clientObjs := make([]client.Object, 0, len(objs)) + for _, obj := range objs { + if co, ok := obj.(client.Object); ok { + clientObjs = append(clientObjs, co) + } + } + if len(clientObjs) > 0 { + builder = builder.WithStatusSubresource(clientObjs...) + } + } + c := builder.Build() + return &CertificateReconciler{Client: c, Scheme: scheme} +} + +func baseDocumentDB(name, ns string) *dbpreview.DocumentDB { + return &dbpreview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: dbpreview.DocumentDBSpec{ + NodeCount: 1, + InstancesPerNode: 1, + Resource: dbpreview.Resource{Storage: dbpreview.StorageConfiguration{PvcSize: "1Gi"}}, + DocumentDBImage: "test-image", + ExposeViaService: dbpreview.ExposeViaService{ServiceType: "ClusterIP"}, + }, + } +} + +func TestEnsureProvidedSecret(t *testing.T) { + ctx := context.Background() + ddb := baseDocumentDB("ddb-prov", "default") + ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "Provided", Provided: &dbpreview.ProvidedTLS{SecretName: "mycert"}}} + // Secret missing first + r := buildCertificateReconciler(t, ddb) + res, err := r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Equal(t, RequeueAfterShort, res.RequeueAfter) + require.False(t, ddb.Status.TLS.Ready, "Should not be ready until secret exists") + + // Create secret with required keys then reconcile again + secret := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "mycert", Namespace: "default"}, Data: map[string][]byte{"tls.crt": []byte("crt"), "tls.key": []byte("key")}} + require.NoError(t, r.Client.Create(ctx, secret)) + res, err = r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Zero(t, res.RequeueAfter) + require.True(t, ddb.Status.TLS.Ready, "Provided secret should mark TLS ready") + require.Equal(t, "mycert", ddb.Status.TLS.SecretName) +} + +func TestEnsureCertManagerManagedCert(t *testing.T) { + ctx := context.Background() + ddb := baseDocumentDB("ddb-cm", "default") + ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "CertManager", CertManager: &dbpreview.CertManagerTLS{IssuerRef: dbpreview.IssuerRef{Name: "test-issuer", Kind: "Issuer"}, DNSNames: []string{"custom.example"}}}} + ddb.Status.TLS = &dbpreview.TLSStatus{} + issuer := &cmapi.Issuer{ObjectMeta: metav1.ObjectMeta{Name: "test-issuer", Namespace: "default"}, Spec: cmapi.IssuerSpec{IssuerConfig: cmapi.IssuerConfig{SelfSigned: &cmapi.SelfSignedIssuer{}}}} + r := buildCertificateReconciler(t, ddb, issuer) + + // Call certificate ensure twice to mimic reconcile loops + res, err := r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Equal(t, RequeueAfterShort, res.RequeueAfter) + res, err = r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Equal(t, RequeueAfterShort, res.RequeueAfter) + + cert := &cmapi.Certificate{} + // fetch certificate (self-created by reconcile). If not found, run reconcile again once. + require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-cm-gateway-cert", Namespace: "default"}, cert)) + // Debug: list all certificates to ensure store functioning + certList := &cmapi.CertificateList{} + _ = r.Client.List(ctx, certList) + for _, c := range certList.Items { + t.Logf("Found certificate: %s/%s secret=%s", c.Namespace, c.Name, c.Spec.SecretName) + } + require.Contains(t, cert.Spec.DNSNames, "custom.example") + // Should include service DNS names + serviceBase := util.DOCUMENTDB_SERVICE_PREFIX + ddb.Name + require.Contains(t, cert.Spec.DNSNames, serviceBase) + + // Simulate readiness condition then invoke ensure again (mimic reconcile loop) + cert.Status.Conditions = append(cert.Status.Conditions, cmapi.CertificateCondition{Type: cmapi.CertificateConditionReady, Status: cmmeta.ConditionTrue, LastTransitionTime: &metav1.Time{Time: time.Now()}}) + require.NoError(t, r.Client.Update(ctx, cert)) + res, err = r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Zero(t, res.RequeueAfter) + require.True(t, ddb.Status.TLS.Ready, "Cert-manager managed cert should mark ready after condition true") + require.NotEmpty(t, ddb.Status.TLS.SecretName) +} + +func TestEnsureSelfSignedCert(t *testing.T) { + ctx := context.Background() + ddb := baseDocumentDB("ddb-ss", "default") + ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "SelfSigned"}} + ddb.Status.TLS = &dbpreview.TLSStatus{} + r := buildCertificateReconciler(t, ddb) + + // First call should create issuer and certificate + res, err := r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Equal(t, RequeueAfterShort, res.RequeueAfter) + + // Certificate should exist + cert := &cmapi.Certificate{} + require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-ss-gateway-cert", Namespace: "default"}, cert)) + + // Simulate ready condition and call again + cert.Status.Conditions = append(cert.Status.Conditions, cmapi.CertificateCondition{Type: cmapi.CertificateConditionReady, Status: cmmeta.ConditionTrue, LastTransitionTime: &metav1.Time{Time: time.Now()}}) + require.NoError(t, r.Client.Update(ctx, cert)) + res, err = r.reconcileCertificates(ctx, ddb) + require.NoError(t, err) + require.Zero(t, res.RequeueAfter) + require.True(t, ddb.Status.TLS.Ready) + require.NotEmpty(t, ddb.Status.TLS.SecretName) +} diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go index 8510de3b..f343b28d 100644 --- a/operator/src/internal/controller/documentdb_controller.go +++ b/operator/src/internal/controller/documentdb_controller.go @@ -267,11 +267,9 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) } // Check if documentdb extension needs to be updated - if slices.Contains(currentCnpgCluster.Status.InstancesStatus[cnpgv1.PodHealthy], currentCnpgCluster.Status.CurrentPrimary) { - if err := r.updateDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster); err != nil { - logger.Error(err, "Failed to update DocumentDB extension") - return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil - } + if err := r.updateDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster); err != nil { + logger.Error(err, "Failed to update DocumentDB extension") + return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil } // Don't reque again unless there is a change @@ -555,11 +553,41 @@ func (r *DocumentDBReconciler) updateDocumentDBExtensionImageIfNeeded(ctx contex return nil } +// parseExtensionVersionsFromOutput parses the output of pg_available_extensions query +// Returns defaultVersion, installedVersion, and a boolean indicating if parsing was successful +// Expected output format: +// +// default_version | installed_version +// -----------------+------------------- +// 0.110-0 | 0.110-0 +func parseExtensionVersionsFromOutput(output string) (defaultVersion, installedVersion string, ok bool) { + lines := strings.Split(strings.TrimSpace(output), "\n") + if len(lines) < 3 { + return "", "", false + } + + // Parse the data row (3rd line, index 2) + dataLine := strings.TrimSpace(lines[2]) + parts := strings.Split(dataLine, "|") + if len(parts) != 2 { + return "", "", false + } + + defaultVersion = strings.TrimSpace(parts[0]) + installedVersion = strings.TrimSpace(parts[1]) + return defaultVersion, installedVersion, true +} + // updateDocumentDBExtensionIfNeeded checks if the installed documentdb extension version differs from the default version // and runs ALTER EXTENSION documentdb UPDATE if needed func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Context, cluster *cnpgv1.Cluster) error { logger := log.FromContext(ctx) + if !slices.Contains(cluster.Status.InstancesStatus[cnpgv1.PodHealthy], cluster.Status.CurrentPrimary) { + logger.Info("Current primary pod is not healthy; skipping DocumentDB extension version check") + return nil + } + // Query the extension versions checkVersionSQL := "SELECT default_version, installed_version FROM pg_available_extensions WHERE name = 'documentdb'" output, err := r.executeSQLCommand(ctx, cluster, checkVersionSQL) @@ -568,27 +596,12 @@ func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Con } // Parse the output to get default_version and installed_version - // Expected output format: - // default_version | installed_version - // -----------------+------------------- - // 0.110-0 | 0.110-0 - lines := strings.Split(strings.TrimSpace(output), "\n") - if len(lines) < 3 { + defaultVersion, installedVersion, ok := parseExtensionVersionsFromOutput(output) + if !ok { logger.Info("DocumentDB extension not found or not installed yet", "output", output) return nil } - // Parse the data row (3rd line, index 2) - dataLine := strings.TrimSpace(lines[2]) - parts := strings.Split(dataLine, "|") - if len(parts) != 2 { - logger.Info("Unexpected format from pg_available_extensions query", "output", output) - return nil - } - - defaultVersion := strings.TrimSpace(parts[0]) - installedVersion := strings.TrimSpace(parts[1]) - // If installed_version is empty, extension is not installed if installedVersion == "" { logger.Info("DocumentDB extension is not installed yet") diff --git a/operator/src/internal/controller/documentdb_controller_test.go b/operator/src/internal/controller/documentdb_controller_test.go index 96620a3a..9d6dbc0f 100644 --- a/operator/src/internal/controller/documentdb_controller_test.go +++ b/operator/src/internal/controller/documentdb_controller_test.go @@ -1,13 +1,14 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + package controller import ( "context" - "testing" - "time" - cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" - "github.com/stretchr/testify/require" + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -16,128 +17,750 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" dbpreview "github.com/documentdb/documentdb-operator/api/preview" - util "github.com/documentdb/documentdb-operator/internal/utils" ) -// helper to build TLS reconciler with objects -func buildCertificateReconciler(t *testing.T, objs ...runtime.Object) *CertificateReconciler { - scheme := runtime.NewScheme() - require.NoError(t, dbpreview.AddToScheme(scheme)) - require.NoError(t, cmapi.AddToScheme(scheme)) - require.NoError(t, corev1.AddToScheme(scheme)) - builder := fake.NewClientBuilder().WithScheme(scheme) - if len(objs) > 0 { - builder = builder.WithRuntimeObjects(objs...) - clientObjs := make([]client.Object, 0, len(objs)) - for _, obj := range objs { - if co, ok := obj.(client.Object); ok { - clientObjs = append(clientObjs, co) - } - } - if len(clientObjs) > 0 { - builder = builder.WithStatusSubresource(clientObjs...) - } - } - c := builder.Build() - return &CertificateReconciler{Client: c, Scheme: scheme} +// parseExtensionVersions parses the output of pg_available_extensions query +// Returns defaultVersion, installedVersion, and a boolean indicating if parsing was successful +func parseExtensionVersions(output string) (defaultVersion, installedVersion string, ok bool) { + return parseExtensionVersionsFromOutput(output) } -func baseDocumentDB(name, ns string) *dbpreview.DocumentDB { - return &dbpreview.DocumentDB{ - ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, - Spec: dbpreview.DocumentDBSpec{ - NodeCount: 1, - InstancesPerNode: 1, - Resource: dbpreview.Resource{Storage: dbpreview.StorageConfiguration{PvcSize: "1Gi"}}, - DocumentDBImage: "test-image", - ExposeViaService: dbpreview.ExposeViaService{ServiceType: "ClusterIP"}, - }, - } -} +var _ = Describe("DocumentDB Controller", func() { + const ( + clusterName = "test-cluster" + clusterNamespace = "default" + ) -func TestEnsureProvidedSecret(t *testing.T) { - ctx := context.Background() - ddb := baseDocumentDB("ddb-prov", "default") - ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "Provided", Provided: &dbpreview.ProvidedTLS{SecretName: "mycert"}}} - // Secret missing first - r := buildCertificateReconciler(t, ddb) - res, err := r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Equal(t, RequeueAfterShort, res.RequeueAfter) - require.False(t, ddb.Status.TLS.Ready, "Should not be ready until secret exists") - - // Create secret with required keys then reconcile again - secret := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "mycert", Namespace: "default"}, Data: map[string][]byte{"tls.crt": []byte("crt"), "tls.key": []byte("key")}} - require.NoError(t, r.Client.Create(ctx, secret)) - res, err = r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Zero(t, res.RequeueAfter) - require.True(t, ddb.Status.TLS.Ready, "Provided secret should mark TLS ready") - require.Equal(t, "mycert", ddb.Status.TLS.SecretName) -} + var ( + ctx context.Context + scheme *runtime.Scheme + ) -func TestEnsureCertManagerManagedCert(t *testing.T) { - ctx := context.Background() - ddb := baseDocumentDB("ddb-cm", "default") - ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "CertManager", CertManager: &dbpreview.CertManagerTLS{IssuerRef: dbpreview.IssuerRef{Name: "test-issuer", Kind: "Issuer"}, DNSNames: []string{"custom.example"}}}} - ddb.Status.TLS = &dbpreview.TLSStatus{} - issuer := &cmapi.Issuer{ObjectMeta: metav1.ObjectMeta{Name: "test-issuer", Namespace: "default"}, Spec: cmapi.IssuerSpec{IssuerConfig: cmapi.IssuerConfig{SelfSigned: &cmapi.SelfSignedIssuer{}}}} - r := buildCertificateReconciler(t, ddb, issuer) - - // Call certificate ensure twice to mimic reconcile loops - res, err := r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Equal(t, RequeueAfterShort, res.RequeueAfter) - res, err = r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Equal(t, RequeueAfterShort, res.RequeueAfter) - - cert := &cmapi.Certificate{} - // fetch certificate (self-created by reconcile). If not found, run reconcile again once. - require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-cm-gateway-cert", Namespace: "default"}, cert)) - // Debug: list all certificates to ensure store functioning - certList := &cmapi.CertificateList{} - _ = r.Client.List(ctx, certList) - for _, c := range certList.Items { - t.Logf("Found certificate: %s/%s secret=%s", c.Namespace, c.Name, c.Spec.SecretName) - } - require.Contains(t, cert.Spec.DNSNames, "custom.example") - // Should include service DNS names - serviceBase := util.DOCUMENTDB_SERVICE_PREFIX + ddb.Name - require.Contains(t, cert.Spec.DNSNames, serviceBase) - - // Simulate readiness condition then invoke ensure again (mimic reconcile loop) - cert.Status.Conditions = append(cert.Status.Conditions, cmapi.CertificateCondition{Type: cmapi.CertificateConditionReady, Status: cmmeta.ConditionTrue, LastTransitionTime: &metav1.Time{Time: time.Now()}}) - require.NoError(t, r.Client.Update(ctx, cert)) - res, err = r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Zero(t, res.RequeueAfter) - require.True(t, ddb.Status.TLS.Ready, "Cert-manager managed cert should mark ready after condition true") - require.NotEmpty(t, ddb.Status.TLS.SecretName) -} + BeforeEach(func() { + ctx = context.Background() + scheme = runtime.NewScheme() + Expect(dbpreview.AddToScheme(scheme)).To(Succeed()) + Expect(cnpgv1.AddToScheme(scheme)).To(Succeed()) + Expect(corev1.AddToScheme(scheme)).To(Succeed()) + }) -func TestEnsureSelfSignedCert(t *testing.T) { - ctx := context.Background() - ddb := baseDocumentDB("ddb-ss", "default") - ddb.Spec.TLS = &dbpreview.TLSConfiguration{Gateway: &dbpreview.GatewayTLS{Mode: "SelfSigned"}} - ddb.Status.TLS = &dbpreview.TLSStatus{} - r := buildCertificateReconciler(t, ddb) - - // First call should create issuer and certificate - res, err := r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Equal(t, RequeueAfterShort, res.RequeueAfter) - - // Certificate should exist - cert := &cmapi.Certificate{} - require.NoError(t, r.Client.Get(ctx, types.NamespacedName{Name: "ddb-ss-gateway-cert", Namespace: "default"}, cert)) - - // Simulate ready condition and call again - cert.Status.Conditions = append(cert.Status.Conditions, cmapi.CertificateCondition{Type: cmapi.CertificateConditionReady, Status: cmmeta.ConditionTrue, LastTransitionTime: &metav1.Time{Time: time.Now()}}) - require.NoError(t, r.Client.Update(ctx, cert)) - res, err = r.reconcileCertificates(ctx, ddb) - require.NoError(t, err) - require.Zero(t, res.RequeueAfter) - require.True(t, ddb.Status.TLS.Ready) - require.NotEmpty(t, ddb.Status.TLS.SecretName) -} + Describe("updateDocumentDBExtensionImageIfNeeded", func() { + It("should return nil when current and desired images are the same", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v1.0.0") + Expect(err).ToNot(HaveOccurred()) + + // Verify the cluster was not updated (image should remain the same) + updated := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, client.ObjectKey{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) + Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v1.0.0")) + }) + + It("should update extension image when current and desired images differ", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v2.0.0", + }, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + Expect(err).ToNot(HaveOccurred()) + + // Verify the cluster was updated with the new image + updated := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) + Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + }) + + It("should return error when documentdb extension is not found in current cluster", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "other-extension", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "other/image:v1.0.0", + }, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v2.0.0", + }, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("documentdb extension not found")) + }) + + It("should handle cluster with multiple extensions and update only documentdb", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "pg_stat_statements", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_stat_statements:v1.0.0", + }, + }, + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + { + Name: "pg_cron", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_cron:v1.0.0", + }, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "pg_stat_statements", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_stat_statements:v1.0.0", + }, + }, + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v2.0.0", + }, + }, + { + Name: "pg_cron", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_cron:v1.0.0", + }, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + Expect(err).ToNot(HaveOccurred()) + + // Verify only documentdb extension was updated + updated := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) + Expect(updated.Spec.PostgresConfiguration.Extensions).To(HaveLen(3)) + Expect(updated.Spec.PostgresConfiguration.Extensions[0].Name).To(Equal("pg_stat_statements")) + Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("postgres/pg_stat_statements:v1.0.0")) + Expect(updated.Spec.PostgresConfiguration.Extensions[1].Name).To(Equal("documentdb")) + Expect(updated.Spec.PostgresConfiguration.Extensions[1].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + Expect(updated.Spec.PostgresConfiguration.Extensions[2].Name).To(Equal("pg_cron")) + Expect(updated.Spec.PostgresConfiguration.Extensions[2].ImageVolumeSource.Reference).To(Equal("postgres/pg_cron:v1.0.0")) + }) + + It("should return nil when no extensions exist in both clusters", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{}, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{}, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + // Both clusters have no extensions, images are both empty strings, so they match + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should handle documentdb extension as the only extension", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + LdLibraryPath: []string{"lib"}, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v3.0.0", + }, + LdLibraryPath: []string{"lib"}, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v3.0.0") + Expect(err).ToNot(HaveOccurred()) + + // Verify the cluster was updated with the new image + updated := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) + Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v3.0.0")) + // Verify other fields are preserved + Expect(updated.Spec.PostgresConfiguration.Extensions[0].LdLibraryPath).To(Equal([]string{"lib"})) + }) + + It("should handle documentdb extension at the beginning of extensions list", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + { + Name: "pg_cron", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_cron:v1.0.0", + }, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v2.0.0", + }, + }, + { + Name: "pg_cron", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_cron:v1.0.0", + }, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + Expect(err).ToNot(HaveOccurred()) + + // Verify the cluster was updated + updated := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) + Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + }) + + It("should handle documentdb extension at the end of extensions list", func() { + currentCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "pg_cron", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_cron:v1.0.0", + }, + }, + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, + } + + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "pg_cron", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "postgres/pg_cron:v1.0.0", + }, + }, + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v2.0.0", + }, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(currentCluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + Expect(err).ToNot(HaveOccurred()) + + // Verify the cluster was updated + updated := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) + Expect(updated.Spec.PostgresConfiguration.Extensions[1].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + }) + }) + + Describe("updateDocumentDBExtensionIfNeeded", func() { + It("should return nil when primary pod is not healthy", func() { + cluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Status: cnpgv1.ClusterStatus{ + CurrentPrimary: "test-cluster-1", + InstancesStatus: map[cnpgv1.PodStatus][]string{ + cnpgv1.PodHealthy: {"test-cluster-2", "test-cluster-3"}, // Primary not in healthy list + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should return nil when InstancesStatus is empty", func() { + cluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Status: cnpgv1.ClusterStatus{ + CurrentPrimary: "test-cluster-1", + InstancesStatus: map[cnpgv1.PodStatus][]string{}, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should return nil when InstancesStatus is nil", func() { + cluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Status: cnpgv1.ClusterStatus{ + CurrentPrimary: "test-cluster-1", + InstancesStatus: nil, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should return nil when CurrentPrimary is empty", func() { + cluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Status: cnpgv1.ClusterStatus{ + CurrentPrimary: "", + InstancesStatus: map[cnpgv1.PodStatus][]string{ + cnpgv1.PodHealthy: {"test-cluster-1"}, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster). + Build() + + reconciler := &DocumentDBReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + Expect(err).ToNot(HaveOccurred()) + }) + }) + + Describe("parseExtensionVersionsFromOutput", func() { + It("should parse valid output with matching versions", func() { + output := ` default_version | installed_version +-----------------+------------------- + 0.110-0 | 0.110-0 +(1 row)` + + defaultVersion, installedVersion, ok := parseExtensionVersions(output) + Expect(ok).To(BeTrue()) + Expect(defaultVersion).To(Equal("0.110-0")) + Expect(installedVersion).To(Equal("0.110-0")) + }) + + It("should parse valid output with different versions", func() { + output := ` default_version | installed_version +-----------------+------------------- + 0.111-0 | 0.110-0 +(1 row)` + + defaultVersion, installedVersion, ok := parseExtensionVersions(output) + Expect(ok).To(BeTrue()) + Expect(defaultVersion).To(Equal("0.111-0")) + Expect(installedVersion).To(Equal("0.110-0")) + }) + + It("should handle empty installed_version", func() { + output := ` default_version | installed_version +-----------------+------------------- + 0.110-0 | +(1 row)` + + defaultVersion, installedVersion, ok := parseExtensionVersions(output) + Expect(ok).To(BeTrue()) + Expect(defaultVersion).To(Equal("0.110-0")) + Expect(installedVersion).To(Equal("")) + }) + + It("should return false for output with less than 3 lines", func() { + output := ` default_version | installed_version +-----------------+-------------------` + + _, _, ok := parseExtensionVersions(output) + Expect(ok).To(BeFalse()) + }) + + It("should return false for empty output", func() { + output := "" + + _, _, ok := parseExtensionVersions(output) + Expect(ok).To(BeFalse()) + }) + + It("should return false for output with no pipe separator", func() { + output := ` default_version installed_version +-----------------+------------------- + 0.110-0 0.110-0 +(1 row)` + + _, _, ok := parseExtensionVersions(output) + Expect(ok).To(BeFalse()) + }) + + It("should return false for output with too many pipe separators", func() { + output := ` default_version | installed_version | extra +-----------------+-------------------+------ + 0.110-0 | 0.110-0 | data +(1 row)` + + _, _, ok := parseExtensionVersions(output) + Expect(ok).To(BeFalse()) + }) + + It("should handle semantic version strings", func() { + output := ` default_version | installed_version +-----------------+------------------- + 1.2.3-beta.1 | 1.2.2 +(1 row)` + + defaultVersion, installedVersion, ok := parseExtensionVersions(output) + Expect(ok).To(BeTrue()) + Expect(defaultVersion).To(Equal("1.2.3-beta.1")) + Expect(installedVersion).To(Equal("1.2.2")) + }) + + It("should trim whitespace from versions", func() { + output := ` default_version | installed_version +-----------------+------------------- + 0.110-0 | 0.109-0 +(1 row)` + + defaultVersion, installedVersion, ok := parseExtensionVersions(output) + Expect(ok).To(BeTrue()) + Expect(defaultVersion).To(Equal("0.110-0")) + Expect(installedVersion).To(Equal("0.109-0")) + }) + + It("should handle output without row count footer", func() { + output := ` default_version | installed_version +-----------------+------------------- + 0.110-0 | 0.110-0` + + defaultVersion, installedVersion, ok := parseExtensionVersions(output) + Expect(ok).To(BeTrue()) + Expect(defaultVersion).To(Equal("0.110-0")) + Expect(installedVersion).To(Equal("0.110-0")) + }) + }) +}) From f58950828b102f79b5429c3cf47c49e4eb0267ee Mon Sep 17 00:00:00 2001 From: wenting Date: Tue, 20 Jan 2026 14:53:35 -0500 Subject: [PATCH 08/16] documentDBVersion in status Signed-off-by: wenting --- .../crds/documentdb.io_dbs.yaml | 4 ++ operator/src/api/preview/documentdb_types.go | 3 + .../config/crd/bases/documentdb.io_dbs.yaml | 4 ++ .../controller/documentdb_controller.go | 67 ++++++++++++++++--- 4 files changed, 67 insertions(+), 11 deletions(-) diff --git a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml index ab82529a..6206d7b6 100644 --- a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml +++ b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml @@ -311,6 +311,10 @@ spec: properties: connectionString: type: string + documentDBVersion: + description: DocumentDBVersion is the currently installed version + of the DocumentDB extension. + type: string localPrimary: type: string status: diff --git a/operator/src/api/preview/documentdb_types.go b/operator/src/api/preview/documentdb_types.go index 53215c89..0f79bb68 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -220,6 +220,9 @@ type DocumentDBStatus struct { TargetPrimary string `json:"targetPrimary,omitempty"` LocalPrimary string `json:"localPrimary,omitempty"` + // DocumentDBVersion is the currently installed version of the DocumentDB extension. + DocumentDBVersion string `json:"documentDBVersion,omitempty"` + // TLS reports gateway TLS provisioning status (Phase 1). TLS *TLSStatus `json:"tls,omitempty"` } diff --git a/operator/src/config/crd/bases/documentdb.io_dbs.yaml b/operator/src/config/crd/bases/documentdb.io_dbs.yaml index ab82529a..6206d7b6 100644 --- a/operator/src/config/crd/bases/documentdb.io_dbs.yaml +++ b/operator/src/config/crd/bases/documentdb.io_dbs.yaml @@ -311,6 +311,10 @@ spec: properties: connectionString: type: string + documentDBVersion: + description: DocumentDBVersion is the currently installed version + of the DocumentDB extension. + type: string localPrimary: type: string status: diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go index f343b28d..af8b8e0b 100644 --- a/operator/src/internal/controller/documentdb_controller.go +++ b/operator/src/internal/controller/documentdb_controller.go @@ -251,6 +251,14 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } + // Update DocumentDB extension version in status + if extVersion, err := r.getDocumentDBExtensionVersion(ctx, currentCnpgCluster); err == nil && extVersion != "" { + if documentdb.Status.DocumentDBVersion != extVersion { + documentdb.Status.DocumentDBVersion = extVersion + statusChanged = true + } + } + if statusChanged { if err := r.Status().Update(ctx, documentdb); err != nil { logger.Error(err, "Failed to update DocumentDB status") @@ -264,14 +272,30 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Error(err, "Failed to update DocumentDB extension image") return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil } + // Note: If image was updated, CNPG will trigger a rolling restart. + // The clusterInstanceStatusChangedPredicate will trigger reconciliation + // when the pod becomes healthy with the new image. } // Check if documentdb extension needs to be updated - if err := r.updateDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster); err != nil { + extensionUpdated, err := r.updateDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster) + if err != nil { logger.Error(err, "Failed to update DocumentDB extension") return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil } + // Update DocumentDB version in status if extension was upgraded + if extensionUpdated { + if extVersion, err := r.getDocumentDBExtensionVersion(ctx, currentCnpgCluster); err == nil && extVersion != "" { + if documentdb.Status.DocumentDBVersion != extVersion { + documentdb.Status.DocumentDBVersion = extVersion + if err := r.Status().Update(ctx, documentdb); err != nil { + logger.Error(err, "Failed to update DocumentDB status after extension upgrade") + } + } + } + } + // Don't reque again unless there is a change return ctrl.Result{}, nil } @@ -486,7 +510,7 @@ func (r *DocumentDBReconciler) executeSQLCommand(ctx context.Context, cluster *c } // updateDocumentDBExtensionImageIfNeeded checks if the CNPG cluster's extension image differs from the desired one -// and updates it using JSON patch if needed +// and updates it using JSON patch if needed. func (r *DocumentDBReconciler) updateDocumentDBExtensionImageIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster, desiredImage string) error { logger := log.FromContext(ctx) @@ -579,40 +603,40 @@ func parseExtensionVersionsFromOutput(output string) (defaultVersion, installedV } // updateDocumentDBExtensionIfNeeded checks if the installed documentdb extension version differs from the default version -// and runs ALTER EXTENSION documentdb UPDATE if needed -func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Context, cluster *cnpgv1.Cluster) error { +// and runs ALTER EXTENSION documentdb UPDATE if needed. Returns true if the extension was updated. +func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Context, cluster *cnpgv1.Cluster) (bool, error) { logger := log.FromContext(ctx) if !slices.Contains(cluster.Status.InstancesStatus[cnpgv1.PodHealthy], cluster.Status.CurrentPrimary) { logger.Info("Current primary pod is not healthy; skipping DocumentDB extension version check") - return nil + return false, nil } // Query the extension versions checkVersionSQL := "SELECT default_version, installed_version FROM pg_available_extensions WHERE name = 'documentdb'" output, err := r.executeSQLCommand(ctx, cluster, checkVersionSQL) if err != nil { - return fmt.Errorf("failed to check documentdb extension versions: %w", err) + return false, fmt.Errorf("failed to check documentdb extension versions: %w", err) } // Parse the output to get default_version and installed_version defaultVersion, installedVersion, ok := parseExtensionVersionsFromOutput(output) if !ok { logger.Info("DocumentDB extension not found or not installed yet", "output", output) - return nil + return false, nil } // If installed_version is empty, extension is not installed if installedVersion == "" { logger.Info("DocumentDB extension is not installed yet") - return nil + return false, nil } // If versions match, no update needed if defaultVersion == installedVersion { logger.V(1).Info("DocumentDB extension is up to date", "version", installedVersion) - return nil + return false, nil } logger.Info("DocumentDB extension version mismatch, updating extension", @@ -623,12 +647,33 @@ func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Con updateSQL := "ALTER EXTENSION documentdb UPDATE" _, err = r.executeSQLCommand(ctx, cluster, updateSQL) if err != nil { - return fmt.Errorf("failed to update documentdb extension: %w", err) + return false, fmt.Errorf("failed to update documentdb extension: %w", err) } logger.Info("Successfully updated DocumentDB extension", "fromVersion", installedVersion, "toVersion", defaultVersion) - return nil + return true, nil +} + +// getDocumentDBExtensionVersion queries the installed documentdb extension version +func (r *DocumentDBReconciler) getDocumentDBExtensionVersion(ctx context.Context, cluster *cnpgv1.Cluster) (string, error) { + if !slices.Contains(cluster.Status.InstancesStatus[cnpgv1.PodHealthy], cluster.Status.CurrentPrimary) { + return "", nil + } + + checkVersionSQL := "SELECT installed_version FROM pg_available_extensions WHERE name = 'documentdb'" + output, err := r.executeSQLCommand(ctx, cluster, checkVersionSQL) + if err != nil { + return "", err + } + + lines := strings.Split(strings.TrimSpace(output), "\n") + if len(lines) < 3 { + return "", nil + } + + version := strings.TrimSpace(lines[2]) + return version, nil } From 1cc45ff625be9f575421be03a6416242c09dff85 Mon Sep 17 00:00:00 2001 From: wenting Date: Wed, 21 Jan 2026 15:38:11 -0500 Subject: [PATCH 09/16] e2e test for upgrade Signed-off-by: wenting --- .../actions/setup-test-environment/action.yml | 2 +- .github/workflows/test-E2E.yml | 85 ++++++ .../controller/documentdb_controller.go | 255 ++++++++---------- .../controller/documentdb_controller_test.go | 234 +++++++++++----- 4 files changed, 369 insertions(+), 207 deletions(-) diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 64b3d967..661d5924 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -578,7 +578,7 @@ runs: spec: nodeCount: ${{ inputs.node-count }} instancesPerNode: ${{ inputs.instances-per-node }} - documentDBImage: ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.110.0 + documentDBImage: ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.109.0 gatewayImage: ghcr.io/microsoft/documentdb/documentdb-local:16 resource: storage: diff --git a/.github/workflows/test-E2E.yml b/.github/workflows/test-E2E.yml index 321e1c46..f873622e 100644 --- a/.github/workflows/test-E2E.yml +++ b/.github/workflows/test-E2E.yml @@ -264,6 +264,91 @@ jobs: echo "✅ DocumentDB status validation passed" + - name: Test DocumentDB Image Upgrade + run: | + echo "Testing DocumentDB extension image upgrade on ${{ matrix.architecture }}..." + + OLD_IMAGE="ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.109.0" + NEW_IMAGE="ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.110.0" + + # Verify current image is the old image + CURRENT_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') + echo "Current DocumentDB image: $CURRENT_IMAGE" + + if [[ "$CURRENT_IMAGE" != "$OLD_IMAGE" ]]; then + echo "❌ Expected old image $OLD_IMAGE but found $CURRENT_IMAGE" + exit 1 + fi + echo "✓ Cluster deployed with old image" + + # Check DocumentDB version in status before upgrade + DOCUMENTDB_VERSION_BEFORE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBVersion}') + echo "DocumentDB version before upgrade: $DOCUMENTDB_VERSION_BEFORE" + + # Upgrade to new image + echo "" + echo "Upgrading DocumentDB image to $NEW_IMAGE..." + kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' -p "{\"spec\":{\"documentDBImage\":\"$NEW_IMAGE\"}}" + + # Wait for cluster to be healthy with new image + echo "Waiting for cluster to be healthy with new image..." + timeout 600 bash -c ' + NEW_IMAGE="ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.110.0" + + while true; do + DB_STATUS=$(kubectl get documentdb '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.status}" 2>/dev/null) + CLUSTER_STATUS=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.phase}" 2>/dev/null) + + # Check extension image in CNPG cluster + EXTENSION_IMAGE=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.spec.postgresConfiguration.extensions[?(@.name==\"documentdb\")].imageVolumeSource.reference}" 2>/dev/null || echo "N/A") + + echo "DocumentDB status: $DB_STATUS, CNPG Cluster phase: $CLUSTER_STATUS, Extension image: $EXTENSION_IMAGE" + + if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then + if [[ "$EXTENSION_IMAGE" == "$NEW_IMAGE" ]]; then + HEALTHY_PODS=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") + if [[ "$HEALTHY_PODS" -ge "1" ]]; then + echo "✓ Cluster is healthy with new image and $HEALTHY_PODS healthy pods" + break + fi + fi + fi + + sleep 10 + done + ' + + # Verify the new image is applied + echo "Verifying new image is applied..." + FINAL_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') + CNPG_EXTENSION_IMAGE=$(kubectl get cluster $DB_NAME -n $DB_NS -o jsonpath='{.spec.postgresConfiguration.extensions[?(@.name=="documentdb")].imageVolumeSource.reference}') + + echo "Final DocumentDB image in spec: $FINAL_IMAGE" + echo "Final extension image in CNPG cluster: $CNPG_EXTENSION_IMAGE" + + if [[ "$FINAL_IMAGE" != "$NEW_IMAGE" ]]; then + echo "❌ New image not applied to DocumentDB spec" + kubectl get documentdb $DB_NAME -n $DB_NS -o yaml + exit 1 + fi + + if [[ "$CNPG_EXTENSION_IMAGE" != "$NEW_IMAGE" ]]; then + echo "❌ New image not applied to CNPG cluster extension" + kubectl get cluster $DB_NAME -n $DB_NS -o yaml + exit 1 + fi + + echo "✓ New image applied successfully" + + # Check DocumentDB version in status after upgrade + DOCUMENTDB_VERSION_AFTER=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBVersion}') + echo "DocumentDB version before upgrade: $DOCUMENTDB_VERSION_BEFORE" + echo "DocumentDB version after upgrade: $DOCUMENTDB_VERSION_AFTER" + + echo "" + echo "✅ DocumentDB image upgrade test completed successfully!" + echo "Upgraded from $OLD_IMAGE to $NEW_IMAGE" + - name: Test cluster health and monitoring run: | echo "Testing cluster health and monitoring on ${{ matrix.architecture }}..." diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go index af8b8e0b..017f92e0 100644 --- a/operator/src/internal/controller/documentdb_controller.go +++ b/operator/src/internal/controller/documentdb_controller.go @@ -251,14 +251,6 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } - // Update DocumentDB extension version in status - if extVersion, err := r.getDocumentDBExtensionVersion(ctx, currentCnpgCluster); err == nil && extVersion != "" { - if documentdb.Status.DocumentDBVersion != extVersion { - documentdb.Status.DocumentDBVersion = extVersion - statusChanged = true - } - } - if statusChanged { if err := r.Status().Update(ctx, documentdb); err != nil { logger.Error(err, "Failed to update DocumentDB status") @@ -266,36 +258,12 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } - // Check if documentdb extension image needs to be updated in CNPG cluster - if err := r.Client.Get(ctx, types.NamespacedName{Name: desiredCnpgCluster.Name, Namespace: req.Namespace}, currentCnpgCluster); err == nil { - if err := r.updateDocumentDBExtensionImageIfNeeded(ctx, currentCnpgCluster, desiredCnpgCluster, documentdbImage); err != nil { - logger.Error(err, "Failed to update DocumentDB extension image") - return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil - } - // Note: If image was updated, CNPG will trigger a rolling restart. - // The clusterInstanceStatusChangedPredicate will trigger reconciliation - // when the pod becomes healthy with the new image. - } - - // Check if documentdb extension needs to be updated - extensionUpdated, err := r.updateDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster) - if err != nil { - logger.Error(err, "Failed to update DocumentDB extension") + // Check if documentdb extension needs to be upgraded (image update + ALTER EXTENSION) + if err := r.upgradeDocumentDBExtensionIfNeeded(ctx, currentCnpgCluster, desiredCnpgCluster, documentdb); err != nil { + logger.Error(err, "Failed to upgrade DocumentDB extension") return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil } - // Update DocumentDB version in status if extension was upgraded - if extensionUpdated { - if extVersion, err := r.getDocumentDBExtensionVersion(ctx, currentCnpgCluster); err == nil && extVersion != "" { - if documentdb.Status.DocumentDBVersion != extVersion { - documentdb.Status.DocumentDBVersion = extVersion - if err := r.Status().Update(ctx, documentdb); err != nil { - logger.Error(err, "Failed to update DocumentDB status after extension upgrade") - } - } - } - } - // Don't reque again unless there is a change return ctrl.Result{}, nil } @@ -509,74 +477,6 @@ func (r *DocumentDBReconciler) executeSQLCommand(ctx context.Context, cluster *c return stdout.String(), nil } -// updateDocumentDBExtensionImageIfNeeded checks if the CNPG cluster's extension image differs from the desired one -// and updates it using JSON patch if needed. -func (r *DocumentDBReconciler) updateDocumentDBExtensionImageIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster, desiredImage string) error { - logger := log.FromContext(ctx) - - // Get current documentdb extension image - var currentImage string - for _, ext := range currentCluster.Spec.PostgresConfiguration.Extensions { - if ext.Name == "documentdb" { - currentImage = ext.ImageVolumeSource.Reference - break - } - } - - // Get desired documentdb extension image - var desiredExtImage string - for _, ext := range desiredCluster.Spec.PostgresConfiguration.Extensions { - if ext.Name == "documentdb" { - desiredExtImage = ext.ImageVolumeSource.Reference - break - } - } - - // If images are the same, no update needed - if currentImage == desiredExtImage { - return nil - } - - logger.Info("Updating DocumentDB extension image in CNPG cluster", - "currentImage", currentImage, - "desiredImage", desiredExtImage, - "clusterName", currentCluster.Name) - - // Find the index of the documentdb extension - extIndex := -1 - for i, ext := range currentCluster.Spec.PostgresConfiguration.Extensions { - if ext.Name == "documentdb" { - extIndex = i - break - } - } - - if extIndex == -1 { - return fmt.Errorf("documentdb extension not found in CNPG cluster spec") - } - - // Use JSON patch to update the extension image - patch := []map[string]interface{}{ - { - "op": "replace", - "path": fmt.Sprintf("/spec/postgresql/extensions/%d/image/reference", extIndex), - "value": desiredExtImage, - }, - } - - patchBytes, err := json.Marshal(patch) - if err != nil { - return fmt.Errorf("failed to marshal patch: %w", err) - } - - if err := r.Client.Patch(ctx, currentCluster, client.RawPatch(types.JSONPatchType, patchBytes)); err != nil { - return fmt.Errorf("failed to patch CNPG cluster with new extension image: %w", err) - } - - logger.Info("Successfully updated DocumentDB extension image in CNPG cluster") - return nil -} - // parseExtensionVersionsFromOutput parses the output of pg_available_extensions query // Returns defaultVersion, installedVersion, and a boolean indicating if parsing was successful // Expected output format: @@ -602,78 +502,151 @@ func parseExtensionVersionsFromOutput(output string) (defaultVersion, installedV return defaultVersion, installedVersion, true } -// updateDocumentDBExtensionIfNeeded checks if the installed documentdb extension version differs from the default version -// and runs ALTER EXTENSION documentdb UPDATE if needed. Returns true if the extension was updated. -func (r *DocumentDBReconciler) updateDocumentDBExtensionIfNeeded(ctx context.Context, cluster *cnpgv1.Cluster) (bool, error) { +// upgradeDocumentDBExtensionIfNeeded handles the complete DocumentDB extension upgrade process: +// 1. Updates the extension image in CNPG cluster spec if needed (triggers rolling restart) +// 2. Runs ALTER EXTENSION documentdb UPDATE if the installed version differs from default +// 3. Updates the DocumentDB status with the new extension version +func (r *DocumentDBReconciler) upgradeDocumentDBExtensionIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster, documentdb *dbpreview.DocumentDB) error { logger := log.FromContext(ctx) - if !slices.Contains(cluster.Status.InstancesStatus[cnpgv1.PodHealthy], cluster.Status.CurrentPrimary) { - logger.Info("Current primary pod is not healthy; skipping DocumentDB extension version check") - return false, nil + // Step 1: Check if extension image needs to be updated in CNPG cluster spec + imageUpdated, err := r.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) + if err != nil { + return fmt.Errorf("failed to update extension image: %w", err) + } + + // If image was updated, CNPG will trigger a rolling restart. + // Wait for pod to become healthy before running ALTER EXTENSION. + if imageUpdated { + logger.Info("Extension image updated, waiting for pod to become healthy before running ALTER EXTENSION") + return nil + } + + // Step 2: Check if primary pod is healthy before running ALTER EXTENSION + if !slices.Contains(currentCluster.Status.InstancesStatus[cnpgv1.PodHealthy], currentCluster.Status.CurrentPrimary) { + logger.Info("Current primary pod is not healthy; skipping DocumentDB extension upgrade") + return nil } - // Query the extension versions + // Step 3: Check if ALTER EXTENSION UPDATE is needed checkVersionSQL := "SELECT default_version, installed_version FROM pg_available_extensions WHERE name = 'documentdb'" - output, err := r.executeSQLCommand(ctx, cluster, checkVersionSQL) + output, err := r.executeSQLCommand(ctx, currentCluster, checkVersionSQL) if err != nil { - return false, fmt.Errorf("failed to check documentdb extension versions: %w", err) + return fmt.Errorf("failed to check documentdb extension versions: %w", err) } - // Parse the output to get default_version and installed_version defaultVersion, installedVersion, ok := parseExtensionVersionsFromOutput(output) if !ok { logger.Info("DocumentDB extension not found or not installed yet", "output", output) - return false, nil + return nil } - // If installed_version is empty, extension is not installed if installedVersion == "" { logger.Info("DocumentDB extension is not installed yet") - return false, nil + return nil } - // If versions match, no update needed + // Step 4: Update DocumentDB version in status (even if no upgrade needed) + if documentdb.Status.DocumentDBVersion != installedVersion { + documentdb.Status.DocumentDBVersion = installedVersion + if err := r.Status().Update(ctx, documentdb); err != nil { + logger.Error(err, "Failed to update DocumentDB status with extension version") + } + } + + // If versions match, no upgrade needed if defaultVersion == installedVersion { - logger.V(1).Info("DocumentDB extension is up to date", - "version", installedVersion) - return false, nil + logger.V(1).Info("DocumentDB extension is up to date", "version", installedVersion) + return nil } - logger.Info("DocumentDB extension version mismatch, updating extension", - "defaultVersion", defaultVersion, - "installedVersion", installedVersion) + // Step 5: Run ALTER EXTENSION to upgrade + logger.Info("Upgrading DocumentDB extension", + "fromVersion", installedVersion, + "toVersion", defaultVersion) - // Run ALTER EXTENSION to update updateSQL := "ALTER EXTENSION documentdb UPDATE" - _, err = r.executeSQLCommand(ctx, cluster, updateSQL) - if err != nil { - return false, fmt.Errorf("failed to update documentdb extension: %w", err) + if _, err := r.executeSQLCommand(ctx, currentCluster, updateSQL); err != nil { + return fmt.Errorf("failed to run ALTER EXTENSION documentdb UPDATE: %w", err) } - logger.Info("Successfully updated DocumentDB extension", + logger.Info("Successfully upgraded DocumentDB extension", "fromVersion", installedVersion, "toVersion", defaultVersion) - return true, nil + // Step 6: Update DocumentDB version in status after upgrade + documentdb.Status.DocumentDBVersion = defaultVersion + if err := r.Status().Update(ctx, documentdb); err != nil { + logger.Error(err, "Failed to update DocumentDB status after extension upgrade") + } + + return nil } -// getDocumentDBExtensionVersion queries the installed documentdb extension version -func (r *DocumentDBReconciler) getDocumentDBExtensionVersion(ctx context.Context, cluster *cnpgv1.Cluster) (string, error) { - if !slices.Contains(cluster.Status.InstancesStatus[cnpgv1.PodHealthy], cluster.Status.CurrentPrimary) { - return "", nil +// updateExtensionImageIfNeeded checks if the CNPG cluster's extension image differs from the desired one +// and updates it using JSON patch if needed. Returns true if the image was updated. +func (r *DocumentDBReconciler) updateExtensionImageIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster) (bool, error) { + logger := log.FromContext(ctx) + + // Get current documentdb extension image + var currentImage string + for _, ext := range currentCluster.Spec.PostgresConfiguration.Extensions { + if ext.Name == "documentdb" { + currentImage = ext.ImageVolumeSource.Reference + break + } + } + + // Get desired documentdb extension image + var desiredExtImage string + for _, ext := range desiredCluster.Spec.PostgresConfiguration.Extensions { + if ext.Name == "documentdb" { + desiredExtImage = ext.ImageVolumeSource.Reference + break + } + } + + // If images are the same, no update needed + if currentImage == desiredExtImage { + return false, nil } - checkVersionSQL := "SELECT installed_version FROM pg_available_extensions WHERE name = 'documentdb'" - output, err := r.executeSQLCommand(ctx, cluster, checkVersionSQL) + logger.Info("Updating DocumentDB extension image in CNPG cluster", + "currentImage", currentImage, + "desiredImage", desiredExtImage, + "clusterName", currentCluster.Name) + + // Find the index of the documentdb extension + extIndex := -1 + for i, ext := range currentCluster.Spec.PostgresConfiguration.Extensions { + if ext.Name == "documentdb" { + extIndex = i + break + } + } + + if extIndex == -1 { + return false, fmt.Errorf("documentdb extension not found in CNPG cluster spec") + } + + // Use JSON patch to update the extension image + patch := []map[string]interface{}{ + { + "op": "replace", + "path": fmt.Sprintf("/spec/postgresql/extensions/%d/image/reference", extIndex), + "value": desiredExtImage, + }, + } + + patchBytes, err := json.Marshal(patch) if err != nil { - return "", err + return false, fmt.Errorf("failed to marshal patch: %w", err) } - lines := strings.Split(strings.TrimSpace(output), "\n") - if len(lines) < 3 { - return "", nil + if err := r.Client.Patch(ctx, currentCluster, client.RawPatch(types.JSONPatchType, patchBytes)); err != nil { + return false, fmt.Errorf("failed to patch CNPG cluster with new extension image: %w", err) } - version := strings.TrimSpace(lines[2]) - return version, nil + logger.Info("Successfully updated DocumentDB extension image in CNPG cluster") + return true, nil } diff --git a/operator/src/internal/controller/documentdb_controller_test.go b/operator/src/internal/controller/documentdb_controller_test.go index 9d6dbc0f..8d04a42e 100644 --- a/operator/src/internal/controller/documentdb_controller_test.go +++ b/operator/src/internal/controller/documentdb_controller_test.go @@ -44,8 +44,8 @@ var _ = Describe("DocumentDB Controller", func() { Expect(corev1.AddToScheme(scheme)).To(Succeed()) }) - Describe("updateDocumentDBExtensionImageIfNeeded", func() { - It("should return nil when current and desired images are the same", func() { + Describe("updateExtensionImageIfNeeded", func() { + It("should return false when current and desired images are the same", func() { currentCluster := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: clusterName, @@ -94,16 +94,17 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v1.0.0") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeFalse()) // Verify the cluster was not updated (image should remain the same) - updated := &cnpgv1.Cluster{} - Expect(fakeClient.Get(ctx, client.ObjectKey{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) - Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v1.0.0")) + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, client.ObjectKey{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v1.0.0")) }) - It("should update extension image when current and desired images differ", func() { + It("should update extension image and return true when current and desired images differ", func() { currentCluster := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: clusterName, @@ -152,13 +153,14 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeTrue()) // Verify the cluster was updated with the new image - updated := &cnpgv1.Cluster{} - Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) - Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) }) It("should return error when documentdb extension is not found in current cluster", func() { @@ -210,7 +212,7 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + _, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("documentdb extension not found")) }) @@ -288,22 +290,23 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeTrue()) // Verify only documentdb extension was updated - updated := &cnpgv1.Cluster{} - Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) - Expect(updated.Spec.PostgresConfiguration.Extensions).To(HaveLen(3)) - Expect(updated.Spec.PostgresConfiguration.Extensions[0].Name).To(Equal("pg_stat_statements")) - Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("postgres/pg_stat_statements:v1.0.0")) - Expect(updated.Spec.PostgresConfiguration.Extensions[1].Name).To(Equal("documentdb")) - Expect(updated.Spec.PostgresConfiguration.Extensions[1].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) - Expect(updated.Spec.PostgresConfiguration.Extensions[2].Name).To(Equal("pg_cron")) - Expect(updated.Spec.PostgresConfiguration.Extensions[2].ImageVolumeSource.Reference).To(Equal("postgres/pg_cron:v1.0.0")) + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions).To(HaveLen(3)) + Expect(result.Spec.PostgresConfiguration.Extensions[0].Name).To(Equal("pg_stat_statements")) + Expect(result.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("postgres/pg_stat_statements:v1.0.0")) + Expect(result.Spec.PostgresConfiguration.Extensions[1].Name).To(Equal("documentdb")) + Expect(result.Spec.PostgresConfiguration.Extensions[1].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + Expect(result.Spec.PostgresConfiguration.Extensions[2].Name).To(Equal("pg_cron")) + Expect(result.Spec.PostgresConfiguration.Extensions[2].ImageVolumeSource.Reference).To(Equal("postgres/pg_cron:v1.0.0")) }) - It("should return nil when no extensions exist in both clusters", func() { + It("should return false when no extensions exist in both clusters", func() { currentCluster := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: clusterName, @@ -339,8 +342,9 @@ var _ = Describe("DocumentDB Controller", func() { } // Both clusters have no extensions, images are both empty strings, so they match - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeFalse()) }) It("should handle documentdb extension as the only extension", func() { @@ -394,15 +398,16 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v3.0.0") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeTrue()) // Verify the cluster was updated with the new image - updated := &cnpgv1.Cluster{} - Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) - Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v3.0.0")) + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v3.0.0")) // Verify other fields are preserved - Expect(updated.Spec.PostgresConfiguration.Extensions[0].LdLibraryPath).To(Equal([]string{"lib"})) + Expect(result.Spec.PostgresConfiguration.Extensions[0].LdLibraryPath).To(Equal([]string{"lib"})) }) It("should handle documentdb extension at the beginning of extensions list", func() { @@ -466,13 +471,14 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeTrue()) // Verify the cluster was updated - updated := &cnpgv1.Cluster{} - Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) - Expect(updated.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) }) It("should handle documentdb extension at the end of extensions list", func() { @@ -536,23 +542,36 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionImageIfNeeded(ctx, currentCluster, desiredCluster, "documentdb/documentdb:v2.0.0") + updated, err := reconciler.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) Expect(err).ToNot(HaveOccurred()) + Expect(updated).To(BeTrue()) // Verify the cluster was updated - updated := &cnpgv1.Cluster{} - Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, updated)).To(Succeed()) - Expect(updated.Spec.PostgresConfiguration.Extensions[1].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions[1].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) }) }) - Describe("updateDocumentDBExtensionIfNeeded", func() { + Describe("upgradeDocumentDBExtensionIfNeeded", func() { It("should return nil when primary pod is not healthy", func() { cluster := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: clusterName, Namespace: clusterNamespace, }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, Status: cnpgv1.ClusterStatus{ CurrentPrimary: "test-cluster-1", InstancesStatus: map[cnpgv1.PodStatus][]string{ @@ -561,9 +580,36 @@ var _ = Describe("DocumentDB Controller", func() { }, } + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, + } + + documentdb := &dbpreview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-documentdb", + Namespace: clusterNamespace, + }, + } + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(cluster). + WithObjects(cluster, documentdb). + WithStatusSubresource(&dbpreview.DocumentDB{}). Build() reconciler := &DocumentDBReconciler{ @@ -571,7 +617,7 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + err := reconciler.upgradeDocumentDBExtensionIfNeeded(ctx, cluster, desiredCluster, documentdb) Expect(err).ToNot(HaveOccurred()) }) @@ -581,41 +627,54 @@ var _ = Describe("DocumentDB Controller", func() { Name: clusterName, Namespace: clusterNamespace, }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, Status: cnpgv1.ClusterStatus{ CurrentPrimary: "test-cluster-1", InstancesStatus: map[cnpgv1.PodStatus][]string{}, }, } - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(cluster). - Build() - - reconciler := &DocumentDBReconciler{ - Client: fakeClient, - Scheme: scheme, - } - - err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) - Expect(err).ToNot(HaveOccurred()) - }) - - It("should return nil when InstancesStatus is nil", func() { - cluster := &cnpgv1.Cluster{ + desiredCluster := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: clusterName, Namespace: clusterNamespace, }, - Status: cnpgv1.ClusterStatus{ - CurrentPrimary: "test-cluster-1", - InstancesStatus: nil, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, + } + + documentdb := &dbpreview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-documentdb", + Namespace: clusterNamespace, }, } fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(cluster). + WithObjects(cluster, documentdb). + WithStatusSubresource(&dbpreview.DocumentDB{}). Build() reconciler := &DocumentDBReconciler{ @@ -623,27 +682,66 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + err := reconciler.upgradeDocumentDBExtensionIfNeeded(ctx, cluster, desiredCluster, documentdb) Expect(err).ToNot(HaveOccurred()) }) - It("should return nil when CurrentPrimary is empty", func() { + It("should return nil and update image when image differs", func() { cluster := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: clusterName, Namespace: clusterNamespace, }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v1.0.0", + }, + }, + }, + }, + }, Status: cnpgv1.ClusterStatus{ - CurrentPrimary: "", + CurrentPrimary: "test-cluster-1", InstancesStatus: map[cnpgv1.PodStatus][]string{ cnpgv1.PodHealthy: {"test-cluster-1"}, }, }, } + desiredCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + }, + Spec: cnpgv1.ClusterSpec{ + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Extensions: []cnpgv1.ExtensionConfiguration{ + { + Name: "documentdb", + ImageVolumeSource: corev1.ImageVolumeSource{ + Reference: "documentdb/documentdb:v2.0.0", + }, + }, + }, + }, + }, + } + + documentdb := &dbpreview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-documentdb", + Namespace: clusterNamespace, + }, + } + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(cluster). + WithObjects(cluster, documentdb). + WithStatusSubresource(&dbpreview.DocumentDB{}). Build() reconciler := &DocumentDBReconciler{ @@ -651,8 +749,14 @@ var _ = Describe("DocumentDB Controller", func() { Scheme: scheme, } - err := reconciler.updateDocumentDBExtensionIfNeeded(ctx, cluster) + // Should update image and return nil (waiting for pod to become healthy) + err := reconciler.upgradeDocumentDBExtensionIfNeeded(ctx, cluster, desiredCluster, documentdb) Expect(err).ToNot(HaveOccurred()) + + // Verify image was updated + result := &cnpgv1.Cluster{} + Expect(fakeClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}, result)).To(Succeed()) + Expect(result.Spec.PostgresConfiguration.Extensions[0].ImageVolumeSource.Reference).To(Equal("documentdb/documentdb:v2.0.0")) }) }) From 3eee73d1fe290cdf7f9798aa50a6cbc7691f001d Mon Sep 17 00:00:00 2001 From: wenting Date: Wed, 21 Jan 2026 19:40:36 -0500 Subject: [PATCH 10/16] postgresql:18-standard-bookworm Signed-off-by: wenting --- operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml | 4 ++-- operator/src/api/preview/documentdb_types.go | 4 ++-- operator/src/config/crd/bases/documentdb.io_dbs.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml index 6206d7b6..738bf14c 100644 --- a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml +++ b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml @@ -193,10 +193,10 @@ spec: minimum: 1 type: integer postgresImage: - default: ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm + default: ghcr.io/cloudnative-pg/postgresql:18-standard-bookworm description: |- PostgresImage is the container image to use for the PostgreSQL server. - If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm". + If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-standard-bookworm". type: string resource: description: Resource specifies the storage resources for DocumentDB. diff --git a/operator/src/api/preview/documentdb_types.go b/operator/src/api/preview/documentdb_types.go index 0f79bb68..9c772ae7 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -39,8 +39,8 @@ type DocumentDBSpec struct { GatewayImage string `json:"gatewayImage,omitempty"` // PostgresImage is the container image to use for the PostgreSQL server. - // If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm". - // +kubebuilder:default="ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm" + // If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-standard-bookworm". + // +kubebuilder:default="ghcr.io/cloudnative-pg/postgresql:18-standard-bookworm" // +optional PostgresImage string `json:"postgresImage,omitempty"` diff --git a/operator/src/config/crd/bases/documentdb.io_dbs.yaml b/operator/src/config/crd/bases/documentdb.io_dbs.yaml index 6206d7b6..738bf14c 100644 --- a/operator/src/config/crd/bases/documentdb.io_dbs.yaml +++ b/operator/src/config/crd/bases/documentdb.io_dbs.yaml @@ -193,10 +193,10 @@ spec: minimum: 1 type: integer postgresImage: - default: ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm + default: ghcr.io/cloudnative-pg/postgresql:18-standard-bookworm description: |- PostgresImage is the container image to use for the PostgreSQL server. - If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-minimal-bookworm". + If not specified, defaults to "ghcr.io/cloudnative-pg/postgresql:18-standard-bookworm". type: string resource: description: Resource specifies the storage resources for DocumentDB. From 35b47caccdaed5c0d8c682d8b04894d3ad2022f1 Mon Sep 17 00:00:00 2001 From: wenting Date: Wed, 21 Jan 2026 20:20:50 -0500 Subject: [PATCH 11/16] Remove hardcoded PostgresUID/GID to fix initdb error with standard postgres images Signed-off-by: wenting --- operator/src/internal/cnpg/cnpg_cluster.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go index 908bf42e..64f8ecc0 100644 --- a/operator/src/internal/cnpg/cnpg_cluster.go +++ b/operator/src/internal/cnpg/cnpg_cluster.go @@ -74,8 +74,6 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu Parameters: params, }} }(), - PostgresUID: 105, - PostgresGID: 108, PostgresConfiguration: cnpgv1.PostgresConfiguration{ Extensions: []cnpgv1.ExtensionConfiguration{ { From a61d520a2621f102cbce2660b2aaea7b02ae6df6 Mon Sep 17 00:00:00 2001 From: wenting Date: Thu, 22 Jan 2026 09:28:25 -0500 Subject: [PATCH 12/16] Upgrade kind to v0.31.0 with Kubernetes v1.35.0 for E2E tests Signed-off-by: wenting --- .github/actions/setup-test-environment/action.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 661d5924..91240531 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -198,8 +198,10 @@ runs: echo "✓ mongosh installed successfully for ${{ inputs.architecture }}" - name: Create kind cluster - uses: helm/kind-action@v1.8.0 + uses: helm/kind-action@v1.12.0 with: + version: v0.31.0 + node_image: kindest/node:v1.35.0@sha256:452d707d4862f52530247495d180205e029056831160e22870e37e3f6c1ac31f cluster_name: documentdb-${{ inputs.test-type }}-${{ inputs.architecture }}-${{ inputs.test-scenario-name }} - name: Load Docker images into kind cluster (local build) From 32b58e55aa1741069a5dd1ddc25dc79a031a7fce Mon Sep 17 00:00:00 2001 From: wenting Date: Wed, 28 Jan 2026 19:36:36 +0000 Subject: [PATCH 13/16] enable ImageVolume in github kind Signed-off-by: wenting --- .github/actions/setup-test-environment/action.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 91240531..cfc8bec8 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -200,9 +200,12 @@ runs: - name: Create kind cluster uses: helm/kind-action@v1.12.0 with: - version: v0.31.0 - node_image: kindest/node:v1.35.0@sha256:452d707d4862f52530247495d180205e029056831160e22870e37e3f6c1ac31f cluster_name: documentdb-${{ inputs.test-type }}-${{ inputs.architecture }}-${{ inputs.test-scenario-name }} + config: | + kind: Cluster + apiVersion: kind.x-k8s.io/v1alpha4 + featureGates: + "ImageVolume": true - name: Load Docker images into kind cluster (local build) if: inputs.use-external-images == 'false' From 5c53ddb0d2c02adba0d76a6ba608b99eb8106ac9 Mon Sep 17 00:00:00 2001 From: wenting Date: Wed, 28 Jan 2026 19:59:37 +0000 Subject: [PATCH 14/16] Enable ImageVolume feature gate in kind cluster Signed-off-by: wenting --- .../actions/setup-test-environment/action.yml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index cfc8bec8..27359e2e 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -197,15 +197,25 @@ runs: mongosh --version echo "✓ mongosh installed successfully for ${{ inputs.architecture }}" + - name: Create kind config file + shell: bash + run: | + cat > /tmp/kind-config.yaml < Date: Wed, 28 Jan 2026 22:25:48 +0000 Subject: [PATCH 15/16] fix: E2E test upgrade check uses documentDBVersion status field Signed-off-by: wenting --- .../actions/setup-test-environment/action.yml | 5 +- .github/workflows/test-E2E.yml | 166 +++++++++--------- .github/workflows/test-backup-and-restore.yml | 13 +- .github/workflows/test-build-and-package.yml | 76 +------- .github/workflows/test-integration.yml | 2 + 5 files changed, 93 insertions(+), 169 deletions(-) diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 27359e2e..ab849c32 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -54,6 +54,9 @@ inputs: description: 'Whether to use external images instead of loading from artifacts' required: false default: 'false' + documentdb-image: + description: 'DocumentDB image to use for the cluster' + required: true # GitHub configuration github-token: description: 'GitHub token for accessing packages' @@ -593,7 +596,7 @@ runs: spec: nodeCount: ${{ inputs.node-count }} instancesPerNode: ${{ inputs.instances-per-node }} - documentDBImage: ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.109.0 + documentDBImage: ${{ inputs.documentdb-image }} gatewayImage: ghcr.io/microsoft/documentdb/documentdb-local:16 resource: storage: diff --git a/.github/workflows/test-E2E.yml b/.github/workflows/test-E2E.yml index f873622e..39796c07 100644 --- a/.github/workflows/test-E2E.yml +++ b/.github/workflows/test-E2E.yml @@ -57,6 +57,8 @@ env: DB_USERNAME: k8s_secret_user DB_PASSWORD: K8sSecret100 DB_PORT: 10260 + DOCUMENTDB_OLD_IMAGE: ghcr.io/guanzhousongmicrosoft/documentdb-pg18:0.109.0 + DOCUMENTDB_IMAGE: ghcr.io/guanzhousongmicrosoft/documentdb-pg18:0.110.0 jobs: # Conditional build workflow - only run if image_tag is not provided or on pull_request @@ -143,10 +145,89 @@ jobs: db-port: ${{ env.DB_PORT }} image-tag: ${{ env.IMAGE_TAG }} chart-version: ${{ env.CHART_VERSION }} + documentdb-image: ${{ env.DOCUMENTDB_OLD_IMAGE }} use-external-images: ${{ github.event_name != 'pull_request' && inputs.image_tag != '' && inputs.image_tag != null }} github-token: ${{ secrets.GITHUB_TOKEN }} repository-owner: ${{ github.repository_owner }} + - name: Test DocumentDB Image Upgrade + run: | + echo "Testing DocumentDB extension image upgrade on ${{ matrix.architecture }}..." + + OLD_IMAGE="${{ env.DOCUMENTDB_OLD_IMAGE }}" + NEW_IMAGE="${{ env.DOCUMENTDB_IMAGE }}" + + # Verify current image is the old image + CURRENT_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') + echo "Current DocumentDB image: $CURRENT_IMAGE" + + if [[ "$CURRENT_IMAGE" != "$OLD_IMAGE" ]]; then + echo "❌ Expected old image $OLD_IMAGE but found $CURRENT_IMAGE" + exit 1 + fi + echo "✓ Cluster deployed with old image" + + # Check DocumentDB version in status before upgrade + DOCUMENTDB_VERSION_BEFORE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBVersion}') + echo "DocumentDB version before upgrade: $DOCUMENTDB_VERSION_BEFORE" + + echo "" + echo "Upgrading DocumentDB image to $NEW_IMAGE..." + kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' -p "{\"spec\":{\"documentDBImage\":\"$NEW_IMAGE\"}}" + + echo "Waiting for cluster to be healthy with new image..." + + # Extract expected version from image tag (e.g., "0.110.0" -> "0.110-0") + NEW_VERSION_TAG="${NEW_IMAGE##*:}" + EXPECTED_VERSION=$(echo "$NEW_VERSION_TAG" | sed 's/\.\([^.]*\)$/-\1/') + echo "Expected DocumentDB version after upgrade: $EXPECTED_VERSION" + + timeout 600 bash -c ' + + while true; do + DB_STATUS=$(kubectl get documentdb '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.status}" 2>/dev/null) + CLUSTER_STATUS=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.phase}" 2>/dev/null) + + # Check DocumentDB version in status + DOCUMENTDB_VERSION=$(kubectl get documentdb '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.documentDBVersion}" 2>/dev/null || echo "N/A") + + echo "DocumentDB status: $DB_STATUS, CNPG Cluster phase: $CLUSTER_STATUS, DocumentDB version: $DOCUMENTDB_VERSION" + + if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then + if [[ "$DOCUMENTDB_VERSION" == "'"$EXPECTED_VERSION"'" ]]; then + HEALTHY_PODS=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") + if [[ "$HEALTHY_PODS" -ge "1" ]]; then + echo "✓ Cluster is healthy with new image and $HEALTHY_PODS healthy pods" + break + fi + fi + fi + + sleep 10 + done + ' + + echo "Verifying new image is applied..." + FINAL_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') + echo "Final DocumentDB image in spec: $FINAL_IMAGE" + + if [[ "$FINAL_IMAGE" != "$NEW_IMAGE" ]]; then + echo "❌ New image not applied to DocumentDB spec" + kubectl get documentdb $DB_NAME -n $DB_NS -o yaml + exit 1 + fi + + echo "✓ New image applied successfully" + + # Check DocumentDB version in status after upgrade + DOCUMENTDB_VERSION_AFTER=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBVersion}') + echo "DocumentDB version before upgrade: $DOCUMENTDB_VERSION_BEFORE" + echo "DocumentDB version after upgrade: $DOCUMENTDB_VERSION_AFTER" + + echo "" + echo "✅ DocumentDB image upgrade test completed successfully!" + echo "Upgraded from $OLD_IMAGE to $NEW_IMAGE" + - name: Setup port forwarding for comprehensive tests uses: ./.github/actions/setup-port-forwarding with: @@ -264,91 +345,6 @@ jobs: echo "✅ DocumentDB status validation passed" - - name: Test DocumentDB Image Upgrade - run: | - echo "Testing DocumentDB extension image upgrade on ${{ matrix.architecture }}..." - - OLD_IMAGE="ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.109.0" - NEW_IMAGE="ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.110.0" - - # Verify current image is the old image - CURRENT_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - echo "Current DocumentDB image: $CURRENT_IMAGE" - - if [[ "$CURRENT_IMAGE" != "$OLD_IMAGE" ]]; then - echo "❌ Expected old image $OLD_IMAGE but found $CURRENT_IMAGE" - exit 1 - fi - echo "✓ Cluster deployed with old image" - - # Check DocumentDB version in status before upgrade - DOCUMENTDB_VERSION_BEFORE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBVersion}') - echo "DocumentDB version before upgrade: $DOCUMENTDB_VERSION_BEFORE" - - # Upgrade to new image - echo "" - echo "Upgrading DocumentDB image to $NEW_IMAGE..." - kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' -p "{\"spec\":{\"documentDBImage\":\"$NEW_IMAGE\"}}" - - # Wait for cluster to be healthy with new image - echo "Waiting for cluster to be healthy with new image..." - timeout 600 bash -c ' - NEW_IMAGE="ghcr.io/guanzhousongmicrosoft/documentdb-pg18-amd64:0.110.0" - - while true; do - DB_STATUS=$(kubectl get documentdb '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.status}" 2>/dev/null) - CLUSTER_STATUS=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.phase}" 2>/dev/null) - - # Check extension image in CNPG cluster - EXTENSION_IMAGE=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.spec.postgresConfiguration.extensions[?(@.name==\"documentdb\")].imageVolumeSource.reference}" 2>/dev/null || echo "N/A") - - echo "DocumentDB status: $DB_STATUS, CNPG Cluster phase: $CLUSTER_STATUS, Extension image: $EXTENSION_IMAGE" - - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - if [[ "$EXTENSION_IMAGE" == "$NEW_IMAGE" ]]; then - HEALTHY_PODS=$(kubectl get cluster '$DB_NAME' -n '$DB_NS' -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") - if [[ "$HEALTHY_PODS" -ge "1" ]]; then - echo "✓ Cluster is healthy with new image and $HEALTHY_PODS healthy pods" - break - fi - fi - fi - - sleep 10 - done - ' - - # Verify the new image is applied - echo "Verifying new image is applied..." - FINAL_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - CNPG_EXTENSION_IMAGE=$(kubectl get cluster $DB_NAME -n $DB_NS -o jsonpath='{.spec.postgresConfiguration.extensions[?(@.name=="documentdb")].imageVolumeSource.reference}') - - echo "Final DocumentDB image in spec: $FINAL_IMAGE" - echo "Final extension image in CNPG cluster: $CNPG_EXTENSION_IMAGE" - - if [[ "$FINAL_IMAGE" != "$NEW_IMAGE" ]]; then - echo "❌ New image not applied to DocumentDB spec" - kubectl get documentdb $DB_NAME -n $DB_NS -o yaml - exit 1 - fi - - if [[ "$CNPG_EXTENSION_IMAGE" != "$NEW_IMAGE" ]]; then - echo "❌ New image not applied to CNPG cluster extension" - kubectl get cluster $DB_NAME -n $DB_NS -o yaml - exit 1 - fi - - echo "✓ New image applied successfully" - - # Check DocumentDB version in status after upgrade - DOCUMENTDB_VERSION_AFTER=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBVersion}') - echo "DocumentDB version before upgrade: $DOCUMENTDB_VERSION_BEFORE" - echo "DocumentDB version after upgrade: $DOCUMENTDB_VERSION_AFTER" - - echo "" - echo "✅ DocumentDB image upgrade test completed successfully!" - echo "Upgraded from $OLD_IMAGE to $NEW_IMAGE" - - name: Test cluster health and monitoring run: | echo "Testing cluster health and monitoring on ${{ matrix.architecture }}..." diff --git a/.github/workflows/test-backup-and-restore.yml b/.github/workflows/test-backup-and-restore.yml index 4949b4df..120a6f43 100644 --- a/.github/workflows/test-backup-and-restore.yml +++ b/.github/workflows/test-backup-and-restore.yml @@ -9,10 +9,6 @@ on: - cron: '0 2 * * *' workflow_dispatch: inputs: - documentdb_version: - description: 'DocumentDB image version to test' - required: false - default: '16' node_count: description: 'Number of DocumentDB nodes' required: false @@ -27,11 +23,6 @@ on: description: 'Optional: Use existing image tag instead of building locally' required: false type: string - documentdb_version: - description: 'DocumentDB image version to test' - required: false - default: '16' - type: string node_count: description: 'Number of DocumentDB nodes' required: false @@ -52,6 +43,7 @@ env: DB_USERNAME: k8s_secret_user DB_PASSWORD: K8sSecret100 DB_PORT: 10260 + DOCUMENTDB_IMAGE: ghcr.io/guanzhousongmicrosoft/documentdb-pg18:0.110.0 jobs: # Conditional build workflow - only run if image_tag is not provided or on pull_request @@ -131,6 +123,7 @@ jobs: db-port: ${{ env.DB_PORT }} image-tag: ${{ env.IMAGE_TAG }} chart-version: ${{ env.CHART_VERSION }} + documentdb-image: ${{ env.DOCUMENTDB_IMAGE }} use-external-images: ${{ github.event_name != 'pull_request' && inputs.image_tag != '' && inputs.image_tag != null }} github-token: ${{ secrets.GITHUB_TOKEN }} repository-owner: ${{ github.repository_owner }} @@ -279,7 +272,7 @@ jobs: spec: nodeCount: ${{ matrix.node_count }} instancesPerNode: ${{ matrix.instances_per_node }} - documentDBImage: ghcr.io/microsoft/documentdb/documentdb-local:16 + documentDBImage: ${{ env.DOCUMENTDB_IMAGE }} gatewayImage: ghcr.io/microsoft/documentdb/documentdb-local:16 resource: storage: diff --git a/.github/workflows/test-build-and-package.yml b/.github/workflows/test-build-and-package.yml index afacfc57..8af51e78 100644 --- a/.github/workflows/test-build-and-package.yml +++ b/.github/workflows/test-build-and-package.yml @@ -118,52 +118,6 @@ jobs: path: sidecar-${{ matrix.arch }}-image.tar retention-days: 1 - build-documentdb: - name: Build DocumentDB Images - timeout-minutes: 30 - strategy: - matrix: - arch: [amd64, arm64] - include: - - arch: amd64 - base_arch: AMD64 - runner: ubuntu-22.04 - - arch: arm64 - base_arch: ARM64 - runner: ubuntu-22.04-arm - runs-on: ${{ matrix.runner }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build documentdb Docker image for ${{ matrix.arch }} - run: | - echo "Building documentdb Docker image for ${{ matrix.arch }} architecture..." - docker buildx build \ - --platform linux/${{ matrix.arch }} \ - --build-arg ARCH=${{ matrix.base_arch }} \ - --tag ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${{ env.IMAGE_TAG }}-${{ matrix.arch }} \ - --load \ - -f .github/dockerfiles/Dockerfile_docdb . - - echo "✓ DocumentDB Docker image built successfully for ${{ matrix.arch }}" - - - name: Save documentdb Docker image as artifact - run: | - echo "Saving documentdb ${{ matrix.arch }} Docker image as tar file..." - docker save ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${{ env.IMAGE_TAG }}-${{ matrix.arch }} > documentdb-${{ matrix.arch }}-image.tar - echo "✓ DocumentDB ${{ matrix.arch }} Docker image saved to tar file" - - - name: Upload documentdb Docker image artifact - uses: actions/upload-artifact@v4 - with: - name: build-docker-documentdb-${{ matrix.arch }} - path: documentdb-${{ matrix.arch }}-image.tar - retention-days: 1 - build-gateway: name: Build Gateway Images timeout-minutes: 30 @@ -214,7 +168,7 @@ jobs: name: Consolidate Platform Images runs-on: ubuntu-latest timeout-minutes: 15 - needs: [build-operator, build-sidecar, build-documentdb, build-gateway] + needs: [build-operator, build-sidecar, build-gateway] outputs: image_tag: ${{ env.IMAGE_TAG }} steps: @@ -273,29 +227,7 @@ jobs: ls -la ./artifacts/build-docker-sidecar-arm64/ || echo "Directory not found" exit 1 fi - - # Load documentdb images - echo "Loading documentdb AMD64 image..." - if [ -f ./artifacts/build-docker-documentdb-amd64/documentdb-amd64-image.tar ]; then - docker load < ./artifacts/build-docker-documentdb-amd64/documentdb-amd64-image.tar - echo "✓ DocumentDB AMD64 image loaded" - else - echo "❌ DocumentDB AMD64 image file not found" - ls -la ./artifacts/build-docker-documentdb-amd64/ || echo "Directory not found" - exit 1 - fi - - echo "Loading documentdb ARM64 image..." - if [ -f ./artifacts/build-docker-documentdb-arm64/documentdb-arm64-image.tar ]; then - docker load < ./artifacts/build-docker-documentdb-arm64/documentdb-arm64-image.tar - echo "✓ DocumentDB ARM64 image loaded" - else - echo "❌ DocumentDB ARM64 image file not found" - ls -la ./artifacts/build-docker-documentdb-arm64/ || echo "Directory not found" - exit 1 - fi - - # Load gateway images + echo "Loading gateway AMD64 image..." if [ -f ./artifacts/build-docker-gateway-amd64/gateway-amd64-image.tar ]; then docker load < ./artifacts/build-docker-gateway-amd64/gateway-amd64-image.tar @@ -324,14 +256,12 @@ jobs: run: | echo "Saving platform-specific images as artifacts..." - # Save all 8 platform-specific images (4 types x 2 architectures) + # Save all 6 platform-specific images (3 types x 2 architectures) docker save \ ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/operator:${{ env.IMAGE_TAG }}-amd64 \ ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/operator:${{ env.IMAGE_TAG }}-arm64 \ ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/sidecar:${{ env.IMAGE_TAG }}-amd64 \ ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/sidecar:${{ env.IMAGE_TAG }}-arm64 \ - ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${{ env.IMAGE_TAG }}-amd64 \ - ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${{ env.IMAGE_TAG }}-arm64 \ ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/gateway:${{ env.IMAGE_TAG }}-amd64 \ ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/gateway:${{ env.IMAGE_TAG }}-arm64 \ > platform-specific-images.tar diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index aeb53786..d077d1fe 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -33,6 +33,7 @@ env: DB_USERNAME: default_user DB_PASSWORD: Admin100 DB_PORT: 10260 + DOCUMENTDB_IMAGE: ghcr.io/guanzhousongmicrosoft/documentdb-pg18:0.110.0 jobs: # Use the reusable build workflow - only if no image tag is provided or on pull_request @@ -119,6 +120,7 @@ jobs: db-port: ${{ env.DB_PORT }} image-tag: ${{ env.IMAGE_TAG }} chart-version: ${{ env.CHART_VERSION }} + documentdb-image: ${{ env.DOCUMENTDB_IMAGE }} use-external-images: ${{ github.event_name != 'pull_request' && github.event.inputs.image_tag != '' && github.event.inputs.image_tag != null }} github-token: ${{ secrets.GITHUB_TOKEN }} repository-owner: ${{ github.repository_owner }} From 6b8a917d87a286955227811929644986df05e383 Mon Sep 17 00:00:00 2001 From: wenting Date: Mon, 2 Feb 2026 15:02:45 -0500 Subject: [PATCH 16/16] Fix PR review comments: status update error handling and race conditions - Return errors for status updates in upgradeDocumentDBExtensionIfNeeded to trigger requeue - Refetch documentdb resource before status updates to avoid race conditions - Fix typo: reque -> requeue - Add documentation for version parsing in E2E workflow Signed-off-by: wenting --- .github/workflows/test-E2E.yml | 3 +++ .../src/internal/controller/documentdb_controller.go | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-E2E.yml b/.github/workflows/test-E2E.yml index 39796c07..fec60188 100644 --- a/.github/workflows/test-E2E.yml +++ b/.github/workflows/test-E2E.yml @@ -178,6 +178,9 @@ jobs: echo "Waiting for cluster to be healthy with new image..." # Extract expected version from image tag (e.g., "0.110.0" -> "0.110-0") + # NOTE: This parsing assumes semver format "X.Y.Z" where the last component becomes + # hyphen-separated in the extension version. Pre-release tags (e.g., "0.110.0-beta") + # are not currently supported and would not be converted correctly. NEW_VERSION_TAG="${NEW_IMAGE##*:}" EXPECTED_VERSION=$(echo "$NEW_VERSION_TAG" | sed 's/\.\([^.]*\)$/-\1/') echo "Expected DocumentDB version after upgrade: $EXPECTED_VERSION" diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go index 017f92e0..cd9a1f3a 100644 --- a/operator/src/internal/controller/documentdb_controller.go +++ b/operator/src/internal/controller/documentdb_controller.go @@ -264,7 +264,7 @@ func (r *DocumentDBReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{RequeueAfter: RequeueAfterShort}, nil } - // Don't reque again unless there is a change + // Don't requeue again unless there is a change return ctrl.Result{}, nil } @@ -509,6 +509,11 @@ func parseExtensionVersionsFromOutput(output string) (defaultVersion, installedV func (r *DocumentDBReconciler) upgradeDocumentDBExtensionIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster, documentdb *dbpreview.DocumentDB) error { logger := log.FromContext(ctx) + // Refetch documentdb to avoid potential race conditions with status updates + if err := r.Get(ctx, types.NamespacedName{Name: documentdb.Name, Namespace: documentdb.Namespace}, documentdb); err != nil { + return fmt.Errorf("failed to refetch DocumentDB resource: %w", err) + } + // Step 1: Check if extension image needs to be updated in CNPG cluster spec imageUpdated, err := r.updateExtensionImageIfNeeded(ctx, currentCluster, desiredCluster) if err != nil { @@ -551,6 +556,7 @@ func (r *DocumentDBReconciler) upgradeDocumentDBExtensionIfNeeded(ctx context.Co documentdb.Status.DocumentDBVersion = installedVersion if err := r.Status().Update(ctx, documentdb); err != nil { logger.Error(err, "Failed to update DocumentDB status with extension version") + return fmt.Errorf("failed to update DocumentDB status with extension version: %w", err) } } @@ -578,6 +584,7 @@ func (r *DocumentDBReconciler) upgradeDocumentDBExtensionIfNeeded(ctx context.Co documentdb.Status.DocumentDBVersion = defaultVersion if err := r.Status().Update(ctx, documentdb); err != nil { logger.Error(err, "Failed to update DocumentDB status after extension upgrade") + return fmt.Errorf("failed to update DocumentDB status after extension upgrade: %w", err) } return nil