diff --git a/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22__amd64-nightly.yaml b/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22__amd64-nightly.yaml
index 15fed7bfab74d..228f324c114d7 100644
--- a/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22__amd64-nightly.yaml
+++ b/ci-operator/config/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22__amd64-nightly.yaml
@@ -649,6 +649,15 @@ tests:
     test:
     - chain: openshift-e2e-test-qe-destructive
     workflow: cucushift-installer-rehearse-aws-ipi-edge-zone-cco-manual-security-token-service
+- as: aws-ipi-zone-consistency-f28
+  cron: 30 10 15 * *
+  steps:
+    cluster_profile: aws-qe
+    env:
+      BASE_DOMAIN: qe.devcluster.openshift.com
+    test:
+    - chain: openshift-e2e-test-qe
+    workflow: cucushift-installer-rehearse-aws-ipi-zone-consistency
 - as: aws-ipi-ovn-stress-f28
   cron: 19 17 7 * *
   steps:
diff --git a/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22-periodics.yaml b/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22-periodics.yaml
index c9ccef8f64f4b..8191d6b0e66f9 100644
--- a/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22-periodics.yaml
+++ b/ci-operator/jobs/openshift/openshift-tests-private/openshift-openshift-tests-private-release-4.22-periodics.yaml
@@ -22584,6 +22584,88 @@ periodics:
     - name: result-aggregator
       secret:
         secretName: result-aggregator
+- agent: kubernetes
+  cluster: build09
+  cron: 30 10 15 * *
+  decorate: true
+  decoration_config:
+    skip_cloning: true
+  extra_refs:
+  - base_ref: release-4.22
+    org: openshift
+    repo: openshift-tests-private
+  labels:
+    ci-operator.openshift.io/cloud: aws
+    ci-operator.openshift.io/cloud-cluster-profile: aws-qe
+    ci-operator.openshift.io/variant: amd64-nightly
+    ci.openshift.io/generator: prowgen
+    job-release: "4.22"
+    pj-rehearse.openshift.io/can-be-rehearsed: "true"
+  name: periodic-ci-openshift-openshift-tests-private-release-4.22-amd64-nightly-aws-ipi-zone-consistency-f28
+  spec:
+    containers:
+    - args:
+      - --gcs-upload-secret=/secrets/gcs/service-account.json
+      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+      - --lease-server-credentials-file=/etc/boskos/credentials
+      - --oauth-token-path=/usr/local/github-credentials/oauth
+      - --report-credentials-file=/etc/report/credentials
+      - --secret-dir=/secrets/ci-pull-credentials
+      - --target=aws-ipi-zone-consistency-f28
+      - --variant=amd64-nightly
+      command:
+      - ci-operator
+      image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+      imagePullPolicy: Always
+      name: ""
+      resources:
+        requests:
+          cpu: 10m
+      volumeMounts:
+      - mountPath: /etc/boskos
+        name: boskos
+        readOnly: true
+      - mountPath: /secrets/ci-pull-credentials
+        name: ci-pull-credentials
+        readOnly: true
+      - mountPath: /secrets/gcs
+        name: gcs-credentials
+        readOnly: true
+      - mountPath: /usr/local/github-credentials
+        name: github-credentials-openshift-ci-robot-private-git-cloner
+        readOnly: true
+      - mountPath: /secrets/manifest-tool
+        name: manifest-tool-local-pusher
+        readOnly: true
+      - mountPath: /etc/pull-secret
+        name: pull-secret
+        readOnly: true
+      - mountPath: /etc/report
+        name: result-aggregator
+        readOnly: true
+    serviceAccountName: ci-operator
+    volumes:
+    - name: boskos
+      secret:
+        items:
+        - key: credentials
+          path: credentials
+        secretName: boskos-credentials
+    - name: ci-pull-credentials
+      secret:
+        secretName: ci-pull-credentials
+    - name: github-credentials-openshift-ci-robot-private-git-cloner
+      secret:
+        secretName: github-credentials-openshift-ci-robot-private-git-cloner
+    - name: manifest-tool-local-pusher
+      secret:
+        secretName: manifest-tool-local-pusher
+    - name: pull-secret
+      secret:
+        secretName: registry-pull-credentials
+    - name: result-aggregator
+      secret:
+        secretName: result-aggregator
 - agent: kubernetes
   cluster: build09
   cron: 26 15 5,12,19,26 * *
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/OWNERS b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/OWNERS
new file mode 100644
index 0000000000000..cfba660e13955
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/OWNERS
@@ -0,0 +1,5 @@
+approvers:
+- jianlinliu
+- gpei
+- yunjiang29
+- liweinan
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/OWNERS b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/OWNERS
new file mode 100644
index 0000000000000..cfba660e13955
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/OWNERS
@@ -0,0 +1,5 @@
+approvers:
+- jianlinliu
+- gpei
+- yunjiang29
+- liweinan
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-commands.sh b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-commands.sh
new file mode 100755
index 0000000000000..95c9da1262119
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-commands.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+# Verify that each master Machine reports the same zone in its zone label, its providerID and its providerSpec placement.
+# The trap below writes the result to install-post-check-status.txt in SHARED_DIR: 0 when this script exits 0, otherwise 101.
+# That status file is saved for the junit xml file generated in step gather-must-gather.
+EXIT_CODE=101
+trap 'if [[ "$?" == 0 ]]; then EXIT_CODE=0; fi; echo "${EXIT_CODE}" > "${SHARED_DIR}/install-post-check-status.txt"' EXIT TERM
+
+if [ -f "${SHARED_DIR}/kubeconfig" ] ; then
+  export KUBECONFIG="${SHARED_DIR}/kubeconfig"
+else
+  echo "No KUBECONFIG found, exit now"
+  exit 1
+fi
+
+MASTER_MACHINES=$(oc get machine -n openshift-machine-api -l machine.openshift.io/cluster-api-machine-role=master -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "")  # empty when oc fails or nothing matches
+
+if [ -z "$MASTER_MACHINES" ]; then
+  echo "No master machines found"
+  exit 1
+fi
+
+ret=0  # number of machines with missing or inconsistent zone information
+
+for machine in $MASTER_MACHINES; do
+  zone_label=$(oc get machine "$machine" -n openshift-machine-api -o jsonpath='{.metadata.labels.machine\.openshift\.io/zone}' 2>/dev/null || echo "")
+  provider_id=$(oc get machine "$machine" -n openshift-machine-api -o jsonpath='{.spec.providerID}' 2>/dev/null || echo "")
+  provider_zone=$(echo "$provider_id" | grep -oP 'aws:///\K[^/]+' 2>/dev/null || echo "")  # text between "aws:///" and the next "/"
+  spec_zone=$(oc get machine "$machine" -n openshift-machine-api -o jsonpath='{.spec.providerSpec.value.placement.availabilityZone}' 2>/dev/null || echo "")
+
+  if [ -z "$zone_label" ] || [ -z "$provider_zone" ] || [ -z "$spec_zone" ]; then  # any failed lookup above leaves its value empty
+    echo "ERROR: $machine - missing zone information (label: $zone_label, providerID: $provider_zone, spec: $spec_zone)"
+    ret=$((ret + 1))
+  elif [ "$zone_label" != "$provider_zone" ] || [ "$provider_zone" != "$spec_zone" ]; then
+    echo "ERROR: $machine - zone inconsistent (label: $zone_label, providerID: $provider_zone, spec: $spec_zone)"
+    ret=$((ret + 1))
+  fi
+done
+
+if [ $ret -eq 0 ]; then
+  echo "PASS: All machines have consistent zones"
+else
+  echo "FAIL: Machines with inconsistent zones detected"
+fi
+
+exit $ret  # a non-zero value here makes the EXIT trap record 101
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.metadata.json b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.metadata.json
new file mode 100644
index 0000000000000..04b6cdf1510d0
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.metadata.json
@@ -0,0 +1,11 @@
+{
+	"path": "cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.yaml",
+	"owners": {
+		"approvers": [
+			"jianlinliu",
+			"gpei",
+			"yunjiang29",
+			"liweinan"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.yaml b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.yaml
new file mode 100644
index 0000000000000..97e59749e7f4c
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/cluster/cucushift-installer-check-aws-zone-consistency-cluster-ref.yaml
@@ -0,0 +1,16 @@
+ref:
+  as: cucushift-installer-check-aws-zone-consistency-cluster
+  from_image:
+    namespace: ocp
+    name: "4.16"
+    tag: upi-installer
+  grace_period: 10m
+  commands: cucushift-installer-check-aws-zone-consistency-cluster-commands.sh
+  resources:
+    requests:
+      cpu: 10m
+      memory: 100Mi
+  documentation: >-
+    Verify control plane machine zone consistency in installed cluster (OCPBUGS-69923).
+    This step verifies that actual cluster machines have consistent zone information
+    (zone label, providerID zone, and spec zone) after cluster installation is complete.
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/OWNERS b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/OWNERS
new file mode 100644
index 0000000000000..cfba660e13955
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/OWNERS
@@ -0,0 +1,5 @@
+approvers:
+- jianlinliu
+- gpei
+- yunjiang29
+- liweinan
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-commands.sh b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-commands.sh
new file mode 100755
index 0000000000000..2bc108178d9c2
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-commands.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+# Check that the zone of every control plane machine manifest matches the zone
+# at the same position in the ControlPlaneMachineSet manifest (OCPBUGS-69923).
+# The trap below writes the outcome to install-pre-config-status.txt in
+# SHARED_DIR: 0 when the script exits successfully, 100 otherwise. That file
+# is meant for the junit xml file generated in step gather-must-gather.
+EXIT_CODE=100
+trap 'if [[ "$?" == 0 ]]; then EXIT_CODE=0; fi; echo "${EXIT_CODE}" > "${SHARED_DIR}/install-pre-config-status.txt"' EXIT TERM
+
+# Try to find installer directory
+# First check SHARED_DIR (manifests copied from ipi-install-install step)
+# Then check /tmp/installer (if running in the same pod)
+# NOTE(review): ci-operator documents SHARED_DIR as a flat directory; confirm
+# that the nested installer/ directory really is propagated between steps.
+if [ -d "${SHARED_DIR}/installer" ]; then
+  INSTALL_DIR="${SHARED_DIR}/installer"
+elif [ -d "/tmp/installer" ]; then
+  INSTALL_DIR="/tmp/installer"
+else
+  echo "Error: Installation directory not found in ${SHARED_DIR}/installer or /tmp/installer"
+  exit 1
+fi
+
+# Select the manifests by file name; sort keeps the processing order stable.
+CAPI_FILES=$(find "$INSTALL_DIR"/openshift -name "*cluster-api*master*.yaml" -type f 2>/dev/null | sort || true)
+MAPI_FILES=$(find "$INSTALL_DIR"/openshift -name "*machine-api*master*.yaml" -type f 2>/dev/null | sort || true)
+
+if [ -z "$CAPI_FILES" ]; then
+  echo "Error: CAPI manifest files not found"
+  exit 1
+fi
+
+if [ -z "$MAPI_FILES" ]; then
+  echo "Error: MAPI manifest files not found"
+  exit 1
+fi
+
+# Get CAPI zones: one entry per manifest that sets a placement zone
+capi_zones=()
+for file in $CAPI_FILES; do
+  zone=$(yq eval '.spec.providerSpec.value.placement.availabilityZone' "$file" 2>/dev/null || echo "")
+  if [ -n "$zone" ] && [ "$zone" != "null" ]; then
+    capi_zones+=("$zone")
+  fi
+done
+
+if [ ${#capi_zones[@]} -eq 0 ]; then
+  echo "Error: No CAPI zone information found"
+  exit 1
+fi
+
+# Get MAPI zones: the ControlPlaneMachineSet failure domain zones, in manifest
+# order, limited to as many entries as there are CAPI zones
+master_count=${#capi_zones[@]}
+mapi_zones=()
+for file in $MAPI_FILES; do
+  kind=$(yq eval '.kind' "$file" 2>/dev/null || echo "")
+  if [ "$kind" = "ControlPlaneMachineSet" ]; then
+    zones=$(yq eval '.spec.template.machines_v1beta1_machine_openshift_io.failureDomains.aws[].placement.availabilityZone' "$file" 2>/dev/null || echo "")
+    mapi_index=0
+    for zone in $zones; do
+      if [ "$zone" != "null" ] && [ -n "$zone" ] && [ $mapi_index -lt "$master_count" ]; then
+        mapi_zones+=("$zone")
+        mapi_index=$((mapi_index + 1))
+      fi
+    done
+  fi
+done
+
+if [ ${#mapi_zones[@]} -eq 0 ]; then
+  echo "Error: No MAPI zone information found"
+  exit 1
+fi
+
+# Compare zones position by position
+ret=0
+for i in "${!capi_zones[@]}"; do
+  capi_zone="${capi_zones[$i]}"
+  # With nounset, reading an index that mapi_zones does not have aborts the
+  # script with "unbound variable"; default to empty and report it instead.
+  mapi_zone="${mapi_zones[$i]:-}"
+  if [ -z "$mapi_zone" ]; then
+    echo "ERROR: master-$i has no MAPI zone to compare with - CAPI: $capi_zone (found ${#mapi_zones[@]} MAPI zones for ${#capi_zones[@]} CAPI machines)"
+    ret=$((ret + 1))
+  elif [ "$capi_zone" != "$mapi_zone" ]; then
+    echo "ERROR: master-$i zone mismatch - CAPI: $capi_zone, MAPI: $mapi_zone"
+    ret=$((ret + 1))
+  fi
+done
+
+if [ $ret -eq 0 ]; then
+  echo "PASS: All machines have consistent zone allocation"
+else
+  echo "FAIL: Zone allocation inconsistency detected"
+fi
+
+exit $ret
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.metadata.json b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.metadata.json
new file mode 100644
index 0000000000000..3d2b037b7a1bd
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.metadata.json
@@ -0,0 +1,11 @@
+{
+	"path": "cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.yaml",
+	"owners": {
+		"approvers": [
+			"jianlinliu",
+			"gpei",
+			"yunjiang29",
+			"liweinan"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.yaml b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.yaml
new file mode 100644
index 0000000000000..ed26d3183ffd0
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/check/aws/zone-consistency/manifests/cucushift-installer-check-aws-zone-consistency-manifests-ref.yaml
@@ -0,0 +1,17 @@
+ref:
+  as: cucushift-installer-check-aws-zone-consistency-manifests
+  from_image:
+    namespace: ocp
+    name: "4.16"
+    tag: upi-installer
+  grace_period: 10m
+  commands: cucushift-installer-check-aws-zone-consistency-manifests-commands.sh
+  resources:
+    requests:
+      cpu: 10m
+      memory: 100Mi
+  documentation: >-
+    Verify control plane machine zone allocation consistency in manifests (OCPBUGS-69923).
+    This step verifies that CAPI and MAPI manifests have consistent zone allocation
+    for control plane machines after manifests are generated.
+    It reads the manifests that ipi-install-install copies to SHARED_DIR, so it must run after that step.
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/OWNERS b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/OWNERS
new file mode 100644
index 0000000000000..88c1bfffd18a9
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/OWNERS
@@ -0,0 +1,4 @@
+approvers:
+- jianlinliu
+- gpei
+- yunjiang29
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.metadata.json b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.metadata.json
new file mode 100644
index 0000000000000..627460566115e
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.metadata.json
@@ -0,0 +1,10 @@
+{
+	"path": "cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.yaml",
+	"owners": {
+		"approvers": [
+			"jianlinliu",
+			"gpei",
+			"yunjiang29"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.yaml
new file mode 100644
index 0000000000000..f742f24288c2d
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/cucushift-installer-rehearse-aws-ipi-zone-consistency-workflow.yaml
@@ -0,0 +1,13 @@
+workflow:
+  as: cucushift-installer-rehearse-aws-ipi-zone-consistency
+  steps:
+    pre:
+    - chain: cucushift-installer-rehearse-aws-ipi-zone-consistency-provision
+    - ref: cucushift-installer-reportportal-marker
+    post:
+    - chain: cucushift-installer-rehearse-aws-ipi-deprovision
+    - ref: send-results-to-reportportal
+  documentation: |-
+    This workflow tests OCPBUGS-69923: Control plane machine zone allocation consistency.
+    It verifies that CAPI and MAPI manifests have consistent zone allocation,
+    and that actual cluster machines have consistent zone information.
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/OWNERS b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/OWNERS
new file mode 100644
index 0000000000000..1dee593a8886f
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/OWNERS
@@ -0,0 +1,10 @@
+approvers:
+- jianlinliu
+- gpei
+- yunjiang29
+- liweinan
+reviewers:
+- jianlinliu
+- gpei
+- yunjiang29
+- liweinan
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.metadata.json b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.metadata.json
new file mode 100644
index 0000000000000..b93c271e67102
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.metadata.json
@@ -0,0 +1,17 @@
+{
+	"path": "cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.yaml",
+	"owners": {
+		"approvers": [
+			"jianlinliu",
+			"gpei",
+			"yunjiang29",
+			"liweinan"
+		],
+		"reviewers": [
+			"jianlinliu",
+			"gpei",
+			"yunjiang29",
+			"liweinan"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.yaml
new file mode 100644
index 0000000000000..a4fbaa436d5a8
--- /dev/null
+++ b/ci-operator/step-registry/cucushift/installer/rehearse/aws/ipi/zone-consistency/provision/cucushift-installer-rehearse-aws-ipi-zone-consistency-provision-chain.yaml
@@ -0,0 +1,19 @@
+chain:
+  as: cucushift-installer-rehearse-aws-ipi-zone-consistency-provision
+  steps:
+  - chain: ipi-conf-aws
+  - ref: ipi-conf-aws-usage-info
+  - chain: aws-provision-iam-user-minimal-permission
+  - ref: ipi-install-rbac
+  - ref: openshift-cluster-bot-rbac
+  - ref: ipi-install-hosted-loki
+  - ref: ipi-install-install
+  - ref: cucushift-installer-check-aws-zone-consistency-manifests
+  - ref: ipi-install-times-collection
+  - ref: nodes-readiness
+  - ref: multiarch-validate-nodes
+  - ref: enable-qe-catalogsource
+  - chain: cucushift-installer-check
+  - ref: cucushift-installer-check-aws-zone-consistency-cluster
+  documentation: |-
+    Create an IPI cluster on AWS and verify zone consistency (OCPBUGS-69923).
diff --git a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh
index 66d064ecdc6fd..51d4f3d4e2ef6 100755
--- a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh
+++ b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh
@@ -722,6 +722,21 @@ if test "${ret}" -ne 0 ; then
 fi
 set -o errexit
 
+# Copy the master machine manifests to SHARED_DIR for post-manifest validation steps
+# This allows steps that run after ipi-install-install to access manifests
+# Only the files matched by name below are copied, which keeps the data added to
+# SHARED_DIR by this multi-platform install step small.
+# NOTE(review): ci-operator documents SHARED_DIR as a flat, size-limited
+# directory; confirm that the nested installer/openshift path reaches later steps.
+if [ -d "${dir}/openshift" ]; then
+  mkdir -p "${SHARED_DIR}/installer/openshift"
+  # Best effort: a failed copy is reported but must not fail the installation.
+  find "${dir}/openshift" -type f \
+    \( -name "*cluster-api*master*.yaml" -o -name "*machine-api*master*.yaml" \) \
+    -exec cp -t "${SHARED_DIR}/installer/openshift/" {} + \
+    || echo "WARNING: failed to copy master machine manifests to ${SHARED_DIR}/installer/openshift"
+fi
+
 # Platform specific manifests adjustments
 case "${CLUSTER_TYPE}" in
 azure4|azure-arm64)