64 changes: 49 additions & 15 deletions documentdb-playground/aks-fleet-deployment/deploy-fleet-bicep.sh
@@ -10,19 +10,11 @@ RG_LOCATION="${RG_LOCATION:-eastus2}"
HUB_REGION="${HUB_REGION:-westus3}"
SCRIPT_DIR="$(dirname "$0")"

# Regions for member clusters (keep in sync with parameters.bicepparam if you change it)
if [ -n "${MEMBER_REGIONS_CSV:-}" ]; then
IFS=',' read -r -a MEMBER_REGIONS <<< "$MEMBER_REGIONS_CSV"
else
MEMBER_REGIONS=("westus3" "uksouth" "eastus2")
fi

# Optional: explicitly override the VM size used by the template param vmSize.
# If left empty, the template's default (currently Standard_DS2_v2) will be used.
KUBE_VM_SIZE="${KUBE_VM_SIZE:-}"

# Build JSON arrays for parameters (after any fallbacks)
MEMBER_REGIONS_JSON=$(printf '%s\n' "${MEMBER_REGIONS[@]}" | jq -R . | jq -s .)
# Optional: override the default member regions defined in main.bicep (comma-separated list)
MEMBER_REGIONS="${MEMBER_REGIONS:-}"

# Wait for any in-progress AKS operations in this resource group to finish
wait_for_no_inprogress() {
@@ -59,16 +51,24 @@ if ! wait_for_no_inprogress "$RESOURCE_GROUP"; then
  echo "Exiting without changes due to in-progress operations. Re-run when provisioning completes." >&2
  exit 1
fi

PARAMS=()
# Build parameter overrides
PARAMS=(
  --parameters "$SCRIPT_DIR/parameters.bicepparam"
  --parameters memberRegions="$MEMBER_REGIONS_JSON"
)
if [ -n "$KUBE_VM_SIZE" ]; then
echo "Overriding kubernetes VM size with: $KUBE_VM_SIZE"
PARAMS+=( --parameters vmSize="$KUBE_VM_SIZE" )
fi

if [ -n "$MEMBER_REGIONS" ]; then
echo "Overriding member regions with: $MEMBER_REGIONS"
MEMBER_REGION_JSON=$(printf '%s' "$MEMBER_REGIONS" | jq -Rsc 'split(",") | map(gsub("^\\s+|\\s+$";"")) | map(select(length>0))')
if [ "$(printf '%s' "$MEMBER_REGION_JSON" | jq 'length')" -eq 0 ]; then
echo "MEMBER_REGIONS did not contain any valid entries" >&2
exit 1
fi
PARAMS+=( --parameters memberRegions="$MEMBER_REGION_JSON" )
fi
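# For reference: with MEMBER_REGIONS="westus3, uksouth ,,eastus2" the jq filter
# above yields ["westus3","uksouth","eastus2"] (entries trimmed, empties dropped).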

DEPLOYMENT_NAME=${DEPLOYMENT_NAME:-"aks-deployment-$(date +%s)"}
az deployment group create \
--name "$DEPLOYMENT_NAME" \
@@ -84,6 +84,23 @@ DEPLOYMENT_OUTPUT=$(az deployment group show \

# Extract outputs
MEMBER_CLUSTER_NAMES=$(echo "$DEPLOYMENT_OUTPUT" | jq -r '.memberClusterNames.value[]')
VNET_NAMES=$(echo "$DEPLOYMENT_OUTPUT" | jq -r '.memberVnetNames.value[]')

while read -r vnet1; do
  while read -r vnet2; do
    [ -z "$vnet1" ] && continue
    [ -z "$vnet2" ] && continue
    [ "$vnet1" = "$vnet2" ] && continue
    echo "Peering VNet '$vnet1' with VNet '$vnet2'..."
    az network vnet peering create \
      --name "${vnet1}-to-${vnet2}-peering" \
      --resource-group "$RESOURCE_GROUP" \
      --vnet-name "$vnet1" \
      --remote-vnet "$vnet2" \
      --allow-vnet-access true \
      --allow-forwarded-traffic true
  done <<< "$VNET_NAMES"
done <<< "$VNET_NAMES"

HUB_CLUSTER=""
while read -r cluster; do
@@ -97,6 +114,20 @@ git clone https://github.com/kubefleet-dev/kubefleet.git $kubeDir
pushd $kubeDir
# Set up HUB_CLUSTER as the hub
kubectl config use-context $HUB_CLUSTER

# Install cert manager on hub cluster
helm repo add jetstack https://charts.jetstack.io
helm repo update

echo -e "\nInstalling cert-manager on $HUB_CLUSTER..."
helm upgrade --install cert-manager jetstack/cert-manager \
  --namespace cert-manager \
  --create-namespace \
  --set crds.enabled=true
kubectl rollout status deployment/cert-manager -n cert-manager --timeout=240s || true
echo "Pods ($HUB_CLUSTER):"
kubectl get pods -n cert-manager -o wide || true

export REGISTRY="ghcr.io/kubefleet-dev/kubefleet"
export TAG=$(curl "https://api.github.com/repos/kubefleet-dev/kubefleet/tags" | jq -r '.[0].name') # Gets latest tag
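# Example: TAG resolves to the repository's most recent tag name (e.g. a value like "v0.x.y").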
# Install the helm chart for running Fleet agents on the hub cluster.
@@ -111,7 +142,10 @@ helm upgrade --install hub-agent ./charts/hub-agent/ \
  --set logFileMaxSize=100000 \
  --set MaxConcurrentClusterPlacement=200 \
  --set namespace=fleet-system-hub \
  --set enableWorkload=true #\
  #--set useCertManager=true \
  #--set enableWebhook=true


# Run the script.
chmod +x ./hack/membership/joinMC.sh
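For reference, a minimal invocation sketch for the updated script (all values illustrative; when MEMBER_REGIONS is unset, the region defaults in main.bicep apply):

# hypothetical example — adjust resource group, regions, and VM size to your environment
RESOURCE_GROUP=documentdb-aks-fleet-rg \
MEMBER_REGIONS="westus3,uksouth,eastus2,westus2" \
KUBE_VM_SIZE="Standard_D4s_v5" \
./deploy-fleet-bicep.sh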
@@ -10,6 +10,7 @@ set -euo pipefail
RESOURCE_GROUP="${RESOURCE_GROUP:-documentdb-aks-fleet-rg}"
HUB_REGION="${HUB_REGION:-westus3}"
CHART_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/operator/documentdb-helm-chart"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERSION="${VERSION:-200}"
VALUES_FILE="${VALUES_FILE:-}"
BUILD_CHART="${BUILD_CHART:-true}"
@@ -76,7 +77,7 @@ else
fi
fi

kubectl --context "$HUB_CLUSTER" apply -f ./documentdb-operator-crp.yaml
kubectl --context "$HUB_CLUSTER" apply -f $SCRIPT_DIR/documentdb-operator-crp.yaml

# Get all member clusters

72 changes: 3 additions & 69 deletions documentdb-playground/aks-fleet-deployment/main.bicep
@@ -19,33 +19,21 @@ param nodeCount int = 2
// Optionally include kubernetesVersion in cluster properties
var maybeK8sVersion = empty(kubernetesVersion) ? {} : { kubernetesVersion: kubernetesVersion }

// Define non-overlapping address spaces for each member cluster
var memberVnetAddressSpaces = [
  '10.1.0.0/16' // westus3
  '10.2.0.0/16' // uksouth
  '10.3.0.0/16' // eastus2
]
var memberSubnetAddressSpaces = [
  '10.1.0.0/20' // westus3
  '10.2.0.0/20' // uksouth
  '10.3.0.0/20' // eastus2
]

// Member VNets
resource memberVnets 'Microsoft.Network/virtualNetworks@2023-09-01' = [for (region, i) in memberRegions: {
  name: 'member-${region}-vnet'
  location: region
  properties: {
    addressSpace: {
      addressPrefixes: [
        memberVnetAddressSpaces[i]
        '10.${i}.0.0/16'
      ]
    }
    subnets: [
      {
        name: 'aks-subnet'
        properties: {
          addressPrefix: memberSubnetAddressSpaces[i]
          addressPrefix: '10.${i}.0.0/20'
        }
      }
    ]
@@ -84,59 +72,5 @@ resource memberClusters 'Microsoft.ContainerService/managedClusters@2023-10-01'
]
}]
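// For reference (assumed behavior): with the default three member regions, the
// loop-derived prefixes are 10.0.0.0/16, 10.1.0.0/16, 10.2.0.0/16, each with a
// 10.<i>.0.0/20 AKS subnet, so address spaces stay non-overlapping for any region
// count without the hard-coded arrays removed above.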

// Create peering pairs for full mesh
var peeringPairs = [
  {
    sourceIndex: 0
    targetIndex: 1
    sourceName: memberRegions[0]
    targetName: memberRegions[1]
  }
  {
    sourceIndex: 0
    targetIndex: 2
    sourceName: memberRegions[0]
    targetName: memberRegions[2]
  }
  {
    sourceIndex: 1
    targetIndex: 2
    sourceName: memberRegions[1]
    targetName: memberRegions[2]
  }
]

// VNet peerings - Forward direction
resource memberPeeringsForward 'Microsoft.Network/virtualNetworks/virtualNetworkPeerings@2023-09-01' = [for pair in peeringPairs: {
  name: '${pair.sourceName}-to-${pair.targetName}'
  parent: memberVnets[pair.sourceIndex]
  properties: {
    remoteVirtualNetwork: {
      id: memberVnets[pair.targetIndex].id
    }
    allowVirtualNetworkAccess: true
    allowForwardedTraffic: true
    allowGatewayTransit: false
    useRemoteGateways: false
  }
}]

// VNet peerings - Reverse direction
resource memberPeeringsReverse 'Microsoft.Network/virtualNetworks/virtualNetworkPeerings@2023-09-01' = [for pair in peeringPairs: {
  name: '${pair.targetName}-to-${pair.sourceName}'
  parent: memberVnets[pair.targetIndex]
  properties: {
    remoteVirtualNetwork: {
      id: memberVnets[pair.sourceIndex].id
    }
    allowVirtualNetworkAccess: true
    allowForwardedTraffic: true
    allowGatewayTransit: false
    useRemoteGateways: false
  }
  dependsOn: [
    memberPeeringsForward
  ]
}]

output memberClusterNames array = [for i in range(0, length(memberRegions)): memberClusters[i].name]
output memberVnetNames array = [for i in range(0, length(memberRegions)): memberVnets[i].name]
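// These outputs feed deploy-fleet-bicep.sh, which now builds the full VNet peering
// mesh via the Azure CLI loop instead of the fixed three-region pairs removed above.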

This file was deleted.

38 changes: 38 additions & 0 deletions documentdb-playground/fleet-add-region/README.md
@@ -0,0 +1,38 @@
# Fleet Add Region Playground

This playground focuses on exercising DocumentDB across changing fleet shapes.
It builds on the AKS Fleet Deployment playground (shared Bicep templates and
install scripts) but layers extra tooling to: add a region, remove a region,
verify configuration and connectivity, and iterate rapidly on those flows
before changes graduate to docs or automation.

## Goals

- **Prove add/remove**: Validate that DocumentDB state, KubeFleet placements,
and CNPG clusters survive when a member region joins or leaves.
- **Shareable workflows**: Capture the manual commands and patches used during
prototyping so they can be replayed by others.
- **Regression surface**: Provide a safe spot to run disruptive tests (failovers,
partial rollouts, patching CRPs) without touching the core deployment guide.
- **Consistency with AKS Fleet**: Reuse credentials, hub selection, and discovery
logic from the `aks-fleet-deployment` playground to avoid divergence.

## Scripts

- `deploy-four-region.sh`: Convenience wrapper to stand up a fresh four-region
fleet using the upstream deployment assets before exercising the add/remove scripts.

## Typical Workflow

1. **Bootstrap fleet** using the `deploy-four-region.sh` script, which calls the
functions from `../aks-fleet-deployment` (Bicep deployment, cert-manager install,
operator install). All environment variables (e.g., `RESOURCE_GROUP`, `HUB_REGION`)
match the upstream playground so secrets and kubeconfigs remain reusable.
2. **Stand up baseline DocumentDB** via `documentdb-three-region.sh`, which creates
a three-region cluster, excluding the westus2 region to start.
3. **Introduce changes** (a condensed command sketch follows this list):
   - Add westus2 with `add-region.sh`, which patches the `DocumentDB` and
     `resourceplacement` lists.
   - Validate with `check.sh` and watch KubeFleet propagate CRs.
   - Remove the hub region (westus3) via `remove-region.sh` and re-run `check.sh`
     to confirm cleanup.
4. **Experiment repeatedly**, adjusting variables such as `EXCLUDE_REGION`, `HUB_REGION`,
or `DOCUMENTDB_PASSWORD` to simulate production scenarios.
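
A minimal end-to-end sketch of the workflow above, assuming the scripts are run
from this directory with the upstream defaults (the password value is a placeholder):

```bash
export RESOURCE_GROUP=documentdb-aks-fleet-rg   # matches the upstream playground
export HUB_REGION=westus3
export DOCUMENTDB_PASSWORD='<password>'         # placeholder; set your own

./deploy-four-region.sh          # 1. bootstrap the four-region fleet
./documentdb-three-region.sh     # 2. baseline DocumentDB, excluding westus2
./add-region.sh                  # 3a. fold westus2 into DocumentDB and the CRP
./check.sh                       # 3b. verify KubeFleet propagated the CRs
./remove-region.sh               # 3c. drop the hub region (westus3)
./check.sh                       #     confirm cleanup
```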
97 changes: 97 additions & 0 deletions documentdb-playground/fleet-add-region/add-region.sh
@@ -0,0 +1,97 @@
#!/bin/bash

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

RESOURCE_GROUP="${RESOURCE_GROUP:-documentdb-aks-fleet-rg}"
HUB_REGION="${HUB_REGION:-westus3}"
PRIMARY_REGION="${PRIMARY_REGION:-eastus2}"
EXCLUDE_REGION="${EXCLUDE_REGION:-westus2}"

# Dynamically get member clusters from Azure
echo "Discovering member clusters in resource group: $RESOURCE_GROUP..."
MEMBER_CLUSTERS=$(az aks list -g "$RESOURCE_GROUP" -o json | jq -r '.[] | select(.name|startswith("member-")) | .name' | sort)

if [ -z "$MEMBER_CLUSTERS" ]; then
echo "Error: No member clusters found in resource group $RESOURCE_GROUP"
echo "Please ensure the fleet is deployed first"
exit 1
fi

CLUSTER_ARRAY=($MEMBER_CLUSTERS)
echo "Found ${#CLUSTER_ARRAY[@]} member clusters:"
EXCLUDE_CLUSTER=""
for cluster in "${CLUSTER_ARRAY[@]}"; do
echo " - $cluster"
if [[ "$cluster" == *"$HUB_REGION"* ]]; then HUB_CLUSTER="$cluster"; fi
if [[ "$cluster" == *"$EXCLUDE_REGION"* ]]; then EXCLUDE_CLUSTER="$cluster"; fi
if [[ "$cluster" == *"$PRIMARY_REGION"* ]]; then PRIMARY_CLUSTER="$cluster"; fi
done

# Build the cluster list YAML with proper indentation
CLUSTER_LIST=""
CLUSTER_LIST_CRP=""
for cluster in "${CLUSTER_ARRAY[@]}"; do
  if [ "$cluster" == "$EXCLUDE_CLUSTER" ]; then
    echo "Including cluster $cluster in DocumentDB configuration"
  fi
  if [ -z "$CLUSTER_LIST" ]; then
    CLUSTER_LIST=" - name: ${cluster}"
    CLUSTER_LIST="${CLUSTER_LIST}"$'\n'" environment: aks"
    CLUSTER_LIST_CRP=" - ${cluster}"
  else
    CLUSTER_LIST="${CLUSTER_LIST}"$'\n'" - name: ${cluster}"
    CLUSTER_LIST="${CLUSTER_LIST}"$'\n'" environment: aks"
    CLUSTER_LIST_CRP="${CLUSTER_LIST_CRP}"$'\n'" - ${cluster}"
  fi
done
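# For reference: CLUSTER_LIST expands to one "- name: <cluster>" plus
# "environment: aks" pair per cluster, and CLUSTER_LIST_CRP to one "- <cluster>"
# entry per cluster, each pre-indented to slot into the template placeholders below.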

TEMP_YAML=$(mktemp)

# Use sed for safer substitution
sed -e "s/{{DOCUMENTDB_PASSWORD}}/$DOCUMENTDB_PASSWORD/g" \
    -e "s/{{PRIMARY_CLUSTER}}/$PRIMARY_CLUSTER/g" \
    "$SCRIPT_DIR/documentdb-resource-crp.yaml" | \
while IFS= read -r line; do
  if [[ "$line" == '{{CLUSTER_LIST}}' ]]; then
    echo "$CLUSTER_LIST"
  elif [[ "$line" == '{{CLUSTER_LIST_CRP}}' ]]; then
    echo "$CLUSTER_LIST_CRP"
  else
    echo "$line"
  fi
done > "$TEMP_YAML"
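# The multi-line placeholders are expanded in the read loop rather than sed,
# since sed's s/// cannot easily inject multi-line replacement text.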

echo ""
echo "Applying DocumentDB multi-region configuration..."

MAX_RETRIES=60
RETRY_INTERVAL=3
RETRY_COUNT=0

while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
  kubectl --context "$HUB_CLUSTER" apply -f "$TEMP_YAML" &> /dev/null || true

  echo "Checking if $EXCLUDE_CLUSTER has been added to clusterReplication on the excluded cluster..."

  # Get the clusterReplication.clusterList names from the DocumentDB object on the excluded cluster
  CLUSTER_NAMES=$(kubectl --context "$EXCLUDE_CLUSTER" get documentdb documentdb-preview -n documentdb-preview-ns -o jsonpath='{.spec.clusterReplication.clusterList[*].name}' 2>/dev/null || true)

  if echo "$CLUSTER_NAMES" | grep -q "$EXCLUDE_CLUSTER"; then
    echo "Success: $EXCLUDE_CLUSTER is now included in clusterReplication field"
    break
  fi

  RETRY_COUNT=$((RETRY_COUNT + 1))
  echo "Cluster not yet in clusterReplication (attempt $RETRY_COUNT/$MAX_RETRIES). Retrying in ${RETRY_INTERVAL}s..."
  sleep $RETRY_INTERVAL
done

if [ $RETRY_COUNT -eq $MAX_RETRIES ]; then
  echo "Error: Timed out waiting for $EXCLUDE_CLUSTER to appear in clusterReplication" >&2
  exit 1
fi

rm -f "$TEMP_YAML"
echo "Done."