diff --git a/.github/workflows/post-deployment-health-check.yml b/.github/workflows/post-deployment-health-check.yml
new file mode 100644
index 000000000000..eb05ec91d811
--- /dev/null
+++ b/.github/workflows/post-deployment-health-check.yml
@@ -0,0 +1,204 @@
+name: Post-Deployment Health Check
+
+# Runs curl-based smoke tests against the deployed site after a deploy
+# workflow finishes, or on demand against a manually chosen target.
+on:
+  workflow_run:
+    workflows:
+      - "Build and deploy"
+      - "Build and deploy testing"
+    types:
+      - completed
+  workflow_dispatch:
+    inputs:
+      target_url:
+        description: 'Target URL to health check'
+        required: true
+        type: choice
+        options:
+          - https://www.pulumi.com
+          - https://www.pulumi-test.io
+        default: 'https://www.pulumi.com'
+      environment:
+        description: 'Environment name (for Slack notifications)'
+        required: true
+        type: choice
+        options:
+          - production
+          - testing
+        default: 'production'
+
+# Least privilege by default; the notify job grants itself id-token below.
+permissions:
+  contents: read
+
+# Configuration consumed by pulumi/esc-action in the notify job.
+env:
+  ESC_ACTION_OIDC_AUTH: "true"
+  ESC_ACTION_OIDC_ORGANIZATION: pulumi
+  ESC_ACTION_OIDC_REQUESTED_TOKEN_TYPE: urn:pulumi:token-type:access_token:organization
+  ESC_ACTION_ENVIRONMENT: github-secrets/pulumi-docs
+  ESC_ACTION_EXPORT_ENVIRONMENT_VARIABLES: "false"
+
+jobs:
+  health-check:
+    name: Run post-deployment health checks
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    # Run after a successful deployment, or whenever triggered manually.
+    if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
+    outputs:
+      url: ${{ steps.target.outputs.url }}
+      environment: ${{ steps.target.outputs.environment }}
+      slack_channel: ${{ steps.target.outputs.slack_channel }}
+
+    steps:
+      - name: Determine target environment
+        id: target
+        # Context values are handed over as environment variables instead of
+        # being interpolated into the script, so they are never parsed as shell.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          TARGET_URL: ${{ inputs.target_url }}
+          TARGET_ENVIRONMENT: ${{ inputs.environment }}
+          DEPLOY_WORKFLOW: ${{ github.event.workflow_run.name }}
+        run: |
+          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
+            url="$TARGET_URL"
+            environment="$TARGET_ENVIRONMENT"
+          elif [[ "$DEPLOY_WORKFLOW" == "Build and deploy" ]]; then
+            url="https://www.pulumi.com"
+            environment="production"
+          else
+            url="https://www.pulumi-test.io"
+            environment="testing"
+          fi
+
+          # The Slack channel follows the environment in every case.
+          if [[ "$environment" == "production" ]]; then
+            slack_channel="docs-ops"
+          else
+            slack_channel="docs-ops-test"
+          fi
+
+          {
+            echo "url=$url"
+            echo "environment=$environment"
+            echo "slack_channel=$slack_channel"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Run health checks
+        env:
+          BASE_URL: ${{ steps.target.outputs.url }}
+        run: |
+          set -euo pipefail
+
+          FAILED=0
+          # Bound every request so an unresponsive host cannot hang the job.
+          CURL_OPTS=(--silent --output /dev/null --connect-timeout 10 --max-time 30)
+
+          echo "🏥 Post-Deployment Health Check"
+          echo "Target: $BASE_URL"
+          echo ""
+
+          # check_endpoint PATH NAME
+          # Passes when PATH, after following redirects, returns HTTP 200.
+          check_endpoint() {
+            local path="$1"
+            local name="$2"
+            local url="${BASE_URL}${path}"
+            local status
+
+            # curl exits non-zero on transport errors; record that as "000" so
+            # set -e does not abort before the remaining checks and summary run.
+            status=$(curl "${CURL_OPTS[@]}" -w "%{http_code}" -L "$url") || status="000"
+            if [[ "$status" == "200" ]]; then
+              echo "✅ $name: $status"
+            else
+              echo "❌ $name: Expected 200, got $status"
+              FAILED=1
+            fi
+          }
+
+          # check_redirect PATH EXPECTED_LOCATION NAME
+          # Passes when PATH returns 301 and the redirect target matches the
+          # EXPECTED_LOCATION regular expression.
+          check_redirect() {
+            local path="$1"
+            local expected_location="$2"
+            local name="$3"
+            local url="${BASE_URL}${path}"
+            local response status location
+
+            # Get status and location without following redirects
+            response=$(curl "${CURL_OPTS[@]}" -w "%{http_code}|%{redirect_url}" "$url") || response="000|"
+            status="${response%%|*}"
+            location="${response#*|}"
+
+            if [[ "$status" == "301" ]] && [[ "$location" =~ $expected_location ]]; then
+              echo "✅ $name: $status → $location"
+            else
+              echo "❌ $name: Expected 301 to match '$expected_location', got $status → $location"
+              FAILED=1
+            fi
+          }
+
+          echo "Testing endpoints..."
+          check_endpoint "/" "Homepage"
+          check_endpoint "/docs" "Docs landing"
+          check_endpoint "/registry" "Registry landing"
+          check_endpoint "/docs/reference/pkg/nodejs/pulumi/pulumi/" "Node.js SDK"
+          check_endpoint "/docs/reference/pkg/python/pulumi/" "Python SDK"
+          check_endpoint "/docs/reference/pkg/dotnet/Pulumi/Pulumi.html" ".NET SDK"
+          check_endpoint "/docs/reference/pkg/java/" "Java SDK"
+          check_endpoint "/docs/iac/get-started/" "Getting started"
+          check_endpoint "/docs/cli/commands/" "CLI reference"
+
+          echo ""
+          echo "Testing Lambda@Edge redirects..."
+          check_redirect "/docs/intro/cloud-providers/aws/" "/registry/packages/aws/" "Cloud provider redirect"
+          check_redirect "/docs/reference/pkg/nodejs/pulumi/aws/" "/docs/reference/pkg/aws/\\?language=nodejs" "Node.js SDK redirect"
+
+          echo ""
+          echo "=================================================="
+          if [[ $FAILED -eq 0 ]]; then
+            echo "✅ All health checks passed!"
+            exit 0
+          else
+            echo "❌ Some health checks failed!"
+            exit 1
+          fi
+
+  notify:
+    name: Send Slack notification on failure
+    runs-on: ubuntu-latest
+    needs: [health-check]
+    if: failure()
+    # OIDC login to Pulumi ESC requires a GitHub-issued ID token, which is
+    # only available to jobs granted id-token: write.
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - name: Fetch secrets from ESC
+        id: esc-secrets
+        uses: pulumi/esc-action@v1
+
+      - name: Send Slack notification
+        uses: docker://sholung/action-slack-notify:v2.3.0
+        env:
+          SLACK_CHANNEL: ${{ needs.health-check.outputs.slack_channel || 'docs-ops' }}
+          SLACK_COLOR: "#F54242"
+          # workflow_run fields are empty on manual runs, hence the fallbacks.
+          SLACK_MESSAGE: |
+            🚨 Post-deployment health check failed for ${{ needs.health-check.outputs.environment || 'production' }}
+
+            Workflow: ${{ github.event.workflow_run.html_url || 'n/a (manual run)' }}
+            Deployment: ${{ github.event.workflow_run.name || 'manual run' }}
+            Commit: ${{ github.event.workflow_run.head_sha || github.sha }}
+
+            Check the workflow logs for details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          SLACK_USERNAME: docsbot
+          SLACK_WEBHOOK: ${{ steps.esc-secrets.outputs.SLACK_WEBHOOK_URL }}
+          SLACK_ICON: https://www.pulumi.com/logos/brand/avatar-on-white.png
diff --git
a/BUILD-AND-DEPLOY.md b/BUILD-AND-DEPLOY.md
index 0b0e4a700375..499f27f98b88 100644
--- a/BUILD-AND-DEPLOY.md
+++ b/BUILD-AND-DEPLOY.md
@@ -2134,6 +2134,51 @@ Video recordings archived in GitHub Actions artifacts on failure.
 
 **Typical Duration:** 3-5 minutes
 
+### Post-Deployment Health Checks
+
+After Pulumi updates complete, automated health checks validate the deployed site using curl-based tests.
+
+**Workflow:** `.github/workflows/post-deployment-health-check.yml`
+
+**Implementation:** Inline bash script using curl (no external dependencies or repository checkout required)
+
+**What it checks:**
+
+- Core pages (homepage, docs, registry)
+- SDK documentation endpoints (Node.js, Python, .NET, Java)
+- High-traffic documentation pages
+- Lambda@Edge redirect functionality
+
+**When it runs:**
+
+- Automatically after `build-and-deploy.yml` or `testing-build-and-deploy.yml` completes successfully
+- Can be manually triggered via the GitHub Actions UI
+- Can be scheduled by adding a `schedule` trigger to the workflow (the job's `if` condition and target selection only handle `workflow_run` and `workflow_dispatch`, so both must be extended for scheduled runs)
+
+**On failure:**
+
+- Dedicated Slack notification sent to #docs-ops (production) or #docs-ops-test (testing)
+- Notification includes deployment info, commit SHA, and a link to the logs
+- Health check workflow marked as failed in GitHub Actions
+- Deployment workflow remains marked as successful (separation of concerns)
+
+### Local testing
+
+```bash
+# Test an individual endpoint
+curl -s -o /dev/null -w "%{http_code}\n" -L https://www.pulumi.com/docs
+
+# Test a redirect
+curl -s -o /dev/null -w "%{http_code}|%{redirect_url}\n" https://www.pulumi.com/docs/intro/cloud-providers/aws/
+```
+
+### Adding new checks
+
+Edit `.github/workflows/post-deployment-health-check.yml` and add calls to:
+
+- `check_endpoint` function for page availability checks (expects 200 status)
+- `check_redirect` function for Lambda@Edge redirect tests (expects 301 with location match)
+
 ### Example Program Testing
 
 **Purpose:** Validate that all code examples are functional