From a0171029558f6150a483adf119e7c3f5995f11e2 Mon Sep 17 00:00:00 2001 From: Cam Date: Tue, 10 Feb 2026 20:34:26 +0000 Subject: [PATCH 1/3] Add post-deployment health checks (#17270) Implements automated health monitoring workflow and script to test Lambda@Edge and critical endpoints after deployments. - Creates Node.js health check script for endpoint testing - Adds GitHub Actions workflow triggered after deployments - Tests 9 critical endpoints and 2 Lambda@Edge redirects - Sends Slack notifications on failure - Supports manual triggers for on-demand testing - Documents health checks in BUILD-AND-DEPLOY.md Co-Authored-By: Claude Sonnet 4.5 --- .../post-deployment-health-check.yml | 110 +++++++++++++ BUILD-AND-DEPLOY.md | 46 ++++++ scripts/post-deployment-health-check.js | 146 ++++++++++++++++++ 3 files changed, 302 insertions(+) create mode 100644 .github/workflows/post-deployment-health-check.yml create mode 100755 scripts/post-deployment-health-check.js diff --git a/.github/workflows/post-deployment-health-check.yml b/.github/workflows/post-deployment-health-check.yml new file mode 100644 index 000000000000..821c67e25746 --- /dev/null +++ b/.github/workflows/post-deployment-health-check.yml @@ -0,0 +1,110 @@ +name: Post-Deployment Health Check + +on: + workflow_run: + workflows: + - "Build and deploy" + - "Build and deploy testing" + types: + - completed + workflow_dispatch: + inputs: + target_url: + description: 'Target URL to health check' + required: true + type: choice + options: + - https://www.pulumi.com + - https://www.pulumi-test.io + default: 'https://www.pulumi.com' + environment: + description: 'Environment name (for Slack notifications)' + required: true + type: choice + options: + - production + - testing + default: 'production' + +permissions: + contents: read + +env: + ESC_ACTION_OIDC_AUTH: true + ESC_ACTION_OIDC_ORGANIZATION: pulumi + ESC_ACTION_OIDC_REQUESTED_TOKEN_TYPE: urn:pulumi:token-type:access_token:organization + ESC_ACTION_ENVIRONMENT: 
github-secrets/pulumi-docs + ESC_ACTION_EXPORT_ENVIRONMENT_VARIABLES: false + +jobs: + health-check: + name: Run post-deployment health checks + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} + outputs: + url: ${{ steps.target.outputs.url }} + environment: ${{ steps.target.outputs.environment }} + slack_channel: ${{ steps.target.outputs.slack_channel }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '24.x' + + - name: Determine target environment + id: target + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "url=${{ inputs.target_url }}" >> $GITHUB_OUTPUT + echo "environment=${{ inputs.environment }}" >> $GITHUB_OUTPUT + if [[ "${{ inputs.environment }}" == "production" ]]; then + echo "slack_channel=docs-ops" >> $GITHUB_OUTPUT + else + echo "slack_channel=docs-ops-test" >> $GITHUB_OUTPUT + fi + elif [[ "${{ github.event.workflow_run.name }}" == "Build and deploy" ]]; then + echo "url=https://www.pulumi.com" >> $GITHUB_OUTPUT + echo "environment=production" >> $GITHUB_OUTPUT + echo "slack_channel=docs-ops" >> $GITHUB_OUTPUT + else + echo "url=https://www.pulumi-test.io" >> $GITHUB_OUTPUT + echo "environment=testing" >> $GITHUB_OUTPUT + echo "slack_channel=docs-ops-test" >> $GITHUB_OUTPUT + fi + + - name: Run health checks + id: health-check + run: | + node scripts/post-deployment-health-check.js "${{ steps.target.outputs.url }}" + + notify: + name: Send Slack notification on failure + runs-on: ubuntu-latest + needs: [health-check] + if: failure() + + steps: + - name: Fetch secrets from ESC + id: esc-secrets + uses: pulumi/esc-action@v1 + + - name: Send Slack notification + uses: docker://sholung/action-slack-notify:v2.3.0 + env: + SLACK_CHANNEL: ${{ needs.health-check.outputs.slack_channel || 'docs-ops' }} + SLACK_COLOR: "#F54242" + SLACK_MESSAGE: | + 🚨 
Post-deployment health check failed for ${{ needs.health-check.outputs.environment || 'production' }} + + Workflow: ${{ github.event.workflow_run.html_url }} + Deployment: ${{ github.event.workflow_run.name }} + Commit: ${{ github.event.workflow_run.head_sha }} + + Check the workflow logs for details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + SLACK_USERNAME: docsbot + SLACK_WEBHOOK: ${{ steps.esc-secrets.outputs.SLACK_WEBHOOK_URL }} + SLACK_ICON: https://www.pulumi.com/logos/brand/avatar-on-white.png diff --git a/BUILD-AND-DEPLOY.md b/BUILD-AND-DEPLOY.md index 0b0e4a700375..8f69b29c2dfb 100644 --- a/BUILD-AND-DEPLOY.md +++ b/BUILD-AND-DEPLOY.md @@ -2134,6 +2134,52 @@ Video recordings archived in GitHub Actions artifacts on failure. **Typical Duration:** 3-5 minutes +### Post-Deployment Health Checks + +After Pulumi updates complete, automated health checks validate the deployed site. + +**Workflow:** `.github/workflows/post-deployment-health-check.yml` + +**Script:** `scripts/post-deployment-health-check.js` + +**What it checks:** + +- Core pages (homepage, docs, registry) +- SDK documentation endpoints (Node.js, Python, .NET, Java) +- High-traffic documentation pages +- Lambda@Edge redirect functionality + +**When it runs:** + +- Automatically after `build-and-deploy.yml` or `testing-build-and-deploy.yml` completes successfully +- Can be manually triggered via GitHub Actions UI +- Can be scheduled (add `schedule` trigger to workflow) + +**On failure:** + +- Dedicated Slack notification sent to #docs-ops (production) or #docs-ops-test (testing) +- Notification includes deployment info, commit SHA, and link to logs +- Health check workflow marked as failed in GitHub Actions +- Deployment workflow remains marked as successful (separation of concerns) + +**Testing locally:** + +```bash +# Test production +node scripts/post-deployment-health-check.js https://www.pulumi.com + +# Test local build +make serve-static +node 
scripts/post-deployment-health-check.js http://localhost:8080 +``` + +**Adding new checks:** + +Edit `scripts/post-deployment-health-check.js` and add to: + +- `endpoints` array for page availability checks +- `redirectTests` array for Lambda@Edge redirect tests + ### Example Program Testing **Purpose:** Validate that all code examples are functional diff --git a/scripts/post-deployment-health-check.js b/scripts/post-deployment-health-check.js new file mode 100755 index 000000000000..5c9257e6eb40 --- /dev/null +++ b/scripts/post-deployment-health-check.js @@ -0,0 +1,146 @@ +#!/usr/bin/env node + +const https = require('https'); +const http = require('http'); + +const targetUrl = process.argv[2] || 'https://www.pulumi.com'; +const isProduction = targetUrl.includes('pulumi.com'); + +const endpoints = [ + { path: '/', expectedStatus: 200, name: 'Homepage' }, + { path: '/docs', expectedStatus: 200, name: 'Docs landing' }, + { path: '/registry', expectedStatus: 200, name: 'Registry landing' }, + + { path: '/docs/reference/pkg/nodejs/pulumi/pulumi/', expectedStatus: 200, name: 'Node.js SDK' }, + { path: '/docs/reference/pkg/python/pulumi/', expectedStatus: 200, name: 'Python SDK' }, + { path: '/docs/reference/pkg/dotnet/Pulumi/Pulumi.html', expectedStatus: 200, name: '.NET SDK' }, + { path: '/docs/reference/pkg/java/', expectedStatus: 200, name: 'Java SDK' }, + + { path: '/docs/iac/get-started/', expectedStatus: 200, name: 'Getting started' }, + { path: '/docs/cli/commands/', expectedStatus: 200, name: 'CLI reference' }, +]; + +const redirectTests = [ + { + path: '/docs/intro/cloud-providers/aws/', + expectedStatus: 301, + expectedLocation: '/registry/packages/aws/', + name: 'Cloud provider redirect' + }, + { + path: '/docs/reference/pkg/nodejs/pulumi/aws/', + expectedStatus: 301, + expectedLocationPattern: /\/docs\/reference\/pkg\/aws\/\?language=nodejs/, + name: 'Node.js SDK redirect' + }, +]; + +function makeRequest(url, followRedirects = true) { + return new 
Promise((resolve, reject) => { + const protocol = url.startsWith('https') ? https : http; + const options = { method: 'GET' }; + + const req = protocol.get(url, options, (res) => { + if (followRedirects && (res.statusCode === 301 || res.statusCode === 302)) { + const location = res.headers.location; + const redirectUrl = location.startsWith('http') + ? location + : new URL(location, url).toString(); + return makeRequest(redirectUrl, true).then(resolve).catch(reject); + } + + resolve({ + statusCode: res.statusCode, + headers: res.headers, + location: res.headers.location + }); + }); + + req.on('error', reject); + req.setTimeout(10000, () => { + req.destroy(); + reject(new Error('Request timeout')); + }); + }); +} + +async function testEndpoint(baseUrl, endpoint) { + const url = new URL(endpoint.path, baseUrl).toString(); + + try { + const response = await makeRequest(url, true); + + if (response.statusCode !== endpoint.expectedStatus) { + console.error(`❌ ${endpoint.name}: Expected ${endpoint.expectedStatus}, got ${response.statusCode}`); + return false; + } + + console.log(`✅ ${endpoint.name}: ${response.statusCode}`); + return true; + } catch (error) { + console.error(`❌ ${endpoint.name}: ${error.message}`); + return false; + } +} + +async function testRedirect(baseUrl, test) { + const url = new URL(test.path, baseUrl).toString(); + + try { + const response = await makeRequest(url, false); + + if (response.statusCode !== test.expectedStatus) { + console.error(`❌ ${test.name}: Expected ${test.expectedStatus}, got ${response.statusCode}`); + return false; + } + + if (test.expectedLocation && response.location !== test.expectedLocation) { + console.error(`❌ ${test.name}: Expected location ${test.expectedLocation}, got ${response.location}`); + return false; + } + + if (test.expectedLocationPattern && !test.expectedLocationPattern.test(response.location)) { + console.error(`❌ ${test.name}: Location ${response.location} doesn't match pattern`); + return 
false; + } + + console.log(`✅ ${test.name}: ${response.statusCode} → ${response.location}`); + return true; + } catch (error) { + console.error(`❌ ${test.name}: ${error.message}`); + return false; + } +} + +async function main() { + console.log(`\n🏥 Post-Deployment Health Check`); + console.log(`Target: ${targetUrl}\n`); + + let allPassed = true; + + console.log('Testing endpoints...'); + for (const endpoint of endpoints) { + const passed = await testEndpoint(targetUrl, endpoint); + allPassed = allPassed && passed; + } + + console.log('\nTesting Lambda@Edge redirects...'); + for (const test of redirectTests) { + const passed = await testRedirect(targetUrl, test); + allPassed = allPassed && passed; + } + + console.log('\n' + '='.repeat(50)); + if (allPassed) { + console.log('✅ All health checks passed!'); + process.exit(0); + } else { + console.log('❌ Some health checks failed!'); + process.exit(1); + } +} + +main().catch(error => { + console.error('Fatal error:', error); + process.exit(1); +}); From 13a14ae8f544aec85162d66195fd0fee7a6161e9 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Tue, 10 Feb 2026 21:09:50 +0000 Subject: [PATCH 2/3] Fix heading style in BUILD-AND-DEPLOY.md Change 'Testing locally' and 'Adding new checks' from bold text to proper H3 headings per STYLE-GUIDE.md. 
Co-authored-by: Cam Soper --- BUILD-AND-DEPLOY.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BUILD-AND-DEPLOY.md b/BUILD-AND-DEPLOY.md index 8f69b29c2dfb..92b602211501 100644 --- a/BUILD-AND-DEPLOY.md +++ b/BUILD-AND-DEPLOY.md @@ -2162,7 +2162,7 @@ After Pulumi updates complete, automated health checks validate the deployed sit - Health check workflow marked as failed in GitHub Actions - Deployment workflow remains marked as successful (separation of concerns) -**Testing locally:** +### Local testing ```bash # Test production @@ -2173,7 +2173,7 @@ make serve-static node scripts/post-deployment-health-check.js http://localhost:8080 ``` -**Adding new checks:** +### Adding new checks Edit `scripts/post-deployment-health-check.js` and add to: From 4aae2525b02bf354b1877c028215672ac0a3e953 Mon Sep 17 00:00:00 2001 From: Cam Date: Wed, 11 Feb 2026 00:20:53 +0000 Subject: [PATCH 3/3] Simplify health checks to inline bash Replaces 147-line Node.js script with inline curl-based bash in the GitHub Actions workflow. Removes repository checkout and Node.js setup steps, eliminating external dependencies. Net reduction: 87 lines of code. 
Co-Authored-By: Claude Sonnet 4.5 --- .../post-deployment-health-check.yml | 80 ++++++++-- BUILD-AND-DEPLOY.md | 19 ++- scripts/post-deployment-health-check.js | 146 ------------------ 3 files changed, 79 insertions(+), 166 deletions(-) delete mode 100755 scripts/post-deployment-health-check.js diff --git a/.github/workflows/post-deployment-health-check.yml b/.github/workflows/post-deployment-health-check.yml index 821c67e25746..eb05ec91d811 100644 --- a/.github/workflows/post-deployment-health-check.yml +++ b/.github/workflows/post-deployment-health-check.yml @@ -47,14 +47,6 @@ jobs: slack_channel: ${{ steps.target.outputs.slack_channel }} steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v6 - with: - node-version: '24.x' - - name: Determine target environment id: target run: | @@ -77,9 +69,77 @@ jobs: fi - name: Run health checks - id: health-check run: | - node scripts/post-deployment-health-check.js "${{ steps.target.outputs.url }}" + #!/bin/bash + set -e + + BASE_URL="${{ steps.target.outputs.url }}" + FAILED=0 + + echo "🏥 Post-Deployment Health Check" + echo "Target: $BASE_URL" + echo "" + + # Function to check endpoint returns 200 + check_endpoint() { + local path="$1" + local name="$2" + local url="${BASE_URL}${path}" + + status=$(curl -s -o /dev/null -w "%{http_code}" -L "$url") + if [[ "$status" == "200" ]]; then + echo "✅ $name: $status" + else + echo "❌ $name: Expected 200, got $status" + FAILED=1 + fi + } + + # Function to check redirect + check_redirect() { + local path="$1" + local expected_location="$2" + local name="$3" + local url="${BASE_URL}${path}" + + # Get status and location without following redirects + response=$(curl -s -o /dev/null -w "%{http_code}|%{redirect_url}" "$url") + status="${response%%|*}" + location="${response##*|}" + + if [[ "$status" == "301" ]] && [[ "$location" =~ $expected_location ]]; then + echo "✅ $name: $status → $location" + else + echo "❌ 
$name: Expected 301 to match '$expected_location', got $status → $location" + FAILED=1 + fi + } + + echo "Testing endpoints..." + check_endpoint "/" "Homepage" + check_endpoint "/docs" "Docs landing" + check_endpoint "/registry" "Registry landing" + check_endpoint "/docs/reference/pkg/nodejs/pulumi/pulumi/" "Node.js SDK" + check_endpoint "/docs/reference/pkg/python/pulumi/" "Python SDK" + check_endpoint "/docs/reference/pkg/dotnet/Pulumi/Pulumi.html" ".NET SDK" + check_endpoint "/docs/reference/pkg/java/" "Java SDK" + check_endpoint "/docs/iac/get-started/" "Getting started" + check_endpoint "/docs/cli/commands/" "CLI reference" + + echo "" + echo "Testing Lambda@Edge redirects..." + check_redirect "/docs/intro/cloud-providers/aws/" "/registry/packages/aws/" "Cloud provider redirect" + check_redirect "/docs/reference/pkg/nodejs/pulumi/aws/" "/docs/reference/pkg/aws/\\?language=nodejs" "Node.js SDK redirect" + + echo "" + echo "==================================================" + if [[ $FAILED -eq 0 ]]; then + echo "✅ All health checks passed!" + exit 0 + else + echo "❌ Some health checks failed!" + exit 1 + fi notify: name: Send Slack notification on failure diff --git a/BUILD-AND-DEPLOY.md b/BUILD-AND-DEPLOY.md index 92b602211501..499f27f98b88 100644 --- a/BUILD-AND-DEPLOY.md +++ b/BUILD-AND-DEPLOY.md @@ -2136,11 +2136,11 @@ Video recordings archived in GitHub Actions artifacts on failure. ### Post-Deployment Health Checks -After Pulumi updates complete, automated health checks validate the deployed site. +After Pulumi updates complete, automated health checks validate the deployed site using curl-based tests. 
**Workflow:** `.github/workflows/post-deployment-health-check.yml` -**Script:** `scripts/post-deployment-health-check.js` +**Implementation:** Inline bash script using curl (no external dependencies or repository checkout required) **What it checks:** @@ -2165,20 +2165,19 @@ After Pulumi updates complete, automated health checks validate the deployed sit ### Local testing ```bash -# Test production -node scripts/post-deployment-health-check.js https://www.pulumi.com +# Test individual endpoint +curl -s -o /dev/null -w "%{http_code}\n" -L https://www.pulumi.com/docs -# Test local build -make serve-static -node scripts/post-deployment-health-check.js http://localhost:8080 +# Test redirect +curl -s -o /dev/null -w "%{http_code}|%{redirect_url}\n" https://www.pulumi.com/docs/intro/cloud-providers/aws/ ``` ### Adding new checks -Edit `scripts/post-deployment-health-check.js` and add to: +Edit `.github/workflows/post-deployment-health-check.yml` and add calls to: -- `endpoints` array for page availability checks -- `redirectTests` array for Lambda@Edge redirect tests +- `check_endpoint` function for page availability checks (expects 200 status) +- `check_redirect` function for Lambda@Edge redirect tests (expects 301 with location match) ### Example Program Testing diff --git a/scripts/post-deployment-health-check.js b/scripts/post-deployment-health-check.js deleted file mode 100755 index 5c9257e6eb40..000000000000 --- a/scripts/post-deployment-health-check.js +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env node - -const https = require('https'); -const http = require('http'); - -const targetUrl = process.argv[2] || 'https://www.pulumi.com'; -const isProduction = targetUrl.includes('pulumi.com'); - -const endpoints = [ - { path: '/', expectedStatus: 200, name: 'Homepage' }, - { path: '/docs', expectedStatus: 200, name: 'Docs landing' }, - { path: '/registry', expectedStatus: 200, name: 'Registry landing' }, - - { path: '/docs/reference/pkg/nodejs/pulumi/pulumi/', 
expectedStatus: 200, name: 'Node.js SDK' }, - { path: '/docs/reference/pkg/python/pulumi/', expectedStatus: 200, name: 'Python SDK' }, - { path: '/docs/reference/pkg/dotnet/Pulumi/Pulumi.html', expectedStatus: 200, name: '.NET SDK' }, - { path: '/docs/reference/pkg/java/', expectedStatus: 200, name: 'Java SDK' }, - - { path: '/docs/iac/get-started/', expectedStatus: 200, name: 'Getting started' }, - { path: '/docs/cli/commands/', expectedStatus: 200, name: 'CLI reference' }, -]; - -const redirectTests = [ - { - path: '/docs/intro/cloud-providers/aws/', - expectedStatus: 301, - expectedLocation: '/registry/packages/aws/', - name: 'Cloud provider redirect' - }, - { - path: '/docs/reference/pkg/nodejs/pulumi/aws/', - expectedStatus: 301, - expectedLocationPattern: /\/docs\/reference\/pkg\/aws\/\?language=nodejs/, - name: 'Node.js SDK redirect' - }, -]; - -function makeRequest(url, followRedirects = true) { - return new Promise((resolve, reject) => { - const protocol = url.startsWith('https') ? https : http; - const options = { method: 'GET' }; - - const req = protocol.get(url, options, (res) => { - if (followRedirects && (res.statusCode === 301 || res.statusCode === 302)) { - const location = res.headers.location; - const redirectUrl = location.startsWith('http') - ? 
location - : new URL(location, url).toString(); - return makeRequest(redirectUrl, true).then(resolve).catch(reject); - } - - resolve({ - statusCode: res.statusCode, - headers: res.headers, - location: res.headers.location - }); - }); - - req.on('error', reject); - req.setTimeout(10000, () => { - req.destroy(); - reject(new Error('Request timeout')); - }); - }); -} - -async function testEndpoint(baseUrl, endpoint) { - const url = new URL(endpoint.path, baseUrl).toString(); - - try { - const response = await makeRequest(url, true); - - if (response.statusCode !== endpoint.expectedStatus) { - console.error(`❌ ${endpoint.name}: Expected ${endpoint.expectedStatus}, got ${response.statusCode}`); - return false; - } - - console.log(`✅ ${endpoint.name}: ${response.statusCode}`); - return true; - } catch (error) { - console.error(`❌ ${endpoint.name}: ${error.message}`); - return false; - } -} - -async function testRedirect(baseUrl, test) { - const url = new URL(test.path, baseUrl).toString(); - - try { - const response = await makeRequest(url, false); - - if (response.statusCode !== test.expectedStatus) { - console.error(`❌ ${test.name}: Expected ${test.expectedStatus}, got ${response.statusCode}`); - return false; - } - - if (test.expectedLocation && response.location !== test.expectedLocation) { - console.error(`❌ ${test.name}: Expected location ${test.expectedLocation}, got ${response.location}`); - return false; - } - - if (test.expectedLocationPattern && !test.expectedLocationPattern.test(response.location)) { - console.error(`❌ ${test.name}: Location ${response.location} doesn't match pattern`); - return false; - } - - console.log(`✅ ${test.name}: ${response.statusCode} → ${response.location}`); - return true; - } catch (error) { - console.error(`❌ ${test.name}: ${error.message}`); - return false; - } -} - -async function main() { - console.log(`\n🏥 Post-Deployment Health Check`); - console.log(`Target: ${targetUrl}\n`); - - let allPassed = true; - 
- console.log('Testing endpoints...'); - for (const endpoint of endpoints) { - const passed = await testEndpoint(targetUrl, endpoint); - allPassed = allPassed && passed; - } - - console.log('\nTesting Lambda@Edge redirects...'); - for (const test of redirectTests) { - const passed = await testRedirect(targetUrl, test); - allPassed = allPassed && passed; - } - - console.log('\n' + '='.repeat(50)); - if (allPassed) { - console.log('✅ All health checks passed!'); - process.exit(0); - } else { - console.log('❌ Some health checks failed!'); - process.exit(1); - } -} - -main().catch(error => { - console.error('Fatal error:', error); - process.exit(1); -});