From 3e24ec62013ae7d90d9250252219f43bdf970e7a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 20:42:57 +0000 Subject: [PATCH 1/5] Initial plan From 90874d7a03928707106d707619c7bc6dff3f0779 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 20:54:18 +0000 Subject: [PATCH 2/5] fix: move publishRetryMessage to end of loop and skip invalid messages Per reviewer feedback, publishRetryMessage is now called at the end of the processing loop for each message that is NOT marked as invalid. This prevents duplicate retry messages for the same event, since invalid messages already go back to the SQS queue for retry. Key changes: - Track validMessagesForRetry separately from all messages - Only messages that pass job queued check are added to validMessagesForRetry - publishRetryMessage is called after runner creation, not before - Messages marked as invalid (e.g., max runners reached, creation failed) are excluded from retry message publishing Tests updated to reflect new behavior: - publishRetryMessage is called AFTER runner creation - Messages marked invalid do not get retry messages published - All test scenarios updated with proper mock runner creation Co-authored-by: npalm <11609620+npalm@users.noreply.github.com> --- .../src/scale-runners/scale-up.test.ts | 15 ++++++++++----- .../control-plane/src/scale-runners/scale-up.ts | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts index 9549e7cb2d..9261d18bf9 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts @@ -1698,6 +1698,7 @@ describe('Retry mechanism tests', () => { it('calls publishRetryMessage for each valid message when job is queued', async () => { const messages = createTestMessages(3); + mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890', 'i-abcdef']); // Create all requested runners await scaleUpModule.scaleUp(messages); @@ -1749,15 +1750,16 @@ describe('Retry mechanism tests', () => { ); }); - it('calls publishRetryMessage even when maximum runners is reached', async () => { + it('does not call publishRetryMessage when maximum runners is reached and messages are marked invalid', async () => { process.env.RUNNERS_MAXIMUM_COUNT = '0'; // No runners can be created const messages = createTestMessages(2); await scaleUpModule.scaleUp(messages); - // publishRetryMessage should still be called even though no runners will be created - expect(mockPublishRetryMessage).toHaveBeenCalledTimes(2); + // publishRetryMessage should NOT be called because messages are marked as invalid + // Invalid messages go back to the SQS queue and will be retried there + expect(mockPublishRetryMessage).not.toHaveBeenCalled(); expect(createRunner).not.toHaveBeenCalled(); }); @@ -1781,6 +1783,7 @@ describe('Retry mechanism tests', () => { it('calls publishRetryMessage when ENABLE_JOB_QUEUED_CHECK is false', async () => { process.env.ENABLE_JOB_QUEUED_CHECK = 'false'; + mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890']); // Create all requested runners const messages = createTestMessages(2); @@ -1792,6 +1795,7 @@ describe('Retry mechanism tests', () => { }); it('calls publishRetryMessage for each message in a multi-runner scenario', async () => { + mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890', 'i-abcdef', 'i-11111', 'i-22222']); // Create all requested runners const messages = createTestMessages(5); await scaleUpModule.scaleUp(messages); @@ -1808,8 +1812,9 @@ describe('Retry mechanism tests', () => { }); }); - it('calls publishRetryMessage before runner creation', async () => { + it('calls publishRetryMessage after runner creation', async () => { const messages = createTestMessages(1); + mockCreateRunner.mockResolvedValue(['i-12345']); // Create the requested runner const callOrder: string[] = []; mockPublishRetryMessage.mockImplementation(() => { @@ -1823,7 +1828,7 @@ describe('Retry mechanism tests', () => { await scaleUpModule.scaleUp(messages); - expect(callOrder).toEqual(['publishRetryMessage', 'createRunner']); + expect(callOrder).toEqual(['createRunner', 'publishRetryMessage']); }); }); diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index d340b06010..56bfe0864f 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -339,6 +339,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise messageId)); } + + // Publish retry messages for all messages that are not marked as invalid + for (const message of validMessagesForRetry) { + if (!invalidMessages.includes(message.messageId)) { + await publishRetryMessage(message as ActionRequestMessageRetry); + } + } } return invalidMessages; From 11c1578af5e10512a478d5d782d54180b880295c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 20:57:56 +0000 Subject: [PATCH 3/5] refactor: improve performance and reduce code duplication Address code review feedback: - Use Set for invalidMessageIds to improve lookup performance from O(n) to O(1) - Extract duplicate retry message publishing logic into helper function - Maintain both invalidMessages array (for return value) and Set (for fast lookups) This improves performance when processing large message batches and makes the code more maintainable by eliminating duplication. Co-authored-by: npalm <11609620+npalm@users.noreply.github.com> --- .../src/scale-runners/scale-up.ts | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index 56bfe0864f..13eae63ca1 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -90,6 +90,17 @@ function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfi return config; } +async function publishRetryMessagesForValid( + messages: ActionRequestMessageSQS[], + invalidMessageIds: Set, +): Promise { + for (const message of messages) { + if (!invalidMessageIds.has(message.messageId)) { + await publishRetryMessage(message as ActionRequestMessageRetry); + } + } +} + async function getGithubRunnerRegistrationToken(githubRunnerConfig: CreateGitHubRunnerConfig, ghClient: Octokit) { const registrationToken = githubRunnerConfig.runnerType === 'Org' @@ -276,6 +287,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise(); const invalidMessages: string[] = []; + const invalidMessageIds = new Set(); for (const payload of payloads) { const { eventType, messageId, repositoryName, repositoryOwner } = payload; if (ephemeralEnabled && eventType !== 'workflow_job') { @@ -394,17 +406,15 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise messageId)); + const removedMessages = messages.splice(0, missingInstanceCount); + invalidMessages.push(...removedMessages.map(({ messageId }) => messageId)); + removedMessages.forEach(({ messageId }) => invalidMessageIds.add(messageId)); } // No runners will be created, so skip calling the EC2 API. if (newRunners <= 0) { // Publish retry messages for all remaining messages that are not marked as invalid - for (const message of validMessagesForRetry) { - if (!invalidMessages.includes(message.messageId)) { - await publishRetryMessage(message as ActionRequestMessageRetry); - } - } + await publishRetryMessagesForValid(validMessagesForRetry, invalidMessageIds); continue; } } @@ -456,15 +466,13 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise messageId)); + const failedMessages = messages.slice(0, failedInstanceCount); + invalidMessages.push(...failedMessages.map(({ messageId }) => messageId)); + failedMessages.forEach(({ messageId }) => invalidMessageIds.add(messageId)); } // Publish retry messages for all messages that are not marked as invalid - for (const message of validMessagesForRetry) { - if (!invalidMessages.includes(message.messageId)) { - await publishRetryMessage(message as ActionRequestMessageRetry); - } - } + await publishRetryMessagesForValid(validMessagesForRetry, invalidMessageIds); } return invalidMessages; From b5399f6517997741c43f03ace7d51e7896e6dc1e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 21:32:51 +0000 Subject: [PATCH 4/5] refactor: simplify naming and reduce complexity Simplified the retry message logic based on reviewer feedback: - Renamed `validMessagesForRetry` to `queuedMessages` (clearer intent) - Renamed `invalidMessageIds` to `rejectedMessageIds` (more accurate) - Removed `invalidMessages` array duplication - now using only Set - Removed `publishRetryMessagesForValid` helper function (inline instead) - Convert Set to array only at return statement Result: clearer naming, less complexity, same functionality. Net reduction of 6 lines while maintaining all tests passing. Co-authored-by: npalm <11609620+npalm@users.noreply.github.com> --- .../src/scale-runners/scale-up.ts | 44 ++++++++----------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index 13eae63ca1..6a3ab93bb7 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -90,17 +90,6 @@ function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfi return config; } -async function publishRetryMessagesForValid( - messages: ActionRequestMessageSQS[], - invalidMessageIds: Set, -): Promise { - for (const message of messages) { - if (!invalidMessageIds.has(message.messageId)) { - await publishRetryMessage(message as ActionRequestMessageRetry); - } - } -} - async function getGithubRunnerRegistrationToken(githubRunnerConfig: CreateGitHubRunnerConfig, ghClient: Octokit) { const registrationToken = githubRunnerConfig.runnerType === 'Org' @@ -286,8 +275,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise(); - const invalidMessages: string[] = []; - const invalidMessageIds = new Set(); + const rejectedMessageIds = new Set(); for (const payload of payloads) { const { eventType, messageId, repositoryName, repositoryOwner } = payload; if (ephemeralEnabled && eventType !== 'workflow_job') { @@ -296,7 +284,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise messageId)); - removedMessages.forEach(({ messageId }) => invalidMessageIds.add(messageId)); + removedMessages.forEach(({ messageId }) => rejectedMessageIds.add(messageId)); } // No runners will be created, so skip calling the EC2 API. if (newRunners <= 0) { - // Publish retry messages for all remaining messages that are not marked as invalid - await publishRetryMessagesForValid(validMessagesForRetry, invalidMessageIds); + // Publish retry messages for messages that are not rejected + for (const message of queuedMessages) { + if (!rejectedMessageIds.has(message.messageId)) { + await publishRetryMessage(message as ActionRequestMessageRetry); + } + } continue; } } @@ -467,15 +458,18 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise messageId)); - failedMessages.forEach(({ messageId }) => invalidMessageIds.add(messageId)); + failedMessages.forEach(({ messageId }) => rejectedMessageIds.add(messageId)); } - // Publish retry messages for all messages that are not marked as invalid - await publishRetryMessagesForValid(validMessagesForRetry, invalidMessageIds); + // Publish retry messages for messages that are not rejected + for (const message of queuedMessages) { + if (!rejectedMessageIds.has(message.messageId)) { + await publishRetryMessage(message as ActionRequestMessageRetry); + } + } } - return invalidMessages; + return Array.from(rejectedMessageIds); } export function getGitHubEnterpriseApiUrl() { From cba7bbf3ed3d662b64fb865236ae67b243d87979 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 23:10:10 +0000 Subject: [PATCH 5/5] ci: run lambda workflow on all PRs, not just PRs to main Remove branch restriction from lambda.yml workflow to support stacked PRs and feature branch workflows. The workflow will now run on any PR that modifies lambda code or the workflow itself. Co-authored-by: npalm <11609620+npalm@users.noreply.github.com> --- .github/workflows/lambda.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index bbd6d93a8e..6f1e806594 100644 --- a/.github/workflows/lambda.yml +++ b/.github/workflows/lambda.yml @@ -2,8 +2,6 @@ name: Build lambdas on: pull_request: - branches: - - main paths: - 'lambdas/**' - '.github/workflows/lambda.yml'