From 84d74a8a29ca31e4d2e53ae8327f6c20dc3e8cc0 Mon Sep 17 00:00:00 2001 From: Harsh Date: Thu, 19 Feb 2026 16:46:37 +0530 Subject: [PATCH] fix: disable systemd restart rate limiting and update runner settings --- .../internal/pkg/monitor/templates/runner-service.aws.service | 1 + bins/runner/internal/pkg/settings/settings.go | 2 +- services/ctl-api/internal/pkg/stacks/bicep/stack.bicep | 1 + services/ctl-api/internal/pkg/stacks/bicep/tmpl.go | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bins/runner/internal/pkg/monitor/templates/runner-service.aws.service b/bins/runner/internal/pkg/monitor/templates/runner-service.aws.service index ddd1daafba..67f2913814 100644 --- a/bins/runner/internal/pkg/monitor/templates/runner-service.aws.service +++ b/bins/runner/internal/pkg/monitor/templates/runner-service.aws.service @@ -2,6 +2,7 @@ Description=Nuon Runner Service After=docker.service Requires=docker.service +StartLimitIntervalSec=0 [Service] TimeoutStartSec=0 diff --git a/bins/runner/internal/pkg/settings/settings.go b/bins/runner/internal/pkg/settings/settings.go index d9f37b70ac..c416700948 100644 --- a/bins/runner/internal/pkg/settings/settings.go +++ b/bins/runner/internal/pkg/settings/settings.go @@ -68,7 +68,7 @@ func New(params Params) (*Settings, error) { // not use the settings in any other dependency initializer (ie: New function), because the settings will not be // loaded yet. ctx := context.Background() - ctx, cancelFn := context.WithTimeout(ctx, time.Second) + ctx, cancelFn := context.WithTimeout(ctx, 3*time.Second) defer cancelFn() if err := settings.fetch(ctx); err != nil { return nil, errors.Wrap(err, "unable to fetch settings") diff --git a/services/ctl-api/internal/pkg/stacks/bicep/stack.bicep b/services/ctl-api/internal/pkg/stacks/bicep/stack.bicep index 682b91664d..6cbab061a8 100644 --- a/services/ctl-api/internal/pkg/stacks/bicep/stack.bicep +++ b/services/ctl-api/internal/pkg/stacks/bicep/stack.bicep @@ -395,6 +395,7 @@ cat << 'EOF' > /etc/systemd/system/nuon-runner.service Description=Nuon Runner Service After=docker.service Requires=docker.service +StartLimitIntervalSec=0 [Service] TimeoutStartSec=0 diff --git a/services/ctl-api/internal/pkg/stacks/bicep/tmpl.go b/services/ctl-api/internal/pkg/stacks/bicep/tmpl.go index b1585c253f..92217e18ba 100644 --- a/services/ctl-api/internal/pkg/stacks/bicep/tmpl.go +++ b/services/ctl-api/internal/pkg/stacks/bicep/tmpl.go @@ -115,7 +115,7 @@ const tmpl = ` "createPublicSubnet3": "[not(empty(parameters('publicSubnet3CIDR')))]", "createPrivateSubnet2": "[not(empty(parameters('privateSubnet2CIDR')))]", "createPrivateSubnet3": "[not(empty(parameters('privateSubnet3CIDR')))]", - "customData": "#!/bin/bash\n\nRUNNER_ID={{.Runner.ID}}\nRUNNER_API_TOKEN={{.APIToken}}\nRUNNER_API_URL={{.Settings.RunnerAPIURL}}\nAWS_REGION={{.Install.AzureAccount.Location}}\n\n# Remove any existing Docker packages\napt-get remove -y docker docker-engine docker.io containerd runc\n\n# Update package index and install prerequisites\napt-get update\napt-get install -y ca-certificates curl gnupg lsb-release\n\n# Add Docker's official GPG key\nmkdir -p /etc/apt/keyrings\ncurl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg\n\n# Set up the repository\necho \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\" | tee /etc/apt/sources.list.d/docker.list > /dev/null\n\n# Install Docker Engine\napt-get update\napt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin\n\n# Force unmask and start Docker service\nrm -f /etc/systemd/system/docker.service\nrm -f /etc/systemd/system/docker.socket\nsystemctl daemon-reload\nsystemctl unmask docker.service\nsystemctl unmask docker.socket\nsystemctl enable docker\nsystemctl start docker\n\n# Ensure docker group exists and set up runner user\ngroupadd -f docker\nmkdir -p /opt/nuon/runner\nuseradd runner -G docker -c \"\" -d /opt/nuon/runner -m\nchown -R runner:runner /opt/nuon/runner\n\ncat << EOF > /opt/nuon/runner/env\nRUNNER_ID=$RUNNER_ID\nRUNNER_API_TOKEN=$RUNNER_API_TOKEN\nRUNNER_API_URL=$RUNNER_API_URL\nARM_USE_MSI=true\n# FIXME(sdboyer) this hack must be fixed - userdata is only run on instance creation, and ip can change on each boot\nHOST_IP=$(curl -s https://checkip.amazonaws.com)\nEOF\n\n# this ⤵ is wrapped w/ single quotes to prevent variable expansion.\ncat << 'EOF' > /opt/nuon/runner/get_image_tag.sh\n#!/bin/bash\n\nset -u\n\n# source this file to get some env vars\n. /opt/nuon/runner/env\n\n# Fetch runner settings from the API\necho \"Fetching runner settings from $RUNNER_API_URL/v1/runners/$RUNNER_ID/settings\"\nRUNNER_SETTINGS=$(curl -s -H \"Authorization: Bearer $RUNNER_API_TOKEN\" \"$RUNNER_API_URL/v1/runners/$RUNNER_ID/settings\")\n\n# Extract container image URL and tag from the response\nCONTAINER_IMAGE_URL=$(echo \"$RUNNER_SETTINGS\" | grep -o '\"container_image_url\":\"[^\"]*\"' | cut -d '\"' -f 4)\nCONTAINER_IMAGE_TAG=$(echo \"$RUNNER_SETTINGS\" | grep -o '\"container_image_tag\":\"[^\"]*\"' | cut -d '\"' -f 4)\n\n# echo into a file for easier retrieval; re-create the file to avoid duplicate values.\nrm -f /opt/nuon/runner/image\necho \"CONTAINER_IMAGE_URL=$CONTAINER_IMAGE_URL\" >> /opt/nuon/runner/image\necho \"CONTAINER_IMAGE_TAG=$CONTAINER_IMAGE_TAG\" >> /opt/nuon/runner/image\n\n# export so we can get these values by sourcing this file\nexport CONTAINER_IMAGE_URL=$CONTAINER_IMAGE_URL\nexport CONTAINER_IMAGE_TAG=$CONTAINER_IMAGE_TAG\n\necho \"Using container image: $CONTAINER_IMAGE_URL:$CONTAINER_IMAGE_TAG\"\nEOF\n\nchmod +x /opt/nuon/runner/get_image_tag.sh\n/opt/nuon/runner/get_image_tag.sh\n\n# Create systemd unit file for runner\ncat << 'EOF' > /etc/systemd/system/nuon-runner.service\n[Unit]\nDescription=Nuon Runner Service\nAfter=docker.service\nRequires=docker.service\n\n[Service]\nTimeoutStartSec=0\nUser=runner\nExecStartPre=-/bin/sh -c '/usr/bin/docker stop $(/usr/bin/docker ps -a -q --filter=\"name=%n\")'\nExecStartPre=-/bin/sh -c '/usr/bin/docker rm $(/usr/bin/docker ps -a -q --filter=\"name=%n\")'\nExecStartPre=-/bin/sh -c \"yes | /usr/bin/docker system prune\"\nExecStartPre=-/bin/sh /opt/nuon/runner/get_image_tag.sh\nEnvironmentFile=/opt/nuon/runner/image\nEnvironmentFile=/opt/nuon/runner/env\nExecStartPre=echo \"Using container image: ${CONTAINER_IMAGE_URL}:${CONTAINER_IMAGE_TAG}\"\nExecStartPre=/usr/bin/docker pull ${CONTAINER_IMAGE_URL}:${CONTAINER_IMAGE_TAG}\nExecStart=/usr/bin/docker run --network host -v /tmp/nuon-runner:/tmp --rm --name %n -p 5000:5000 --memory \"3750g\" --cpus=\"1.75\" --env-file /opt/nuon/runner/env ${CONTAINER_IMAGE_URL}:${CONTAINER_IMAGE_TAG} run\nRestart=always\nRestartSec=5\n\n[Install]\nWantedBy=default.target\nEOF\n\n# Reload systemd and start the service (no SELinux on Ubuntu)\nsystemctl daemon-reload\nsystemctl enable --now nuon-runner\n" + "customData": "#!/bin/bash\n\nRUNNER_ID={{.Runner.ID}}\nRUNNER_API_TOKEN={{.APIToken}}\nRUNNER_API_URL={{.Settings.RunnerAPIURL}}\nAWS_REGION={{.Install.AzureAccount.Location}}\n\n# Remove any existing Docker packages\napt-get remove -y docker docker-engine docker.io containerd runc\n\n# Update package index and install prerequisites\napt-get update\napt-get install -y ca-certificates curl gnupg lsb-release\n\n# Add Docker's official GPG key\nmkdir -p /etc/apt/keyrings\ncurl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg\n\n# Set up the repository\necho \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\" | tee /etc/apt/sources.list.d/docker.list > /dev/null\n\n# Install Docker Engine\napt-get update\napt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin\n\n# Force unmask and start Docker service\nrm -f /etc/systemd/system/docker.service\nrm -f /etc/systemd/system/docker.socket\nsystemctl daemon-reload\nsystemctl unmask docker.service\nsystemctl unmask docker.socket\nsystemctl enable docker\nsystemctl start docker\n\n# Ensure docker group exists and set up runner user\ngroupadd -f docker\nmkdir -p /opt/nuon/runner\nuseradd runner -G docker -c \"\" -d /opt/nuon/runner -m\nchown -R runner:runner /opt/nuon/runner\n\ncat << EOF > /opt/nuon/runner/env\nRUNNER_ID=$RUNNER_ID\nRUNNER_API_TOKEN=$RUNNER_API_TOKEN\nRUNNER_API_URL=$RUNNER_API_URL\nARM_USE_MSI=true\n# FIXME(sdboyer) this hack must be fixed - userdata is only run on instance creation, and ip can change on each boot\nHOST_IP=$(curl -s https://checkip.amazonaws.com)\nEOF\n\n# this ⤵ is wrapped w/ single quotes to prevent variable expansion.\ncat << 'EOF' > /opt/nuon/runner/get_image_tag.sh\n#!/bin/bash\n\nset -u\n\n# source this file to get some env vars\n. /opt/nuon/runner/env\n\n# Fetch runner settings from the API\necho \"Fetching runner settings from $RUNNER_API_URL/v1/runners/$RUNNER_ID/settings\"\nRUNNER_SETTINGS=$(curl -s -H \"Authorization: Bearer $RUNNER_API_TOKEN\" \"$RUNNER_API_URL/v1/runners/$RUNNER_ID/settings\")\n\n# Extract container image URL and tag from the response\nCONTAINER_IMAGE_URL=$(echo \"$RUNNER_SETTINGS\" | grep -o '\"container_image_url\":\"[^\"]*\"' | cut -d '\"' -f 4)\nCONTAINER_IMAGE_TAG=$(echo \"$RUNNER_SETTINGS\" | grep -o '\"container_image_tag\":\"[^\"]*\"' | cut -d '\"' -f 4)\n\n# echo into a file for easier retrieval; re-create the file to avoid duplicate values.\nrm -f /opt/nuon/runner/image\necho \"CONTAINER_IMAGE_URL=$CONTAINER_IMAGE_URL\" >> /opt/nuon/runner/image\necho \"CONTAINER_IMAGE_TAG=$CONTAINER_IMAGE_TAG\" >> /opt/nuon/runner/image\n\n# export so we can get these values by sourcing this file\nexport CONTAINER_IMAGE_URL=$CONTAINER_IMAGE_URL\nexport CONTAINER_IMAGE_TAG=$CONTAINER_IMAGE_TAG\n\necho \"Using container image: $CONTAINER_IMAGE_URL:$CONTAINER_IMAGE_TAG\"\nEOF\n\nchmod +x /opt/nuon/runner/get_image_tag.sh\n/opt/nuon/runner/get_image_tag.sh\n\n# Create systemd unit file for runner\ncat << 'EOF' > /etc/systemd/system/nuon-runner.service\n[Unit]\nDescription=Nuon Runner Service\nAfter=docker.service\nRequires=docker.service\nStartLimitIntervalSec=0\n\n[Service]\nTimeoutStartSec=0\nUser=runner\nExecStartPre=-/bin/sh -c '/usr/bin/docker stop $(/usr/bin/docker ps -a -q --filter=\"name=%n\")'\nExecStartPre=-/bin/sh -c '/usr/bin/docker rm $(/usr/bin/docker ps -a -q --filter=\"name=%n\")'\nExecStartPre=-/bin/sh -c \"yes | /usr/bin/docker system prune\"\nExecStartPre=-/bin/sh /opt/nuon/runner/get_image_tag.sh\nEnvironmentFile=/opt/nuon/runner/image\nEnvironmentFile=/opt/nuon/runner/env\nExecStartPre=echo \"Using container image: ${CONTAINER_IMAGE_URL}:${CONTAINER_IMAGE_TAG}\"\nExecStartPre=/usr/bin/docker pull ${CONTAINER_IMAGE_URL}:${CONTAINER_IMAGE_TAG}\nExecStart=/usr/bin/docker run --network host -v /tmp/nuon-runner:/tmp --rm --name %n -p 5000:5000 --memory \"3750g\" --cpus=\"1.75\" --env-file /opt/nuon/runner/env ${CONTAINER_IMAGE_URL}:${CONTAINER_IMAGE_TAG} run\nRestart=always\nRestartSec=5\n\n[Install]\nWantedBy=default.target\nEOF\n\n# Reload systemd and start the service (no SELinux on Ubuntu)\nsystemctl daemon-reload\nsystemctl enable --now nuon-runner\n" }, "resources": [ {