269 changes: 143 additions & 126 deletions .github/workflows/release.yml
@@ -55,6 +55,52 @@
build:
needs: test
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- name: cpu
target: final-llamacpp
platforms: "linux/amd64, linux/arm64"
tag_suffix: ""
variant: ""
base_image: ""
extra_build_args: ""

- name: cuda
target: final-llamacpp
platforms: "linux/amd64, linux/arm64"
tag_suffix: "-cuda"
variant: "cuda"
base_image: "nvidia/cuda:12.9.0-runtime-ubuntu24.04"
extra_build_args: ""

- name: vllm-cuda
target: final-vllm
platforms: "linux/amd64, linux/arm64"
tag_suffix: "-vllm-cuda"
variant: "cuda"
base_image: "nvidia/cuda:13.0.2-runtime-ubuntu24.04"
extra_build_args: |
VLLM_CUDA_VERSION=cu130
VLLM_PYTHON_TAG=cp38-abi3
- name: sglang-cuda
target: final-sglang
platforms: "linux/amd64"
tag_suffix: "-sglang-cuda"
variant: "cuda"
base_image: "nvidia/cuda:12.9.0-runtime-ubuntu24.04"
extra_build_args: ""

- name: rocm
target: final-llamacpp
platforms: "linux/amd64"
tag_suffix: "-rocm"
variant: "rocm"
base_image: "rocm/dev-ubuntu-22.04"
extra_build_args: ""

steps:
- name: Checkout repo
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -63,46 +109,10 @@
id: tags
shell: bash
run: |
echo "cpu<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "cuda<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-cuda" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-cuda" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "vllm-cuda<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-vllm-cuda" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-vllm-cuda" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "sglang-cuda<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-sglang-cuda" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "rocm<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-rocm" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-rocm" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "musa<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-musa" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-musa" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
echo "cann<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}-cann" >> "$GITHUB_OUTPUT"
echo "tags<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest-cann" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:latest${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
@@ -120,111 +130,118 @@
endpoint: "docker/make-product-smarter"
install: true

- name: Build CPU image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.cpu }}
- name: Prepare build args
id: build_args
shell: bash
run: |
ARGS="LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
if [ -n "${{ matrix.variant }}" ]; then
ARGS="${ARGS}
LLAMA_SERVER_VARIANT=${{ matrix.variant }}"
fi
if [ -n "${{ matrix.base_image }}" ]; then
ARGS="${ARGS}
BASE_IMAGE=${{ matrix.base_image }}"
fi
# Add vLLM version for vllm builds
if [ "${{ matrix.name }}" == "vllm-cuda" ]; then
ARGS="${ARGS}
VLLM_VERSION=${{ inputs.vllmVersion }}"
fi
# Add SGLang version for sglang builds
if [ "${{ matrix.name }}" == "sglang-cuda" ]; then
ARGS="${ARGS}
SGLANG_VERSION=${{ inputs.sglangVersion }}"
fi
# Add extra build args if present
if [ -n "${{ matrix.extra_build_args }}" ]; then
ARGS="${ARGS}
${{ matrix.extra_build_args }}"
fi
echo "args<<EOF" >> "$GITHUB_OUTPUT"
echo "$ARGS" >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
- name: Build CUDA image
- name: Build and push ${{ matrix.name }} image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
target: ${{ matrix.target }}
platforms: ${{ matrix.platforms }}
build-args: ${{ steps.build_args.outputs.args }}
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.cuda }}
tags: ${{ steps.tags.outputs.tags }}

- name: Build vLLM CUDA image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
target: final-vllm
platforms: linux/amd64, linux/arm64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
"VLLM_VERSION=${{ inputs.vllmVersion }}"
"VLLM_CUDA_VERSION=cu130"
"VLLM_PYTHON_TAG=cp38-abi3"
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.vllm-cuda }}
build-musa-cann:
needs: test
if: ${{ inputs.buildMusaCann }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- name: musa
target: final-llamacpp
platforms: "linux/amd64"
tag_suffix: "-musa"
variant: "musa"
base_image: "mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64"

- name: Build SGLang CUDA image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
target: final-sglang
platforms: linux/amd64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=cuda"
"BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
"SGLANG_VERSION=${{ inputs.sglangVersion }}"
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.sglang-cuda }}
- name: cann
target: final-llamacpp
platforms: "linux/arm64, linux/amd64"
tag_suffix: "-cann"
variant: "cann"
base_image: "ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"

- name: Build ROCm image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
steps:
- name: Checkout repo
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8

- name: Format tags
id: tags
shell: bash
run: |
echo "tags<<EOF" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:${{ inputs.releaseTag }}${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
if [ "${{ inputs.pushLatest }}" == "true" ]; then
echo "docker/model-runner:latest${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
fi
echo 'EOF' >> "$GITHUB_OUTPUT"
- name: Log in to DockerHub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/amd64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=rocm"
"BASE_IMAGE=rocm/dev-ubuntu-22.04"
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.rocm }}
username: "docker"
password: ${{ secrets.ORG_ACCESS_TOKEN }}

- name: Build MUSA image
if: ${{ inputs.buildMusaCann }}
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
- name: Set up Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/amd64
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=musa"
"BASE_IMAGE=mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64"
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.musa }}
version: "lab:latest"
driver: cloud
endpoint: "docker/make-product-smarter"
install: true

- name: Build CANN image
if: ${{ inputs.buildMusaCann }}
- name: Build and push ${{ matrix.name }} image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
with:
file: Dockerfile
target: final-llamacpp
platforms: linux/arm64, linux/amd64
target: ${{ matrix.target }}
platforms: ${{ matrix.platforms }}
build-args: |
"LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
"LLAMA_SERVER_VARIANT=cann"
"BASE_IMAGE=ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"
LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}
LLAMA_SERVER_VARIANT=${{ matrix.variant }}
BASE_IMAGE=${{ matrix.base_image }}
push: true
sbom: true
provenance: mode=max
tags: ${{ steps.tags.outputs.cann }}
tags: ${{ steps.tags.outputs.tags }}
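
Aside for reviewers: both "Format tags" and "Prepare build args" rely on the heredoc-style delimiter that GitHub Actions requires for multiline step outputs. A minimal, self-contained sketch of that pattern, using illustrative tag values (v1.2.3, -cuda) rather than this workflow's real inputs:

jobs:
  demo:
    runs-on: ubuntu-latest
    steps:
      - name: Write a multiline output
        id: tags
        shell: bash
        run: |
          # Everything between "tags<<EOF" and the closing "EOF" becomes
          # the value of steps.tags.outputs.tags, one tag per line.
          {
            echo "tags<<EOF"
            echo "docker/model-runner:v1.2.3-cuda"
            echo "docker/model-runner:latest-cuda"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Read it back
        shell: bash
        run: |
          # The newline-separated value is the shape docker/build-push-action
          # accepts for its `tags:` input.
          echo "${{ steps.tags.outputs.tags }}"

This mechanism is what lets the matrix refactor collapse seven near-identical tag blocks into a single parameterized step.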
Comment on lines +184 to +247

Code scanning / CodeQL — warning (Medium): Workflow does not contain permissions

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}

Copilot Autofix

In general, the fix is to explicitly declare permissions for the workflow or for specific jobs so that the default, potentially broad, permissions of GITHUB_TOKEN are not used. The minimal needed permission here is read access to repository contents, since all jobs use actions/checkout and do not appear to modify GitHub resources (no pushes, releases, or PR/issue operations).

The best fix with no behavior change is to add a single permissions block at the top level of .github/workflows/release.yml, alongside name / run-name / on, so that all jobs inherit these restricted permissions. We set contents: read, which is sufficient for actions/checkout to read the repository and does not interfere with Docker Hub login or image pushes (those use secrets.ORG_ACCESS_TOKEN, not GITHUB_TOKEN). No additional methods, imports, or definitions are needed; this is purely a YAML configuration change in .github/workflows/release.yml. Concretely, insert:

permissions:
  contents: read

right after the run-name or before the on: block.

Suggested changeset 1: .github/workflows/release.yml

Autofix patch — run the following command in your local git repository to apply it:
cat << 'EOF' | git apply
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,5 +1,7 @@
 name: Release model-runner images for CE
 run-name: Release model-runner images for CE, version ${{ inputs.releaseTag }}
+permissions:
+  contents: read
 
 on:
   workflow_dispatch:
EOF
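For completeness, GitHub Actions also accepts permissions at the job level, which scopes the restriction per job rather than workflow-wide. A sketch of that alternative, abbreviated and equivalent in effect here since image pushes authenticate with secrets.ORG_ACCESS_TOKEN rather than GITHUB_TOKEN:

jobs:
  build:
    # Limit GITHUB_TOKEN for this job only; the other jobs (test,
    # build-musa-cann) would each need the same block.
    permissions:
      contents: read
    needs: test
    runs-on: ubuntu-latest
    # ...steps unchanged from the diff above

The workflow-level block in the patch is simpler and covers future jobs by default, which is presumably why the autofix prefers it.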