From 5dc0e6a6046622238566fb3953d5bedfd8310292 Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 30 May 2023 10:58:55 -0400 Subject: [PATCH 01/11] adding rapids runner --- .github/workflows/gpu-ci.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml index 479033dfb..b12736399 100644 --- a/.github/workflows/gpu-ci.yml +++ b/.github/workflows/gpu-ci.yml @@ -3,16 +3,14 @@ name: GPU CI on: workflow_dispatch: push: - branches: [main] - tags: - - "v[0-9]+.[0-9]+.[0-9]+" + branches-ignore: [main] pull_request: branches: [main] types: [opened, synchronize, reopened] jobs: gpu-ci: - runs-on: 1GPU + runs-on: linux-amd64-gpu-p100-latest-1 steps: - uses: actions/checkout@v3 From f52a16dd021aedfc7dcc8052f3fa9502f7d3dce2 Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 30 May 2023 11:04:38 -0400 Subject: [PATCH 02/11] revert changes --- .github/workflows/gpu-ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml index b12736399..479033dfb 100644 --- a/.github/workflows/gpu-ci.yml +++ b/.github/workflows/gpu-ci.yml @@ -3,14 +3,16 @@ name: GPU CI on: workflow_dispatch: push: - branches-ignore: [main] + branches: [main] + tags: + - "v[0-9]+.[0-9]+.[0-9]+" pull_request: branches: [main] types: [opened, synchronize, reopened] jobs: gpu-ci: - runs-on: linux-amd64-gpu-p100-latest-1 + runs-on: 1GPU steps: - uses: actions/checkout@v3 From 5846e79f385f14ea05606fdbe002a18d722813bc Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 30 May 2023 11:07:28 -0400 Subject: [PATCH 03/11] added new ci workflow for runner test --- .github/workflows/test_ci.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/test_ci.yml diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml new file mode 100644 index 000000000..0bfa208fb --- /dev/null +++ b/.github/workflows/test_ci.yml @@ 
-0,0 +1,20 @@ +name: Test Self Hosted Runners +on: push +jobs: + job1_cpu: + runs-on: linux-amd64-gpu-p100-latest-1 + container: # GPU jobs must run in a container + image: nvcr.io/nvstaging/merlin/merlin-ci-runner:latest + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} # GPU jobs must set this container env variable + steps: + - name: gpu-test + run: | + ref_type=${{ github.ref_type }} + branch=main + if [[ $ref_type == "tag"* ]] + then + raw=$(git branch -r --contains ${{ github.ref_name }}) + branch=${raw/origin\/} + fi + cd ${{ github.workspace }}; tox -e test-gpu -- "$branch" From 323ae0f67e28e16a6a21c148196c5b979d987304 Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 30 May 2023 11:20:49 -0400 Subject: [PATCH 04/11] commit bump --- .github/workflows/test_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index 0bfa208fb..ebccf5f97 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -17,4 +17,4 @@ jobs: raw=$(git branch -r --contains ${{ github.ref_name }}) branch=${raw/origin\/} fi - cd ${{ github.workspace }}; tox -e test-gpu -- "$branch" + cd ${{ github.workspace }}; tox -e test-gpu -- "$branch" From ec7f6cda8ff824bab863e555c1a6f279ae71be0e Mon Sep 17 00:00:00 2001 From: Julio Date: Mon, 3 Jul 2023 11:09:03 -0400 Subject: [PATCH 05/11] testing credentials and added label to actionlint --- .github/actionlint.yaml | 1 + .github/workflows/gpu-ci-docker.yaml | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 .github/workflows/gpu-ci-docker.yaml diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index e0fa30b4b..f5f4e4c59 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -3,3 +3,4 @@ self-hosted-runner: labels: - 1GPU - 2GPU + - linux-amd64-gpu-p100-latest-1 diff --git a/.github/workflows/gpu-ci-docker.yaml b/.github/workflows/gpu-ci-docker.yaml new file mode 100644 
index 000000000..03881de34 --- /dev/null +++ b/.github/workflows/gpu-ci-docker.yaml @@ -0,0 +1,23 @@ +name: GPU CI + +on: + push: + branches: + - "pull-request/[0-9]+" + +jobs: + gpu-ci-docker: + runs-on: linux-amd64-gpu-p100-latest-1 + + container: + image: nvcr.io/nvstaging/merlin/merlin-ci-runner:latest + credentials: + username: ${{ secrets.SVC_DOCKER_USER }} + password: ${{ secrets.SVC_DOCKER_TOKEN }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Run tests + run: | + cd ${{ github.workspace }}; tox -e test-gpu From 42ef4df6b56f6a7ee436f36df4188c1abf9bba36 Mon Sep 17 00:00:00 2001 From: Julio Date: Mon, 3 Jul 2023 11:21:56 -0400 Subject: [PATCH 06/11] add gpus flag to docker run command --- .github/workflows/gpu-ci-docker.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu-ci-docker.yaml b/.github/workflows/gpu-ci-docker.yaml index 03881de34..7c4f779b6 100644 --- a/.github/workflows/gpu-ci-docker.yaml +++ b/.github/workflows/gpu-ci-docker.yaml @@ -14,6 +14,7 @@ jobs: credentials: username: ${{ secrets.SVC_DOCKER_USER }} password: ${{ secrets.SVC_DOCKER_TOKEN }} + options: --gpus=all steps: - uses: actions/checkout@v3 with: From 1cd3c5c5ae5f8e892b3e5521783b14c6832c0d11 Mon Sep 17 00:00:00 2001 From: Julio Date: Mon, 3 Jul 2023 11:37:19 -0400 Subject: [PATCH 07/11] add corrected env flag --- .github/workflows/gpu-ci-docker.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gpu-ci-docker.yaml b/.github/workflows/gpu-ci-docker.yaml index 7c4f779b6..a4df57f95 100644 --- a/.github/workflows/gpu-ci-docker.yaml +++ b/.github/workflows/gpu-ci-docker.yaml @@ -14,7 +14,8 @@ jobs: credentials: username: ${{ secrets.SVC_DOCKER_USER }} password: ${{ secrets.SVC_DOCKER_TOKEN }} - options: --gpus=all + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} steps: - uses: actions/checkout@v3 with: From 88d780473b45a4e360d8171f93e02a2a8d2dfc04 Mon Sep 17 00:00:00 2001 From: 
Julio Date: Mon, 3 Jul 2023 12:56:20 -0400 Subject: [PATCH 08/11] add in branch name and change directory target --- .github/workflows/gpu-ci-docker.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gpu-ci-docker.yaml b/.github/workflows/gpu-ci-docker.yaml index a4df57f95..81b597672 100644 --- a/.github/workflows/gpu-ci-docker.yaml +++ b/.github/workflows/gpu-ci-docker.yaml @@ -20,6 +20,15 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 + - uses: actions/cache@v3 + with: + path: .tox + key: tox-${{ matrix.image }}-${{ hashFiles('requirements/*.txt') }} + - name: Get Branch name + id: get-branch-name + uses: NVIDIA-Merlin/.github/actions/branch-name@6f0539fba24f60da2aee63c5925bee7cee3206e3 - name: Run tests run: | - cd ${{ github.workspace }}; tox -e test-gpu + merlin_branch="${{ steps.get-branch-name.outputs.branch }}" + MERLIN_BRANCH=$merlin_branch COMPARE_BRANCH=$merlin_branch \ + tox -e gpu From 419d8093d9a463786280aa0a8a4f9b3d96759174 Mon Sep 17 00:00:00 2001 From: Julio Date: Mon, 3 Jul 2023 14:02:56 -0400 Subject: [PATCH 09/11] remove unnecessary steps --- .github/workflows/gpu-ci-docker.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/gpu-ci-docker.yaml b/.github/workflows/gpu-ci-docker.yaml index 81b597672..373a5a307 100644 --- a/.github/workflows/gpu-ci-docker.yaml +++ b/.github/workflows/gpu-ci-docker.yaml @@ -20,10 +20,6 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: actions/cache@v3 - with: - path: .tox - key: tox-${{ matrix.image }}-${{ hashFiles('requirements/*.txt') }} - name: Get Branch name id: get-branch-name uses: NVIDIA-Merlin/.github/actions/branch-name@6f0539fba24f60da2aee63c5925bee7cee3206e3 From 07f03b8e1e5d4874a3aa28f94a83c89f1755e4d6 Mon Sep 17 00:00:00 2001 From: Julio Date: Tue, 4 Jul 2023 11:44:42 -0400 Subject: [PATCH 10/11] run correct test command --- .github/workflows/gpu-ci-docker.yaml | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/.github/workflows/gpu-ci-docker.yaml b/.github/workflows/gpu-ci-docker.yaml index 373a5a307..fca40cd8d 100644 --- a/.github/workflows/gpu-ci-docker.yaml +++ b/.github/workflows/gpu-ci-docker.yaml @@ -27,4 +27,4 @@ jobs: run: | merlin_branch="${{ steps.get-branch-name.outputs.branch }}" MERLIN_BRANCH=$merlin_branch COMPARE_BRANCH=$merlin_branch \ - tox -e gpu + tox -e test-gpu From e1b74cc67e53f8f627a9ad433ac648032aa1fe58 Mon Sep 17 00:00:00 2001 From: Julio Date: Wed, 5 Jul 2023 14:20:57 -0400 Subject: [PATCH 11/11] remove test ci yaml file --- .github/workflows/test_ci.yml | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 .github/workflows/test_ci.yml diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml deleted file mode 100644 index ebccf5f97..000000000 --- a/.github/workflows/test_ci.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Test Self Hosted Runners -on: push -jobs: - job1_cpu: - runs-on: linux-amd64-gpu-p100-latest-1 - container: # GPU jobs must run in a container - image: nvcr.io/nvstaging/merlin/merlin-ci-runner:latest - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} # GPU jobs must set this container env variable - steps: - - name: gpu-test - run: | - ref_type=${{ github.ref_type }} - branch=main - if [[ $ref_type == "tag"* ]] - then - raw=$(git branch -r --contains ${{ github.ref_name }}) - branch=${raw/origin\/} - fi - cd ${{ github.workspace }}; tox -e test-gpu -- "$branch"