From 51dc7cdd00bae93601a0e5c03d8696668066a031 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 14:47:45 +0800 Subject: [PATCH 001/130] update --- .github/workflows/cnn_e2e.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index df10d05..576ac52 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -6,13 +6,13 @@ on: - "*" workflow_dispatch: inputs: - placeholder: - description: "placeholder, no effect" - required: false - + of_branch_or_commit: + description: "oneflow branch or commit" + required: true + default: 'master' jobs: build: name: 'Build and test this repo' runs-on: ubuntu-latest steps: - - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + - run: echo "oneflow branch or commit is: ${{ github.event.inputs.of_branch_or_commit }}." From 50bd9d9cf71077dfb6b4cee5e318890d3bf5a80c Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 14:50:25 +0800 Subject: [PATCH 002/130] add blank line --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 576ac52..56dd4f9 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -10,6 +10,7 @@ on: description: "oneflow branch or commit" required: true default: 'master' + jobs: build: name: 'Build and test this repo' From 26cb72be398cfcb89cd308936b4eae206efdad11 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 14:58:01 +0800 Subject: [PATCH 003/130] fix --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 56dd4f9..f65b45e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -7,7 +7,7 @@ on: workflow_dispatch: inputs: of_branch_or_commit: - description: "oneflow branch or commit" + description: 'oneflow branch or commit' required: true default: 'master' @@ -16,4 +16,4 @@ jobs: name: 'Build and test this repo' runs-on: ubuntu-latest steps: - - run: echo "oneflow branch or commit is: ${{ github.event.inputs.of_branch_or_commit }}." + - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." From 6246a347e0c04e3c74c2d8e07f75ad1f5a1642ec Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 16:16:55 +0800 Subject: [PATCH 004/130] test --- .github/workflows/cnn_e2e.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index f65b45e..0f083b1 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -17,3 +17,23 @@ jobs: runs-on: ubuntu-latest steps: - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." + conda: + name: Build with conda + runs-on: ubuntu-latest + steps: + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + - name: Checkout Oneflow-Inc/conda-env + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/conda-env + ref: 30a7f00eb48ee9009d85a848e720823e5054c66b + path: conda-env + - uses: Oneflow-Inc/get-oneflow@b9c5793ee3b0a3d5c9e2975f230d985e6712787e + name: Build with gcc7 + with: + cmake-init-cache: cmake/caches/ci/gh-hosted/cpu-gcc.cmake + oneflow-src: . + oneflow-build-env: conda + conda-env-file: conda-env/dev/gcc7/environment-v2.yml + conda-env-name: oneflow-dev-gcc7-v2 From 536f79943460bd0625ac5f148bb69f62485f1556 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 16:23:27 +0800 Subject: [PATCH 005/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 0f083b1..dee5856 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -33,7 +33,7 @@ jobs: name: Build with gcc7 with: cmake-init-cache: cmake/caches/ci/gh-hosted/cpu-gcc.cmake - oneflow-src: . + oneflow-src: oneflow oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml conda-env-name: oneflow-dev-gcc7-v2 From 1165e98336d9bb09ce3ad4935762d8a2d39af83e Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 16:52:04 +0800 Subject: [PATCH 006/130] test --- .github/workflows/cnn_e2e.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index dee5856..4ea8885 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -21,8 +21,14 @@ jobs: name: Build with conda runs-on: ubuntu-latest steps: + - name: Checkout actions/checkout@v2 + uses: actions/checkout@v2 - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + # ref: 30a7f00eb48ee9009d85a848e720823e5054c66b + path: oneflow-src - name: Checkout Oneflow-Inc/conda-env uses: actions/checkout@v2 with: @@ -33,7 +39,7 @@ jobs: name: Build with gcc7 with: cmake-init-cache: cmake/caches/ci/gh-hosted/cpu-gcc.cmake - oneflow-src: oneflow + oneflow-src: oneflow-src oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml conda-env-name: oneflow-dev-gcc7-v2 From 166c4ada4ab617fa2e4e7f1a9cb86d66c6a5990e Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 16:56:47 +0800 Subject: [PATCH 007/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 4ea8885..68623b6 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -27,7 +27,7 @@ jobs: uses: actions/checkout@v2 with: repository: Oneflow-Inc/oneflow - # ref: 30a7f00eb48ee9009d85a848e720823e5054c66b + ref: ${{ github.event.inputs.of_branch_or_commit }} path: oneflow-src - name: Checkout Oneflow-Inc/conda-env uses: actions/checkout@v2 From afb20566374cf60f2604f397b85958e9742d59e8 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 18 Sep 2021 17:02:30 +0800 Subject: [PATCH 008/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 68623b6..9a0bdbd 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -38,7 +38,7 @@ jobs: - uses: Oneflow-Inc/get-oneflow@b9c5793ee3b0a3d5c9e2975f230d985e6712787e name: Build with gcc7 with: - cmake-init-cache: cmake/caches/ci/gh-hosted/cpu-gcc.cmake + cmake-init-cache: oneflow-src/cmake/caches/ci/gh-hosted/cpu-gcc.cmake oneflow-src: oneflow-src oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml From 8005ea04227020a08ec4ed43755bda98c31f360e Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 24 Sep 2021 17:48:08 +0800 Subject: [PATCH 009/130] use self-hoseted --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 9a0bdbd..a23486f 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -14,7 +14,7 @@ on: jobs: build: name: 'Build and test this repo' - runs-on: ubuntu-latest + runs-on: [‘self-hosted’, ‘linux’, ‘provision’] steps: - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." conda: From 683aa98a996550781f2466c0e0250f214a40cfb8 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 24 Sep 2021 19:04:09 +0800 Subject: [PATCH 010/130] fix --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index a23486f..3e9b47e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -19,7 +19,7 @@ jobs: - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." conda: name: Build with conda - runs-on: ubuntu-latest + runs-on: [‘self-hosted’, ‘linux’, ‘provision’] steps: - name: Checkout actions/checkout@v2 uses: actions/checkout@v2 From 73ef63de0d872949c03c7fff1f4fb4b42001419f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 24 Sep 2021 19:10:24 +0800 Subject: [PATCH 011/130] fix --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 3e9b47e..2b2b84f 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -14,12 +14,12 @@ on: jobs: build: name: 'Build and test this repo' - runs-on: [‘self-hosted’, ‘linux’, ‘provision’] + runs-on: ['self-hosted', 'linux', 'provision'] steps: - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." conda: name: Build with conda - runs-on: [‘self-hosted’, ‘linux’, ‘provision’] + runs-on: ['self-hosted', 'linux', 'provision'] steps: - name: Checkout actions/checkout@v2 uses: actions/checkout@v2 From 210dabf3420522313f82490d09d10e5fb60db7ac Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sun, 26 Sep 2021 09:48:59 +0800 Subject: [PATCH 012/130] update get oneflow commit --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 2b2b84f..52674a9 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -35,7 +35,7 @@ jobs: repository: Oneflow-Inc/conda-env ref: 30a7f00eb48ee9009d85a848e720823e5054c66b path: conda-env - - uses: Oneflow-Inc/get-oneflow@b9c5793ee3b0a3d5c9e2975f230d985e6712787e + - uses: Oneflow-Inc/get-oneflow@4417f492e7e95b469e5f2abcf12669621443a47d name: Build with gcc7 with: cmake-init-cache: oneflow-src/cmake/caches/ci/gh-hosted/cpu-gcc.cmake From 67fa4600930866db69e367e88239c27e6ee7be6c Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sun, 26 Sep 2021 15:48:55 +0800 Subject: [PATCH 013/130] test --- .github/workflows/cnn_e2e.yml | 76 ++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 52674a9..407f919 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -1,9 +1,9 @@ name: 'resnet e2e test' on: - pull_request: - types: [review_requested] - branches: - - "*" + # pull_request: + # types: [review_requested] + # branches: + # - "*" workflow_dispatch: inputs: of_branch_or_commit: @@ -12,34 +12,44 @@ on: default: 'master' jobs: - build: - name: 'Build and test this repo' - runs-on: ['self-hosted', 'linux', 'provision'] + cancel_previous: + name: Cancel previous runs + runs-on: ubuntu-latest steps: - - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." - conda: - name: Build with conda - runs-on: ['self-hosted', 'linux', 'provision'] - steps: - - name: Checkout actions/checkout@v2 - uses: actions/checkout@v2 - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/oneflow - ref: ${{ github.event.inputs.of_branch_or_commit }} - path: oneflow-src - - name: Checkout Oneflow-Inc/conda-env - uses: actions/checkout@v2 + - name: Cancel previous runs of outdated commit + uses: styfle/cancel-workflow-action@0.9.0 with: - repository: Oneflow-Inc/conda-env - ref: 30a7f00eb48ee9009d85a848e720823e5054c66b - path: conda-env - - uses: Oneflow-Inc/get-oneflow@4417f492e7e95b469e5f2abcf12669621443a47d - name: Build with gcc7 - with: - cmake-init-cache: oneflow-src/cmake/caches/ci/gh-hosted/cpu-gcc.cmake - oneflow-src: oneflow-src - oneflow-build-env: conda - conda-env-file: conda-env/dev/gcc7/environment-v2.yml - conda-env-name: oneflow-dev-gcc7-v2 + access_token: ${{ github.token }} + all_but_latest: true + # build: + # name: 'Build and test this repo' + # runs-on: ['self-hosted', 'linux', 'provision'] + # steps: + # - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." + # conda: + # name: Build with conda + # runs-on: ['self-hosted', 'linux', 'provision'] + # steps: + # - name: Checkout actions/checkout@v2 + # uses: actions/checkout@v2 + # - name: Checkout Oneflow-Inc/oneflow + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/oneflow + # ref: ${{ github.event.inputs.of_branch_or_commit }} + # path: oneflow-src + # - name: Checkout Oneflow-Inc/conda-env + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/conda-env + # ref: 30a7f00eb48ee9009d85a848e720823e5054c66b + # path: conda-env + # - uses: Oneflow-Inc/get-oneflow@4417f492e7e95b469e5f2abcf12669621443a47d + # name: Build with gcc7 + # with: + # cmake-init-cache: oneflow-src/cmake/caches/ci/gh-hosted/cpu-gcc.cmake + # oneflow-src: oneflow-src + # oneflow-build-env: conda + # conda-env-file: conda-env/dev/gcc7/environment-v2.yml + # conda-env-name: oneflow-dev-gcc7-v2 + From 9d14df4d810bea8a361c627c3ac26f45d5c746b2 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sun, 26 Sep 2021 22:51:43 +0800 Subject: [PATCH 014/130] test --- .github/workflows/cnn_e2e.yml | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 407f919..fa68b11 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -21,6 +21,50 @@ jobs: with: access_token: ${{ github.token }} all_but_latest: true + build-manylinux: + name: "Build manylinux" + runs-on: ['self-hosted', 'linux', 'provision'] + env: + ONEFLOW_SRC: oneflow-src + MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/cu102 + WHEELHOUSE_DIR: manylinux-wheelhouse + SSH_TANK_HOST: 192.168.1.13 + SSH_TANK_PATH: /tank + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + steps: + - name: Fix permissions + run: | + set -x + docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + - name: Remove leftover cuda-installer.log + run: | + docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} + - uses: Oneflow-Inc/get-oneflow@4417f492e7e95b469e5f2abcf12669621443a47d + name: Build manylinux cu102 + id: build-cuda + with: + cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + oneflow-src: ${{ env.ONEFLOW_SRC }} + oneflow-build-env: manylinux + wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + clear-wheelhouse-dir: true + self-hosted: true + cuda-version: "10.2" + manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + docker-run-use-system-http-proxy: false + docker-run-use-lld: false + retry-failed-build: true + python-versions: | + 3.6 + 3.7 # build: # name: 'Build and test this repo' # runs-on: ['self-hosted', 'linux', 'provision'] From 46b221630e4fdd802cec4759bb1c847eb77b5f98 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 12:57:03 +0800 Subject: [PATCH 015/130] test --- .github/workflows/cnn_e2e.yml | 55 +++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index fa68b11..236f0ea 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -21,12 +21,44 @@ jobs: with: access_token: ${{ github.token }} all_but_latest: true + find-build-cache: + name: "Find build cache" + runs-on: ubuntu-latest + env: + ONEFLOW_SRC: . + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + outputs: + matrix: ${{ steps.find-cache.outputs.matrix }} + steps: + - uses: actions/checkout@v2 + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.pull_request.head.sha }} + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@4417f492e7e95b469e5f2abcf12669621443a47d + name: find cache + id: find-cache + with: + runner-labels: | + self-hosted + linux + build + oneflow-src: ${{ env.ONEFLOW_SRC }} + entries: | + cu102 build-manylinux: name: "Build manylinux" runs-on: ['self-hosted', 'linux', 'provision'] + needs: [find-build-cache] + strategy: + fail-fast: true + max-parallel: 5 + matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} env: ONEFLOW_SRC: oneflow-src - MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/cu102 + MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse SSH_TANK_HOST: 192.168.1.13 SSH_TANK_PATH: /tank @@ -64,7 +96,26 @@ jobs: retry-failed-build: true python-versions: | 3.6 - 3.7 + - name: Upload bin + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + uses: Oneflow-Inc/get-oneflow/degist/upload@4417f492e7e95b469e5f2abcf12669621443a47d + with: + digest: ${{ steps.save-cache.outputs.build-digest }} + entry: ${{ matrix.entry }} + ssh-tank-host: ${{ env.SSH_TANK_HOST }} + ssh-tank-path: ${{ env.SSH_TANK_PATH }} + src-dir: ${{ env.MANYLINUX_CACHE_DIR }}/build/bin + dst-dir: bin + - name: Upload whl + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + uses: Oneflow-Inc/get-oneflow/degist/upload@4417f492e7e95b469e5f2abcf12669621443a47d + with: + digest: ${{ steps.save-cache.outputs.build-digest }} + entry: ${{ matrix.entry }} + ssh-tank-host: ${{ env.SSH_TANK_HOST }} + ssh-tank-path: ${{ env.SSH_TANK_PATH }} + src-dir: ${{ env.WHEELHOUSE_DIR }} + dst-dir: whl # build: # name: 'Build and test this repo' # runs-on: ['self-hosted', 'linux', 'provision'] From fc16ec3fdc5555ca93d78d23566ded5caab21fd2 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 12:59:05 +0800 Subject: [PATCH 016/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 236f0ea..3978d29 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -60,8 +60,8 @@ jobs: ONEFLOW_SRC: oneflow-src MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse - SSH_TANK_HOST: 192.168.1.13 - SSH_TANK_PATH: /tank + SSH_TANK_HOST: 192.168.1.22 + SSH_TANK_PATH: /tmp OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: From 34048bdead27d46a27ce437468b55fade1976a36 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 14:14:27 +0800 Subject: [PATCH 017/130] update get-oneflow commit --- .github/workflows/cnn_e2e.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 3978d29..1190a93 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -37,7 +37,7 @@ jobs: with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.pull_request.head.sha }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@4417f492e7e95b469e5f2abcf12669621443a47d + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 name: find cache id: find-cache with: @@ -78,7 +78,7 @@ jobs: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - - uses: Oneflow-Inc/get-oneflow@4417f492e7e95b469e5f2abcf12669621443a47d + - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux cu102 id: build-cuda with: @@ -98,7 +98,7 @@ jobs: 3.6 - name: Upload bin if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - uses: Oneflow-Inc/get-oneflow/degist/upload@4417f492e7e95b469e5f2abcf12669621443a47d + uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 with: digest: ${{ steps.save-cache.outputs.build-digest }} entry: ${{ matrix.entry }} @@ -108,7 +108,7 @@ jobs: dst-dir: bin - name: Upload whl if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - uses: Oneflow-Inc/get-oneflow/degist/upload@4417f492e7e95b469e5f2abcf12669621443a47d + uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 with: digest: ${{ steps.save-cache.outputs.build-digest }} entry: ${{ matrix.entry }} @@ -139,7 +139,7 @@ jobs: # repository: Oneflow-Inc/conda-env # ref: 30a7f00eb48ee9009d85a848e720823e5054c66b # path: conda-env - # - uses: Oneflow-Inc/get-oneflow@4417f492e7e95b469e5f2abcf12669621443a47d + # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 # name: Build with gcc7 # with: # cmake-init-cache: oneflow-src/cmake/caches/ci/gh-hosted/cpu-gcc.cmake From 2baa5b7e34c92c5e53be1844b0e9453ec9a56abb Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 14:53:01 +0800 Subject: [PATCH 018/130] test --- .github/workflows/cnn_e2e.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 1190a93..3ff2bef 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -60,8 +60,8 @@ jobs: ONEFLOW_SRC: oneflow-src MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse - SSH_TANK_HOST: 192.168.1.22 - SSH_TANK_PATH: /tmp + SSH_TANK_HOST: 192.168.1.23 + SSH_TANK_PATH: /home/ci-user/tank OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: @@ -78,6 +78,20 @@ jobs: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} + - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Save cache if successful + id: save-cache + timeout-minutes: 5 + with: + oneflow-src: ${{ env.ONEFLOW_SRC }} + entry: ${{ matrix.entry }} + digest-type: build + mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + - name: Check digest and fail if cache result not identical to matrix + if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + run: | + echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + exit 1 - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux cu102 id: build-cuda From 581c4ff3e0a43d2e51383cf691751677070d08b4 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 15:12:59 +0800 Subject: [PATCH 019/130] test --- .github/workflows/cnn_e2e.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 3ff2bef..8b13cd4 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -130,6 +130,26 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} src-dir: ${{ env.WHEELHOUSE_DIR }} dst-dir: whl + find-test-cache: + name: "Find test cache" + runs-on: ubuntu-latest + needs: [build-manylinux] + env: + ONEFLOW_SRC: oneflow-src + outputs: + matrix: ${{ steps.find-cache.outputs.matrix }} + steps: + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: find cache + id: find-cache + timeout-minutes: 5 + with: + runner-labels: | + self-hosted + linux + oneflow-src: ${{ env.ONEFLOW_SRC }} # build: # name: 'Build and test this repo' # runs-on: ['self-hosted', 'linux', 'provision'] From ffb1e06ee89b95861fb1bde7c6adcd36e2c990cc Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 15:59:38 +0800 Subject: [PATCH 020/130] test --- .github/workflows/cnn_e2e.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 8b13cd4..f3722e8 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -25,7 +25,7 @@ jobs: name: "Find build cache" runs-on: ubuntu-latest env: - ONEFLOW_SRC: . + ONEFLOW_SRC: oneflow-src OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} outputs: @@ -36,7 +36,8 @@ jobs: uses: actions/checkout@v2 with: repository: Oneflow-Inc/oneflow - ref: ${{ github.event.pull_request.head.sha }} + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 name: find cache id: find-cache @@ -57,7 +58,7 @@ jobs: max-parallel: 5 matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} env: - ONEFLOW_SRC: oneflow-src + ONEFLOW_SRC: ${{ env.ONEFLOW_SRC }} MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse SSH_TANK_HOST: 192.168.1.23 @@ -141,6 +142,10 @@ jobs: steps: - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@2a9efceab8d45b725a687e73f870f9b75a15e472 name: find cache id: find-cache From 6123277aa0d3f457bb6b176f4b7f992eb6f8bf32 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 16:08:41 +0800 Subject: [PATCH 021/130] test --- .github/workflows/cnn_e2e.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index f3722e8..c2808ac 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -10,7 +10,8 @@ on: description: 'oneflow branch or commit' required: true default: 'master' - + env: + ONEFLOW_SRC: oneflow-src jobs: cancel_previous: name: Cancel previous runs @@ -25,7 +26,7 @@ jobs: name: "Find build cache" runs-on: ubuntu-latest env: - ONEFLOW_SRC: oneflow-src + # ONEFLOW_SRC: oneflow-src OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} outputs: @@ -58,7 +59,7 @@ jobs: max-parallel: 5 matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} env: - ONEFLOW_SRC: ${{ env.ONEFLOW_SRC }} + # ONEFLOW_SRC: oneflow-src MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse SSH_TANK_HOST: 192.168.1.23 @@ -135,8 +136,8 @@ jobs: name: "Find test cache" runs-on: ubuntu-latest needs: [build-manylinux] - env: - ONEFLOW_SRC: oneflow-src + # env: + # ONEFLOW_SRC: oneflow-src outputs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: From de2e11c98c1fff58bf1d0bb60528d0c76da39510 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 16:10:33 +0800 Subject: [PATCH 022/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index c2808ac..0782cdd 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -10,8 +10,8 @@ on: description: 'oneflow branch or commit' required: true default: 'master' - env: - ONEFLOW_SRC: oneflow-src +env: + ONEFLOW_SRC: oneflow-src jobs: cancel_previous: name: Cancel previous runs From 27ba8e793a743fb1e62fd80415d793ab5bdd1b48 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 16:40:25 +0800 Subject: [PATCH 023/130] test --- .github/workflows/cnn_e2e.yml | 55 +++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 0782cdd..201940a 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -12,6 +12,8 @@ on: default: 'master' env: ONEFLOW_SRC: oneflow-src + SSH_TANK_HOST: 192.168.1.23 + SSH_TANK_PATH: /home/ci-user/tank jobs: cancel_previous: name: Cancel previous runs @@ -26,7 +28,6 @@ jobs: name: "Find build cache" runs-on: ubuntu-latest env: - # ONEFLOW_SRC: oneflow-src OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} outputs: @@ -59,11 +60,8 @@ jobs: max-parallel: 5 matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} env: - # ONEFLOW_SRC: oneflow-src MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse - SSH_TANK_HOST: 192.168.1.23 - SSH_TANK_PATH: /home/ci-user/tank OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: @@ -136,8 +134,6 @@ jobs: name: "Find test cache" runs-on: ubuntu-latest needs: [build-manylinux] - # env: - # ONEFLOW_SRC: oneflow-src outputs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: @@ -156,6 +152,53 @@ jobs: self-hosted linux oneflow-src: ${{ env.ONEFLOW_SRC }} + test: + name: Test suite + needs: [wait_for_gpu_slot, find-test-cache] + runs-on: ${{ matrix.runs-on }} + strategy: + fail-fast: true + max-parallel: 5 + matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} + env: + TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + steps: + - name: Fix permissions + if: ${{ contains(matrix.runs-on, 'self-hosted') }} + run: | + set -x + docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + - name: Checkout Oneflow-Inc/OneFlow-Benchmark + uses: actions/checkout@v2 + - name: Remove container + timeout-minutes: 45 + if: ${{ contains(matrix.runs-on, 'self-hosted') }} + run: | + docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true + - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Save cache if successful + id: save-cache + timeout-minutes: 5 + with: + oneflow-src: ${{ env.ONEFLOW_SRC }} + entry: ${{ matrix.entry }} + digest-type: ${{ matrix.digest-type }} + mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + - name: Check digest and fail if cache result not identical to matrix + if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + run: | + echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + exit 1 + - name: Download wheel and binary + if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 + id: download-digest + timeout-minutes: 10 + with: + digest: ${{ steps.save-cache.outputs.build-digest }} + entry: ${{ matrix.compute-platform }} + ssh-tank-host: ${{ env.SSH_TANK_HOST }} + ssh-tank-path: ${{ env.SSH_TANK_PATH }} # build: # name: 'Build and test this repo' # runs-on: ['self-hosted', 'linux', 'provision'] From 5dffaa0b5be176785762f2414ca2c2b41c0ca78f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 16:44:43 +0800 Subject: [PATCH 024/130] test --- .github/workflows/cnn_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 201940a..4752cef 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -154,7 +154,8 @@ jobs: oneflow-src: ${{ env.ONEFLOW_SRC }} test: name: Test suite - needs: [wait_for_gpu_slot, find-test-cache] + needs: [find-test-cache] + # needs: [wait_for_gpu_slot, find-test-cache] runs-on: ${{ matrix.runs-on }} strategy: fail-fast: true From f187112284c96997fc6c3ebd6790216b5fc4ef18 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 17:47:37 +0800 Subject: [PATCH 025/130] test --- .github/workflows/cnn_e2e.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 4752cef..3d3a16c 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -156,11 +156,8 @@ jobs: name: Test suite needs: [find-test-cache] # needs: [wait_for_gpu_slot, find-test-cache] - runs-on: ${{ matrix.runs-on }} - strategy: - fail-fast: true - max-parallel: 5 - matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} + runs-on: ['self-hosted', 'linux', 'provision'] + # runs-on: ${{ matrix.runs-on }} env: TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" steps: @@ -191,7 +188,7 @@ jobs: echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" exit 1 - name: Download wheel and binary - if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 id: download-digest timeout-minutes: 10 From 825b496e627389d3d5d0ce389c8ba87fcfed6ce3 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 27 Sep 2021 18:09:42 +0800 Subject: [PATCH 026/130] test --- .github/workflows/cnn_e2e.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 3d3a16c..a621b17 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -155,9 +155,7 @@ jobs: test: name: Test suite needs: [find-test-cache] - # needs: [wait_for_gpu_slot, find-test-cache] runs-on: ['self-hosted', 'linux', 'provision'] - # runs-on: ${{ matrix.runs-on }} env: TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" steps: @@ -166,8 +164,8 @@ jobs: run: | set -x docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - - name: Checkout Oneflow-Inc/OneFlow-Benchmark - uses: actions/checkout@v2 + # - name: Checkout Oneflow-Inc/OneFlow-Benchmark + # uses: actions/checkout@v2 - name: Remove container timeout-minutes: 45 if: ${{ contains(matrix.runs-on, 'self-hosted') }} @@ -194,7 +192,7 @@ jobs: timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} - entry: ${{ matrix.compute-platform }} + entry: ${{ matrix.entry }} ssh-tank-host: ${{ env.SSH_TANK_HOST }} ssh-tank-path: ${{ env.SSH_TANK_PATH }} # build: From b7c0a296c9939e075ae3f051f8686fbb03414528 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 11:01:28 +0800 Subject: [PATCH 027/130] test --- .github/workflows/cnn_e2e.yml | 63 +++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index a621b17..192be36 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -130,34 +130,39 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} src-dir: ${{ env.WHEELHOUSE_DIR }} dst-dir: whl - find-test-cache: - name: "Find test cache" - runs-on: ubuntu-latest - needs: [build-manylinux] - outputs: - matrix: ${{ steps.find-cache.outputs.matrix }} - steps: - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/oneflow - ref: ${{ github.event.inputs.of_branch_or_commit }} - path: ${{ env.ONEFLOW_SRC }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: find cache - id: find-cache - timeout-minutes: 5 - with: - runner-labels: | - self-hosted - linux - oneflow-src: ${{ env.ONEFLOW_SRC }} + # find-test-cache: + # name: "Find test cache" + # runs-on: ubuntu-latest + # needs: [build-manylinux] + # outputs: + # matrix: ${{ steps.find-cache.outputs.matrix }} + # steps: + # - name: Checkout Oneflow-Inc/oneflow + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/oneflow + # ref: ${{ github.event.inputs.of_branch_or_commit }} + # path: ${{ env.ONEFLOW_SRC }} + # - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: find cache + # id: find-cache + # timeout-minutes: 5 + # with: + # runner-labels: | + # self-hosted + # linux + # oneflow-src: ${{ env.ONEFLOW_SRC }} test: name: Test suite - needs: [find-test-cache] + # needs: [find-test-cache]build-manylinux + needs: [build-manylinux] runs-on: ['self-hosted', 'linux', 'provision'] env: TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + # strategy: + # fail-fast: true + # max-parallel: 5 + # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} steps: - name: Fix permissions if: ${{ contains(matrix.runs-on, 'self-hosted') }} @@ -168,7 +173,7 @@ jobs: # uses: actions/checkout@v2 - name: Remove container timeout-minutes: 45 - if: ${{ contains(matrix.runs-on, 'self-hosted') }} + # if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 @@ -177,22 +182,22 @@ jobs: timeout-minutes: 5 with: oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: ${{ matrix.entry }} - digest-type: ${{ matrix.digest-type }} - mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + entry: cu102 + digest-type: test + mark-as-completed: true - name: Check digest and fail if cache result not identical to matrix if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} run: | echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" exit 1 - name: Download wheel and binary - if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 id: download-digest timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} - entry: ${{ matrix.entry }} + entry: cu102 ssh-tank-host: ${{ env.SSH_TANK_HOST }} ssh-tank-path: ${{ env.SSH_TANK_PATH }} # build: From 0ee1ac5087eb11daf1252e4d9654873f9b64c5dc Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 11:40:42 +0800 Subject: [PATCH 028/130] test --- .github/workflows/cnn_e2e.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 192be36..c23961e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -14,6 +14,8 @@ env: ONEFLOW_SRC: oneflow-src SSH_TANK_HOST: 192.168.1.23 SSH_TANK_PATH: /home/ci-user/tank + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} jobs: cancel_previous: name: Cancel previous runs @@ -27,9 +29,6 @@ jobs: find-build-cache: name: "Find build cache" runs-on: ubuntu-latest - env: - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} outputs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: From 649bc12eb26469ba28913995037b76f2a168fda6 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 12:38:22 +0800 Subject: [PATCH 029/130] test --- .github/workflows/cnn_e2e.yml | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index c23961e..cb7a671 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -199,6 +199,55 @@ jobs: entry: cu102 ssh-tank-host: ${{ env.SSH_TANK_HOST }} ssh-tank-path: ${{ env.SSH_TANK_PATH }} + - name: Enable Pytorch container + run: | + echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV + - name: Set environment variables + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + run: | + set -x + echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV + echo "ONEFLOW_WHEEL_PATH=${{ steps.download-digest.outputs.entry-dir }}/whl" >> $GITHUB_ENV + - name: Start container + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + working-directory: ${{ env.ONEFLOW_SRC }} + env: + ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin + run: | + docker pull ${{ env.TEST_IMG_TAG }} + docker run -d --rm --privileged --network host --shm-size=8g \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + --runtime=nvidia \ + -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \ + -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ + -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ + -v $HOME/test-container-cache/dot-local:/root/.local \ + -v $HOME/test-container-cache/dot-cache:/root/.cache \ + -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \ + -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ + -e ONEFLOW_CI=1 \ + -v $PWD:$PWD \ + -w $PWD \ + -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ + -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + --name ${TEST_CONTAINER_NAME} \ + ${{ env.TEST_IMG_TAG }} \ + sleep 3600 + - name: Test container + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + run: | + docker exec ${{ env.TEST_CONTAINER_NAME }} ls + - name: Install OneFlow + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + run: | + ls ${ONEFLOW_WHEEL_PATH} + docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + - name: Run OneFlow doctor + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + run: | + docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor + # build: # name: 'Build and test this repo' # runs-on: ['self-hosted', 'linux', 'provision'] From 45956927f721467410063576df7b41d0166968d5 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 13:00:56 +0800 Subject: [PATCH 030/130] test --- .github/workflows/cnn_e2e.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index cb7a671..98b3051 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -184,11 +184,11 @@ jobs: entry: cu102 digest-type: test mark-as-completed: true - - name: Check digest and fail if cache result not identical to matrix - if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - run: | - echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - exit 1 + # - name: Check digest and fail if cache result not identical to matrix + # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + # run: | + # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + # exit 1 - name: Download wheel and binary # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 From f6e8cf17d045a9ccb36038a28403226899d05737 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 14:09:44 +0800 Subject: [PATCH 031/130] test --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 98b3051..7844a57 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -158,6 +158,7 @@ jobs: runs-on: ['self-hosted', 'linux', 'provision'] env: TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae # strategy: # fail-fast: true # max-parallel: 5 From 4be0dc638f516dff7ada24e9ce7db7cbd30ee123 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 14:39:20 +0800 Subject: [PATCH 032/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 7844a57..280d03b 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -108,7 +108,7 @@ jobs: docker-run-use-lld: false retry-failed-build: true python-versions: | - 3.6 + 3.7 - name: Upload bin if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 From 5a3a6e746059459730da840efc49bec08dff3206 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 14:58:34 +0800 Subject: [PATCH 033/130] test --- .github/workflows/cnn_e2e.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 280d03b..f2b9b21 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -110,7 +110,7 @@ jobs: python-versions: | 3.7 - name: Upload bin - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -120,7 +120,7 @@ jobs: src-dir: ${{ env.MANYLINUX_CACHE_DIR }}/build/bin dst-dir: bin - name: Upload whl - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -248,7 +248,9 @@ jobs: # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} run: | docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor - + - name: Checkout OneFlow-Benchmark + uses: actions/checkout@v2 + # build: # name: 'Build and test this repo' # runs-on: ['self-hosted', 'linux', 'provision'] From b179c202bd590cb1b2317d81f6cecb0621aa89d4 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 16:50:46 +0800 Subject: [PATCH 034/130] test --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index f2b9b21..90e3420 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -200,6 +200,7 @@ jobs: entry: cu102 ssh-tank-host: ${{ env.SSH_TANK_HOST }} ssh-tank-path: ${{ env.SSH_TANK_PATH }} + digest-cache-dir: '~/digest-cache-bm' - name: Enable Pytorch container run: | echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV From 92f5f1a4db6994297e8bb8259a1273532d905d50 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 17:29:39 +0800 Subject: [PATCH 035/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 90e3420..a9afa1d 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -165,7 +165,7 @@ jobs: # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} steps: - name: Fix permissions - if: ${{ contains(matrix.runs-on, 'self-hosted') }} + # if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | set -x docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . From 029fda37d6ad4ef2d94c868809bf1ec7a62136ac Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 18:19:58 +0800 Subject: [PATCH 036/130] test --- .github/workflows/cnn_e2e.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index a9afa1d..adfd695 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -245,10 +245,10 @@ jobs: ls ${ONEFLOW_WHEEL_PATH} docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow - - name: Run OneFlow doctor - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} - run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor + # - name: Run OneFlow doctor + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + # run: | + # docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor - name: Checkout OneFlow-Benchmark uses: actions/checkout@v2 From a546c5316528dc8026ed630763686e2479bf1e27 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 18:23:07 +0800 Subject: [PATCH 037/130] test --- .github/workflows/cnn_e2e.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index adfd695..20eccd7 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -169,8 +169,8 @@ jobs: run: | set -x docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - # - name: Checkout Oneflow-Inc/OneFlow-Benchmark - # uses: actions/checkout@v2 + - name: Checkout Oneflow-Inc/OneFlow-Benchmark + uses: actions/checkout@v2 - name: Remove container timeout-minutes: 45 # if: ${{ contains(matrix.runs-on, 'self-hosted') }} @@ -249,8 +249,6 @@ jobs: # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} # run: | # docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor - - name: Checkout OneFlow-Benchmark - uses: actions/checkout@v2 # build: # name: 'Build and test this repo' From 00d1399d840c8545a92b6d45d50c965bf5f4e97b Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 18:31:03 +0800 Subject: [PATCH 038/130] test --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 20eccd7..4e21043 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -94,6 +94,7 @@ jobs: - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux cu102 id: build-cuda + if: ${{ matrix.entry =='cu102' && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh From 2c49a7301f4f57ba65fee7965fc4f0deb7469407 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 18:36:46 +0800 Subject: [PATCH 039/130] test --- .github/workflows/cnn_e2e.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 4e21043..28c2dbe 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -172,6 +172,12 @@ jobs: docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Checkout Oneflow-Inc/OneFlow-Benchmark uses: actions/checkout@v2 + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} - name: Remove container timeout-minutes: 45 # if: ${{ contains(matrix.runs-on, 'self-hosted') }} From 49dfd56b505246c9ac0a321b329853f3441e5683 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 18:50:15 +0800 Subject: [PATCH 040/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 28c2dbe..6989bfd 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -111,7 +111,7 @@ jobs: python-versions: | 3.7 - name: Upload bin - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -121,7 +121,7 @@ jobs: src-dir: ${{ env.MANYLINUX_CACHE_DIR }}/build/bin dst-dir: bin - name: Upload whl - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 with: digest: ${{ steps.save-cache.outputs.build-digest }} From ff9755753cd39c8b3f9b8b330d28c712ce82e828 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 28 Sep 2021 19:20:54 +0800 Subject: [PATCH 041/130] test --- .github/workflows/cnn_e2e.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 6989bfd..91f5c8c 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -219,7 +219,7 @@ jobs: echo "ONEFLOW_WHEEL_PATH=${{ steps.download-digest.outputs.entry-dir }}/whl" >> $GITHUB_ENV - name: Start container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - working-directory: ${{ env.ONEFLOW_SRC }} + # working-directory: ${{ env.ONEFLOW_SRC }} env: ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin run: | @@ -242,16 +242,16 @@ jobs: --name ${TEST_CONTAINER_NAME} \ ${{ env.TEST_IMG_TAG }} \ sleep 3600 - - name: Test container - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} ls - name: Install OneFlow # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} run: | ls ${ONEFLOW_WHEEL_PATH} docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + - name: Test container + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + run: | + docker exec ${{ env.TEST_CONTAINER_NAME }} ls # - name: Run OneFlow doctor # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} # run: | From 08d24b8a92fcf58b5bfe66dab400dee2b3791a29 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 29 Sep 2021 17:07:28 +0800 Subject: [PATCH 042/130] ci train script --- Classification/cnns/ci_train.sh | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 Classification/cnns/ci_train.sh diff --git a/Classification/cnns/ci_train.sh b/Classification/cnns/ci_train.sh new file mode 100755 index 0000000..cc64358 --- /dev/null +++ b/Classification/cnns/ci_train.sh @@ -0,0 +1,44 @@ +GPU_NUM=${1:-8} +NODE_NUM=${2:-1} +BATCH_SIZE=${3:-192} +LEARNING_RATE=${4:-1.536} +NUM_EPOCH=${5:-50} +SRC_ROOT=${6:-"Classification/cnns"} +DATA_ROOT=${7:-"/dataset/ImageNet/ofrecord"} + +test_case=n${NODE_NUM}_g${GPU_NUM}_b${BATCH_SIZE}_lr${LEARNING_RATE}_e${NUM_EPOCH} +LOG_FOLDER=./log +mkdir -p $LOG_FOLDER + +model="resnet50" +LOGFILE=$LOG_FOLDER/${model}_${test_case}.log + +export PYTHONUNBUFFERED=1 +export NCCL_LAUNCH_MODE=PARALLEL + +python3 ${SRC_ROOT}/of_cnn_train_val.py \ + --train_data_dir=$DATA_ROOT/train \ + --train_data_part_num=256 \ + --val_data_dir=$DATA_ROOT/validation \ + --val_data_part_num=256 \ + --num_nodes=${NODE_NUM} \ + --gpu_num_per_node=${GPU_NUM} \ + --optimizer="sgd" \ + --momentum=0.875 \ + --label_smoothing=0.1 \ + --learning_rate=${LEARNING_RATE} \ + --loss_print_every_n_iter=100 \ + --batch_size_per_device=${BATCH_SIZE} \ + --val_batch_size_per_device=50 \ + --use_fp16 \ + --channel_last=True \ + --pad_output \ + --fuse_bn_relu=True \ + --fuse_bn_add_relu=True \ + --nccl_fusion_threshold_mb=16 \ + --nccl_fusion_max_ops=24 \ + --gpu_image_decoder=True \ + --num_epoch=$NUM_EPOCH \ + --model=${model} 2>&1 | tee ${LOGFILE} + +echo "Writting log to ${LOGFILE}" From ec2cb8f3ebda3ad603cc48b820ab756d77665983 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 29 Sep 2021 17:14:02 +0800 Subject: [PATCH 043/130] test --- .github/workflows/cnn_e2e.yml | 10 +++++++++- Classification/cnns/ci_train.sh | 10 +++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 91f5c8c..85a0447 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -10,6 +10,14 @@ on: description: 'oneflow branch or commit' required: true default: 'master' + num_epochs: + description: 'number of training epoches' + required: true + default: 50 + gpu_num_per_node: + description: 'gpu number per node' + required: true + default: 8 env: ONEFLOW_SRC: oneflow-src SSH_TANK_HOST: 192.168.1.23 @@ -251,7 +259,7 @@ jobs: - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} ls + docker exec ${{ env.TEST_CONTAINER_NAME }} bash Classification/cnns/ci_train.sh ${{ github.event.inputs.num_epochs }} ${{ github.event.inputs.gpu_num_per_node }} # - name: Run OneFlow doctor # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} # run: | diff --git a/Classification/cnns/ci_train.sh b/Classification/cnns/ci_train.sh index cc64358..0ead036 100755 --- a/Classification/cnns/ci_train.sh +++ b/Classification/cnns/ci_train.sh @@ -1,8 +1,8 @@ -GPU_NUM=${1:-8} -NODE_NUM=${2:-1} -BATCH_SIZE=${3:-192} -LEARNING_RATE=${4:-1.536} -NUM_EPOCH=${5:-50} +NUM_EPOCH=${1:-50} +GPU_NUM=${2:-8} +NODE_NUM=${3:-1} +BATCH_SIZE=${4:-192} +LEARNING_RATE=${5:-1.536} SRC_ROOT=${6:-"Classification/cnns"} DATA_ROOT=${7:-"/dataset/ImageNet/ofrecord"} From fe534c937a6e8e2acb81dbc3fc937fadaac01581 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 29 Sep 2021 17:26:26 +0800 Subject: [PATCH 044/130] test --- Classification/cnns/ci_train.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Classification/cnns/ci_train.sh b/Classification/cnns/ci_train.sh index 0ead036..293b7b9 100755 --- a/Classification/cnns/ci_train.sh +++ b/Classification/cnns/ci_train.sh @@ -39,6 +39,7 @@ python3 ${SRC_ROOT}/of_cnn_train_val.py \ --nccl_fusion_max_ops=24 \ --gpu_image_decoder=True \ --num_epoch=$NUM_EPOCH \ - --model=${model} 2>&1 | tee ${LOGFILE} + --model=${model} + # 2>&1 | tee ${LOGFILE} echo "Writting log to ${LOGFILE}" From 04c7c8f6506bc34863e875967467c865edeba054 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 29 Sep 2021 18:12:35 +0800 Subject: [PATCH 045/130] test --- Classification/cnns/ci_train.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Classification/cnns/ci_train.sh b/Classification/cnns/ci_train.sh index 293b7b9..6716e32 100755 --- a/Classification/cnns/ci_train.sh +++ b/Classification/cnns/ci_train.sh @@ -1,7 +1,7 @@ NUM_EPOCH=${1:-50} GPU_NUM=${2:-8} NODE_NUM=${3:-1} -BATCH_SIZE=${4:-192} +BATCH_SIZE=${4:-32} LEARNING_RATE=${5:-1.536} SRC_ROOT=${6:-"Classification/cnns"} DATA_ROOT=${7:-"/dataset/ImageNet/ofrecord"} @@ -39,7 +39,7 @@ python3 ${SRC_ROOT}/of_cnn_train_val.py \ --nccl_fusion_max_ops=24 \ --gpu_image_decoder=True \ --num_epoch=$NUM_EPOCH \ - --model=${model} - # 2>&1 | tee ${LOGFILE} + --num_examples=1024 \ + --model=${model} 2>&1 | tee ${LOGFILE} echo "Writting log to ${LOGFILE}" From 282727c5d43d0f4005de989c742c063b061c518a Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 09:24:08 +0800 Subject: [PATCH 046/130] mkdir ci --- .github/workflows/cnn_e2e.yml | 9 ++++++- .../ci_train.sh => ci/test/resnet50_e2e.sh | 26 +++++++------------ 2 files changed, 17 insertions(+), 18 deletions(-) rename Classification/cnns/ci_train.sh => ci/test/resnet50_e2e.sh (55%) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 85a0447..e82e3f2 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -247,6 +247,13 @@ jobs: -w $PWD \ -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ + -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ + -e E2E_NODE_NUM=1 \ + -e E2E_BATCH_SIZE=32 \ + -e E2E_LEARNING_RATE=1.536 \ + -e E2E_SRC_ROOT=Classification/cnns \ + -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ --name ${TEST_CONTAINER_NAME} \ ${{ env.TEST_IMG_TAG }} \ sleep 3600 @@ -259,7 +266,7 @@ jobs: - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} bash Classification/cnns/ci_train.sh ${{ github.event.inputs.num_epochs }} ${{ github.event.inputs.gpu_num_per_node }} + docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh # - name: Run OneFlow doctor # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} # run: | diff --git a/Classification/cnns/ci_train.sh b/ci/test/resnet50_e2e.sh similarity index 55% rename from Classification/cnns/ci_train.sh rename to ci/test/resnet50_e2e.sh index 6716e32..9f845d5 100755 --- a/Classification/cnns/ci_train.sh +++ b/ci/test/resnet50_e2e.sh @@ -1,12 +1,4 @@ -NUM_EPOCH=${1:-50} -GPU_NUM=${2:-8} -NODE_NUM=${3:-1} -BATCH_SIZE=${4:-32} -LEARNING_RATE=${5:-1.536} -SRC_ROOT=${6:-"Classification/cnns"} -DATA_ROOT=${7:-"/dataset/ImageNet/ofrecord"} - -test_case=n${NODE_NUM}_g${GPU_NUM}_b${BATCH_SIZE}_lr${LEARNING_RATE}_e${NUM_EPOCH} +test_case=n${E2E_NODE_NUM}_g${E2E_GPU_NUM}_b${E2E_BATCH_SIZE}_lr${E2E_LEARNING_RATE}_e${E2E_NUM_EPOCH} LOG_FOLDER=./log mkdir -p $LOG_FOLDER @@ -16,19 +8,19 @@ LOGFILE=$LOG_FOLDER/${model}_${test_case}.log export PYTHONUNBUFFERED=1 export NCCL_LAUNCH_MODE=PARALLEL -python3 ${SRC_ROOT}/of_cnn_train_val.py \ - --train_data_dir=$DATA_ROOT/train \ +python3 ${E2E_SRC_ROOT}/of_cnn_train_val.py \ + --train_data_dir=$E2E_DATA_ROOT/train \ --train_data_part_num=256 \ - --val_data_dir=$DATA_ROOT/validation \ + --val_data_dir=$E2E_DATA_ROOT/validation \ --val_data_part_num=256 \ - --num_nodes=${NODE_NUM} \ - --gpu_num_per_node=${GPU_NUM} \ + --num_nodes=${E2E_NODE_NUM} \ + --gpu_num_per_node=${E2E_GPU_NUM} \ --optimizer="sgd" \ --momentum=0.875 \ --label_smoothing=0.1 \ - --learning_rate=${LEARNING_RATE} \ + --learning_rate=${E2E_LEARNING_RATE} \ --loss_print_every_n_iter=100 \ - --batch_size_per_device=${BATCH_SIZE} \ + --batch_size_per_device=${E2E_BATCH_SIZE} \ --val_batch_size_per_device=50 \ --use_fp16 \ --channel_last=True \ @@ -38,7 +30,7 @@ python3 ${SRC_ROOT}/of_cnn_train_val.py \ --nccl_fusion_threshold_mb=16 \ --nccl_fusion_max_ops=24 \ --gpu_image_decoder=True \ - --num_epoch=$NUM_EPOCH \ + --num_epoch=$E2E_NUM_EPOCH \ --num_examples=1024 \ --model=${model} 2>&1 | tee ${LOGFILE} From 21e1712123589275d75111d8daf870830e123c64 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 09:29:32 +0800 Subject: [PATCH 047/130] fix --- ci/test/resnet50_e2e.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test/resnet50_e2e.sh b/ci/test/resnet50_e2e.sh index 9f845d5..592bf16 100755 --- a/ci/test/resnet50_e2e.sh +++ b/ci/test/resnet50_e2e.sh @@ -1,4 +1,4 @@ -test_case=n${E2E_NODE_NUM}_g${E2E_GPU_NUM}_b${E2E_BATCH_SIZE}_lr${E2E_LEARNING_RATE}_e${E2E_NUM_EPOCH} +test_case=n${E2E_NODE_NUM}_g${E2E_GPU_NUM_PER_NODE}_b${E2E_BATCH_SIZE}_lr${E2E_LEARNING_RATE}_e${E2E_NUM_EPOCH} LOG_FOLDER=./log mkdir -p $LOG_FOLDER @@ -14,7 +14,7 @@ python3 ${E2E_SRC_ROOT}/of_cnn_train_val.py \ --val_data_dir=$E2E_DATA_ROOT/validation \ --val_data_part_num=256 \ --num_nodes=${E2E_NODE_NUM} \ - --gpu_num_per_node=${E2E_GPU_NUM} \ + --gpu_num_per_node=${E2E_GPU_NUM_PER_NODE} \ --optimizer="sgd" \ --momentum=0.875 \ --label_smoothing=0.1 \ From b21e5a8f5fc9c9d9386cc3451c8352681ff6d7ef Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 09:33:55 +0800 Subject: [PATCH 048/130] fix --- ci/test/resnet50_e2e.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test/resnet50_e2e.sh b/ci/test/resnet50_e2e.sh index 592bf16..eec78e4 100755 --- a/ci/test/resnet50_e2e.sh +++ b/ci/test/resnet50_e2e.sh @@ -30,7 +30,7 @@ python3 ${E2E_SRC_ROOT}/of_cnn_train_val.py \ --nccl_fusion_threshold_mb=16 \ --nccl_fusion_max_ops=24 \ --gpu_image_decoder=True \ - --num_epoch=$E2E_NUM_EPOCH \ + --num_epoch=$E2E_NUM_EPOCHS \ --num_examples=1024 \ --model=${model} 2>&1 | tee ${LOGFILE} From c0f3e553ece78f8e5c7736c544e03eeef409ceea Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 11:42:08 +0800 Subject: [PATCH 049/130] upload log to oss --- .github/actions/upload_oss/action.yml | 40 +++++++++++++++++++++++++++ .github/workflows/cnn_e2e.yml | 15 ++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 .github/actions/upload_oss/action.yml diff --git a/.github/actions/upload_oss/action.yml b/.github/actions/upload_oss/action.yml new file mode 100644 index 0000000..adce803 --- /dev/null +++ b/.github/actions/upload_oss/action.yml @@ -0,0 +1,40 @@ +inputs: + src_path: + required: true + oss_dst_path: + required: true + oss_access_key_id: + required: true + oss_access_key_secret: + required: true + upload_core: + required: false +runs: + using: "composite" + steps: + - run: | + if [ -z "$OSS_ACCESS_KEY_ID" ] + then + exit 0 + fi + if [ ! -f "$HOME/ossutil64" ]; then + curl http://gosspublic.alicdn.com/ossutil/1.6.19/ossutil64 -o $HOME/ossutil64 + fi + chmod 755 $HOME/ossutil64 + $HOME/ossutil64 config -e oss-cn-beijing.aliyuncs.com -i ${{ inputs.oss_access_key_id }} -k ${{ inputs.oss_access_key_secret }} -L EN -c $HOME/.ossutilconfig + dir_arg="" + if [ -d "${{ inputs.src_path }}" ]; then + dir_arg="--recursive" + fi + upload_core_arg="" + if [ "${{ inputs.upload_core }}" == "true" ]; then + echo "will upload core files" + else + upload_core_arg+="--exclude core*" + fi + $HOME/ossutil64 cp --update ${dir_arg} ${upload_core_arg} ${{ inputs.src_path }} ${{ inputs.oss_dst_path }} + shell: bash + env: + OSS_ACCESS_KEY_ID: ${{ inputs.oss_access_key_id }} + OSS_ACCESS_KEY_SECRET: ${{ inputs.oss_access_key_secret }} + \ No newline at end of file diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index e82e3f2..36edd13 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -69,8 +69,8 @@ jobs: env: MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - name: Fix permissions run: | @@ -260,13 +260,22 @@ jobs: - name: Install OneFlow # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} run: | - ls ${ONEFLOW_WHEEL_PATH} docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + - name: Upload log + # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} + uses: ./.github/actions/upload_oss + with: + src_path: log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.repository }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + upload_core: false + # - name: Run OneFlow doctor # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} # run: | From 97c8ac1ecd1b78695d5cd1f7e4165e8f27d04159 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:05:00 +0800 Subject: [PATCH 050/130] fix --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 36edd13..025256e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.repository }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.head_ref }}.${{ github.sha::7 }}/oneflow/${{ github.event.inputs.of_branch_or_commit::7 }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false From 260c0d3b6a092bec3a8b38a9cbaa4db665a9d493 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:06:53 +0800 Subject: [PATCH 051/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 025256e..1787db9 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.head_ref }}.${{ github.sha::7 }}/oneflow/${{ github.event.inputs.of_branch_or_commit::7 }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.head_ref }}.${{github.sha::7}}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false From b508298f44a7086beae44cffcf640d470d440712 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:07:54 +0800 Subject: [PATCH 052/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 1787db9..9840439 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.head_ref }}.${{github.sha::7}}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.head_ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false From 2009a994950ef702d2475bdc73857322bb77697a Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:16:48 +0800 Subject: [PATCH 053/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 9840439..681634a 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.head_ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false From c12fc3d4edc8fc715ca53ba14521600b8f82bcc8 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:27:14 +0800 Subject: [PATCH 054/130] test --- .github/workflows/cnn_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 681634a..d16ad7d 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,8 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${github.sha::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false From df3273b17c379a0e8171677c3681db78afeecb2b Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:33:01 +0800 Subject: [PATCH 055/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index d16ad7d..e84fbc5 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${github.sha::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From c44a69b24251888731c4b8d980d11258e44df672 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:40:56 +0800 Subject: [PATCH 056/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index e84fbc5..288d59a 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,7 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit::7 }}/${{github.run_id}}/log # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From e6d0e96beff6fde4e936587eebc65f22e3fe9e5c Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 30 Sep 2021 12:45:49 +0800 Subject: [PATCH 057/130] test --- .github/workflows/cnn_e2e.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 288d59a..c7856e8 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -271,8 +271,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit::7 }}/${{github.run_id}}/log - # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${{ github.sha }}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false From c82780b34b04bcce2c6789d9ff3845c9b06c67bd Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Sat, 16 Oct 2021 23:08:40 +0800 Subject: [PATCH 058/130] test gpu8 --- .github/workflows/cnn_e2e.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index c7856e8..9592464 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -20,8 +20,10 @@ on: default: 8 env: ONEFLOW_SRC: oneflow-src - SSH_TANK_HOST: 192.168.1.23 - SSH_TANK_PATH: /home/ci-user/tank + # SSH_TANK_HOST: 192.168.1.23 + SSH_TANK_HOST: 10.0.22.16 + # SSH_TANK_PATH: /home/ci-user/tank + SSH_TANK_PATH: /DATA/disk1/tank OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} jobs: @@ -60,7 +62,8 @@ jobs: cu102 build-manylinux: name: "Build manylinux" - runs-on: ['self-hosted', 'linux', 'provision'] + # runs-on: ['self-hosted', 'linux', 'provision'] + runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] needs: [find-build-cache] strategy: fail-fast: true @@ -164,7 +167,8 @@ jobs: name: Test suite # needs: [find-test-cache]build-manylinux needs: [build-manylinux] - runs-on: ['self-hosted', 'linux', 'provision'] + # runs-on: ['self-hosted', 'linux', 'provision'] + runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] env: TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae From 989cb2d9125f60081788c97958f0594f870e6a91 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 11:59:44 +0800 Subject: [PATCH 059/130] rm usless lines --- .github/workflows/cnn_e2e.yml | 59 +---------------------------------- 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 9592464..e5aa451 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -141,28 +141,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} src-dir: ${{ env.WHEELHOUSE_DIR }} dst-dir: whl - # find-test-cache: - # name: "Find test cache" - # runs-on: ubuntu-latest - # needs: [build-manylinux] - # outputs: - # matrix: ${{ steps.find-cache.outputs.matrix }} - # steps: - # - name: Checkout Oneflow-Inc/oneflow - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/oneflow - # ref: ${{ github.event.inputs.of_branch_or_commit }} - # path: ${{ env.ONEFLOW_SRC }} - # - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: find cache - # id: find-cache - # timeout-minutes: 5 - # with: - # runner-labels: | - # self-hosted - # linux - # oneflow-src: ${{ env.ONEFLOW_SRC }} + test: name: Test suite # needs: [find-test-cache]build-manylinux @@ -280,40 +259,4 @@ jobs: oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} upload_core: false - # - name: Run OneFlow doctor - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} - # run: | - # docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor - - # build: - # name: 'Build and test this repo' - # runs-on: ['self-hosted', 'linux', 'provision'] - # steps: - # - run: echo "oneflow branch or commit is ${{ github.event.inputs.of_branch_or_commit }}." - # conda: - # name: Build with conda - # runs-on: ['self-hosted', 'linux', 'provision'] - # steps: - # - name: Checkout actions/checkout@v2 - # uses: actions/checkout@v2 - # - name: Checkout Oneflow-Inc/oneflow - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/oneflow - # ref: ${{ github.event.inputs.of_branch_or_commit }} - # path: oneflow-src - # - name: Checkout Oneflow-Inc/conda-env - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/conda-env - # ref: 30a7f00eb48ee9009d85a848e720823e5054c66b - # path: conda-env - # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Build with gcc7 - # with: - # cmake-init-cache: oneflow-src/cmake/caches/ci/gh-hosted/cpu-gcc.cmake - # oneflow-src: oneflow-src - # oneflow-build-env: conda - # conda-env-file: conda-env/dev/gcc7/environment-v2.yml - # conda-env-name: oneflow-dev-gcc7-v2 From 08e38ed8ce647db0456af3493616668f5e182821 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 12:50:34 +0800 Subject: [PATCH 060/130] update --- .github/workflows/cnn_e2e.yml | 277 +++++++++++++++++----------------- 1 file changed, 139 insertions(+), 138 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index e5aa451..0b9d000 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -20,12 +20,6 @@ on: default: 8 env: ONEFLOW_SRC: oneflow-src - # SSH_TANK_HOST: 192.168.1.23 - SSH_TANK_HOST: 10.0.22.16 - # SSH_TANK_PATH: /home/ci-user/tank - SSH_TANK_PATH: /DATA/disk1/tank - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} jobs: cancel_previous: name: Cancel previous runs @@ -49,6 +43,16 @@ jobs: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} + - name: Set environment variables + run: | + set -x + oneflow_branch=`git rev-parse --abbrev-ref HEAD` + oneflow_commit=`git rev-parse HEAD~2` + oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} + oss_dir=${oss_branch_dir}/${oneflow_commit} + echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV + echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + set +x - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 name: find cache id: find-cache @@ -121,142 +125,139 @@ jobs: retry-failed-build: true python-versions: | 3.7 - - name: Upload bin - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 - with: - digest: ${{ steps.save-cache.outputs.build-digest }} - entry: ${{ matrix.entry }} - ssh-tank-host: ${{ env.SSH_TANK_HOST }} - ssh-tank-path: ${{ env.SSH_TANK_PATH }} - src-dir: ${{ env.MANYLINUX_CACHE_DIR }}/build/bin - dst-dir: bin - - name: Upload whl + - name: Upload wheel if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - uses: Oneflow-Inc/get-oneflow/degist/upload@2a9efceab8d45b725a687e73f870f9b75a15e472 - with: - digest: ${{ steps.save-cache.outputs.build-digest }} - entry: ${{ matrix.entry }} - ssh-tank-host: ${{ env.SSH_TANK_HOST }} - ssh-tank-path: ${{ env.SSH_TANK_PATH }} - src-dir: ${{ env.WHEELHOUSE_DIR }} - dst-dir: whl - - test: - name: Test suite - # needs: [find-test-cache]build-manylinux - needs: [build-manylinux] - # runs-on: ['self-hosted', 'linux', 'provision'] - runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] - env: - TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" - TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae - # strategy: - # fail-fast: true - # max-parallel: 5 - # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} - steps: - - name: Fix permissions - # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - run: | - set -x - docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - - name: Checkout Oneflow-Inc/OneFlow-Benchmark - uses: actions/checkout@v2 - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/oneflow - ref: ${{ github.event.inputs.of_branch_or_commit }} - path: ${{ env.ONEFLOW_SRC }} - - name: Remove container - timeout-minutes: 45 - # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - run: | - docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Save cache if successful - id: save-cache - timeout-minutes: 5 - with: - oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: cu102 - digest-type: test - mark-as-completed: true - # - name: Check digest and fail if cache result not identical to matrix - # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - # run: | - # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - # exit 1 - - name: Download wheel and binary - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 - id: download-digest - timeout-minutes: 10 - with: - digest: ${{ steps.save-cache.outputs.build-digest }} - entry: cu102 - ssh-tank-host: ${{ env.SSH_TANK_HOST }} - ssh-tank-path: ${{ env.SSH_TANK_PATH }} - digest-cache-dir: '~/digest-cache-bm' - - name: Enable Pytorch container - run: | - echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV - - name: Set environment variables - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - run: | - set -x - echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV - echo "ONEFLOW_WHEEL_PATH=${{ steps.download-digest.outputs.entry-dir }}/whl" >> $GITHUB_ENV - - name: Start container - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # working-directory: ${{ env.ONEFLOW_SRC }} - env: - ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin - run: | - docker pull ${{ env.TEST_IMG_TAG }} - docker run -d --rm --privileged --network host --shm-size=8g \ - --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ - --runtime=nvidia \ - -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \ - -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ - -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ - -v $HOME/test-container-cache/dot-local:/root/.local \ - -v $HOME/test-container-cache/dot-cache:/root/.cache \ - -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \ - -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ - -e ONEFLOW_CI=1 \ - -v $PWD:$PWD \ - -w $PWD \ - -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ - -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ - -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ - -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ - -e E2E_NODE_NUM=1 \ - -e E2E_BATCH_SIZE=32 \ - -e E2E_LEARNING_RATE=1.536 \ - -e E2E_SRC_ROOT=Classification/cnns \ - -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ - --name ${TEST_CONTAINER_NAME} \ - ${{ env.TEST_IMG_TAG }} \ - sleep 3600 - - name: Install OneFlow - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} - run: | - docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow - - name: Test container - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - - name: Upload log - # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} uses: ./.github/actions/upload_oss with: - src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + src_path: ${{ env.WHEELHOUSE_DIR }} + oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - upload_core: false + - name: Update pip index + if: env.is_built != '1' && contains(env.compute_platforms, matrix.compute_platform) + env: + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + run: | + python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + python3 -m pip install oss2 beautifulsoup4 --user + python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + + # test: + # name: Test suite + # # needs: [find-test-cache]build-manylinux + # needs: [build-manylinux] + # # runs-on: ['self-hosted', 'linux', 'provision'] + # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] + # env: + # TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + # TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae + # # strategy: + # # fail-fast: true + # # max-parallel: 5 + # # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} + # steps: + # - name: Fix permissions + # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} + # run: | + # set -x + # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + # - name: Checkout Oneflow-Inc/OneFlow-Benchmark + # uses: actions/checkout@v2 + # - name: Checkout Oneflow-Inc/oneflow + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/oneflow + # ref: ${{ github.event.inputs.of_branch_or_commit }} + # path: ${{ env.ONEFLOW_SRC }} + # - name: Remove container + # timeout-minutes: 45 + # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} + # run: | + # docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true + # - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: Save cache if successful + # id: save-cache + # timeout-minutes: 5 + # with: + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # entry: cu102 + # digest-type: test + # mark-as-completed: true + # # - name: Check digest and fail if cache result not identical to matrix + # # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + # # run: | + # # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + # # exit 1 + # - name: Download wheel and binary + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 + # id: download-digest + # timeout-minutes: 10 + # with: + # digest: ${{ steps.save-cache.outputs.build-digest }} + # entry: cu102 + # ssh-tank-host: ${{ env.SSH_TANK_HOST }} + # ssh-tank-path: ${{ env.SSH_TANK_PATH }} + # digest-cache-dir: '~/digest-cache-bm' + # - name: Enable Pytorch container + # run: | + # echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV + # - name: Set environment variables + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # run: | + # set -x + # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV + # echo "ONEFLOW_WHEEL_PATH=${{ steps.download-digest.outputs.entry-dir }}/whl" >> $GITHUB_ENV + # - name: Start container + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # # working-directory: ${{ env.ONEFLOW_SRC }} + # env: + # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin + # run: | + # docker pull ${{ env.TEST_IMG_TAG }} + # docker run -d --rm --privileged --network host --shm-size=8g \ + # --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + # --runtime=nvidia \ + # -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \ + # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ + # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ + # -v $HOME/test-container-cache/dot-local:/root/.local \ + # -v $HOME/test-container-cache/dot-cache:/root/.cache \ + # -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \ + # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ + # -e ONEFLOW_CI=1 \ + # -v $PWD:$PWD \ + # -w $PWD \ + # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ + # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + # -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ + # -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ + # -e E2E_NODE_NUM=1 \ + # -e E2E_BATCH_SIZE=32 \ + # -e E2E_LEARNING_RATE=1.536 \ + # -e E2E_SRC_ROOT=Classification/cnns \ + # -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ + # --name ${TEST_CONTAINER_NAME} \ + # ${{ env.TEST_IMG_TAG }} \ + # sleep 3600 + # - name: Install OneFlow + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + # run: | + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + # - name: Test container + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # run: | + # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + # - name: Upload log + # # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} + # uses: ./.github/actions/upload_oss + # with: + # src_path: log + # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # upload_core: false From 546024740c47dab18fefbc8dc9043e1b0556824b Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 12:52:58 +0800 Subject: [PATCH 061/130] change host --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 0b9d000..f7dda17 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -66,8 +66,8 @@ jobs: cu102 build-manylinux: name: "Build manylinux" - # runs-on: ['self-hosted', 'linux', 'provision'] - runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] + runs-on: ['self-hosted', 'linux', 'provision'] + # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] needs: [find-build-cache] strategy: fail-fast: true From 6d3c895007f78ac0c5615e65111cce74a0c26006 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 12:55:55 +0800 Subject: [PATCH 062/130] change host --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index f7dda17..260e287 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -47,7 +47,7 @@ jobs: run: | set -x oneflow_branch=`git rev-parse --abbrev-ref HEAD` - oneflow_commit=`git rev-parse HEAD~2` + oneflow_commit=`git rev-parse HEAD` oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV From 85a28c0f73a59ce176f50a56d3e44eba8539ed02 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 14:15:51 +0800 Subject: [PATCH 063/130] fix --- .github/workflows/cnn_e2e.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 260e287..b255beb 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -46,12 +46,15 @@ jobs: - name: Set environment variables run: | set -x + current_dir=$PWD + cd ${{ env.ONEFLOW_SRC }} oneflow_branch=`git rev-parse --abbrev-ref HEAD` oneflow_commit=`git rev-parse HEAD` oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + cd $current_dir set +x - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 name: find cache @@ -86,6 +89,7 @@ jobs: - name: Remove leftover cuda-installer.log run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log + - uses: actions/checkout@v2 - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 with: From 27c34d607df3560993031c910425ea55719cd337 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 14:26:32 +0800 Subject: [PATCH 064/130] fix --- .github/workflows/cnn_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index b255beb..927868b 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -48,7 +48,8 @@ jobs: set -x current_dir=$PWD cd ${{ env.ONEFLOW_SRC }} - oneflow_branch=`git rev-parse --abbrev-ref HEAD` + # oneflow_branch=`git rev-parse --abbrev-ref HEAD` + oneflow_branch=`git branch --show-current` oneflow_commit=`git rev-parse HEAD` oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} From bb45ed3d24e30220c9db5505562843b693d9dc59 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 14:33:50 +0800 Subject: [PATCH 065/130] fix --- .github/workflows/cnn_e2e.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 927868b..86069d0 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -43,20 +43,6 @@ jobs: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - - name: Set environment variables - run: | - set -x - current_dir=$PWD - cd ${{ env.ONEFLOW_SRC }} - # oneflow_branch=`git rev-parse --abbrev-ref HEAD` - oneflow_branch=`git branch --show-current` - oneflow_commit=`git rev-parse HEAD` - oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} - oss_dir=${oss_branch_dir}/${oneflow_commit} - echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV - echo "oss_dir=${oss_dir}" >> $GITHUB_ENV - cd $current_dir - set +x - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 name: find cache id: find-cache @@ -97,6 +83,20 @@ jobs: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} + - name: Set environment variables + run: | + set -x + current_dir=$PWD + cd ${{ env.ONEFLOW_SRC }} + # oneflow_branch=`git rev-parse --abbrev-ref HEAD` + oneflow_branch=`git branch --show-current` + oneflow_commit=`git rev-parse HEAD` + oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} + oss_dir=${oss_branch_dir}/${oneflow_commit} + echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV + echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + cd $current_dir + set +x - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Save cache if successful id: save-cache @@ -139,7 +139,7 @@ jobs: oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - if: env.is_built != '1' && contains(env.compute_platforms, matrix.compute_platform) + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From 923c26364348e474f3da3feba58116804117961d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 14:48:14 +0800 Subject: [PATCH 066/130] test --- .github/workflows/cnn_e2e.yml | 103 +++++++++++++++++----------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 86069d0..e0657fc 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -89,7 +89,8 @@ jobs: current_dir=$PWD cd ${{ env.ONEFLOW_SRC }} # oneflow_branch=`git rev-parse --abbrev-ref HEAD` - oneflow_branch=`git branch --show-current` + # oneflow_branch=`git branch --show-current` + oneflow_branch=`git symbolic-ref --short HEAD` oneflow_commit=`git rev-parse HEAD` oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} @@ -97,56 +98,56 @@ jobs: echo "oss_dir=${oss_dir}" >> $GITHUB_ENV cd $current_dir set +x - - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Save cache if successful - id: save-cache - timeout-minutes: 5 - with: - oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: ${{ matrix.entry }} - digest-type: build - mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - - name: Check digest and fail if cache result not identical to matrix - if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - run: | - echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - exit 1 - - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Build manylinux cu102 - id: build-cuda - if: ${{ matrix.entry =='cu102' && !matrix.cache-hit }} - with: - cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake - build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh - oneflow-src: ${{ env.ONEFLOW_SRC }} - oneflow-build-env: manylinux - wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - clear-wheelhouse-dir: true - self-hosted: true - cuda-version: "10.2" - manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - docker-run-use-system-http-proxy: false - docker-run-use-lld: false - retry-failed-build: true - python-versions: | - 3.7 - - name: Upload wheel - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - uses: ./.github/actions/upload_oss - with: - src_path: ${{ env.WHEELHOUSE_DIR }} - oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} - oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - - name: Update pip index - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - env: - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - run: | - python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - python3 -m pip install oss2 beautifulsoup4 --user - python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + # - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: Save cache if successful + # id: save-cache + # timeout-minutes: 5 + # with: + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # entry: ${{ matrix.entry }} + # digest-type: build + # mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + # - name: Check digest and fail if cache result not identical to matrix + # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + # run: | + # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + # exit 1 + # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: Build manylinux cu102 + # id: build-cuda + # if: ${{ matrix.entry =='cu102' && !matrix.cache-hit }} + # with: + # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # oneflow-build-env: manylinux + # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + # clear-wheelhouse-dir: true + # self-hosted: true + # cuda-version: "10.2" + # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + # docker-run-use-system-http-proxy: false + # docker-run-use-lld: false + # retry-failed-build: true + # python-versions: | + # 3.7 + # - name: Upload wheel + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # uses: ./.github/actions/upload_oss + # with: + # src_path: ${{ env.WHEELHOUSE_DIR }} + # oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # - name: Update pip index + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # env: + # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # run: | + # python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + # python3 -m pip install oss2 beautifulsoup4 --user + # python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html # test: # name: Test suite From 19e668375e8ad02cc93bc13e395ddc5363b25d23 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 15:01:08 +0800 Subject: [PATCH 067/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index e0657fc..d66cf06 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -88,9 +88,9 @@ jobs: set -x current_dir=$PWD cd ${{ env.ONEFLOW_SRC }} - # oneflow_branch=`git rev-parse --abbrev-ref HEAD` + oneflow_branch=`git rev-parse --abbrev-ref HEAD` # oneflow_branch=`git branch --show-current` - oneflow_branch=`git symbolic-ref --short HEAD` + # oneflow_branch=`git symbolic-ref --short HEAD` oneflow_commit=`git rev-parse HEAD` oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} From bff2c4935c401f1ffc0437301385a537ba4a51b9 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 15:20:20 +0800 Subject: [PATCH 068/130] test --- .github/workflows/cnn_e2e.yml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index d66cf06..d5fbff6 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -18,6 +18,14 @@ on: description: 'gpu number per node' required: true default: 8 + python_version: + description: "python_version" + default: "3.7" + required: false + compute_platform: + description: "compute_platform" + default: "cu112" + required: false env: ONEFLOW_SRC: oneflow-src jobs: @@ -53,7 +61,7 @@ jobs: build oneflow-src: ${{ env.ONEFLOW_SRC }} entries: | - cu102 + ${{ github.event.inputs.compute_platform }} build-manylinux: name: "Build manylinux" runs-on: ['self-hosted', 'linux', 'provision'] @@ -92,7 +100,7 @@ jobs: # oneflow_branch=`git branch --show-current` # oneflow_branch=`git symbolic-ref --short HEAD` oneflow_commit=`git rev-parse HEAD` - oss_branch_dir=branch/${oneflow_branch}/${{ matrix.compute_platform }} + oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV echo "oss_dir=${oss_dir}" >> $GITHUB_ENV @@ -113,9 +121,9 @@ jobs: # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" # exit 1 # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Build manylinux cu102 + # name: Build manylinux ${{ github.event.inputs.compute_platform }} # id: build-cuda - # if: ${{ matrix.entry =='cu102' && !matrix.cache-hit }} + # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} # with: # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh @@ -124,13 +132,13 @@ jobs: # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} # clear-wheelhouse-dir: true # self-hosted: true - # cuda-version: "10.2" + # cuda-version: "11.2" # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} # docker-run-use-system-http-proxy: false # docker-run-use-lld: false # retry-failed-build: true # python-versions: | - # 3.7 + # ${{ github.event.inputs.python_version }} # - name: Upload wheel # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} # uses: ./.github/actions/upload_oss @@ -187,7 +195,7 @@ jobs: # timeout-minutes: 5 # with: # oneflow-src: ${{ env.ONEFLOW_SRC }} - # entry: cu102 + # entry: ${{ github.event.inputs.compute_platform }} # digest-type: test # mark-as-completed: true # # - name: Check digest and fail if cache result not identical to matrix @@ -202,7 +210,7 @@ jobs: # timeout-minutes: 10 # with: # digest: ${{ steps.save-cache.outputs.build-digest }} - # entry: cu102 + # entry: ${{ github.event.inputs.compute_platform }} # ssh-tank-host: ${{ env.SSH_TANK_HOST }} # ssh-tank-path: ${{ env.SSH_TANK_PATH }} # digest-cache-dir: '~/digest-cache-bm' From d19b5c51f5491be7bab290eafc04fb72baf4e3f6 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 15:45:16 +0800 Subject: [PATCH 069/130] test --- .github/workflows/cnn_e2e.yml | 100 +++++++++++++++++----------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index d5fbff6..38a77d6 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -106,56 +106,56 @@ jobs: echo "oss_dir=${oss_dir}" >> $GITHUB_ENV cd $current_dir set +x - # - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Save cache if successful - # id: save-cache - # timeout-minutes: 5 - # with: - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # entry: ${{ matrix.entry }} - # digest-type: build - # mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - # - name: Check digest and fail if cache result not identical to matrix - # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - # run: | - # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - # exit 1 - # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Build manylinux ${{ github.event.inputs.compute_platform }} - # id: build-cuda - # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} - # with: - # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake - # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # oneflow-build-env: manylinux - # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - # clear-wheelhouse-dir: true - # self-hosted: true - # cuda-version: "11.2" - # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - # docker-run-use-system-http-proxy: false - # docker-run-use-lld: false - # retry-failed-build: true - # python-versions: | - # ${{ github.event.inputs.python_version }} - # - name: Upload wheel - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - # uses: ./.github/actions/upload_oss - # with: - # src_path: ${{ env.WHEELHOUSE_DIR }} - # oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # - name: Update pip index - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - # env: - # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # run: | - # python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # python3 -m pip install oss2 beautifulsoup4 --user - # python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Save cache if successful + id: save-cache + timeout-minutes: 5 + with: + oneflow-src: ${{ env.ONEFLOW_SRC }} + entry: ${{ matrix.entry }} + digest-type: build + mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + - name: Check digest and fail if cache result not identical to matrix + if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + run: | + echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + exit 1 + - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Build manylinux ${{ github.event.inputs.compute_platform }} + id: build-cuda + if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + with: + cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + oneflow-src: ${{ env.ONEFLOW_SRC }} + oneflow-build-env: manylinux + wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + clear-wheelhouse-dir: true + self-hosted: true + cuda-version: "11.2" + manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + docker-run-use-system-http-proxy: false + docker-run-use-lld: false + retry-failed-build: true + python-versions: | + ${{ github.event.inputs.python_version }} + - name: Upload wheel + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + uses: ./.github/actions/upload_oss + with: + src_path: ${{ env.WHEELHOUSE_DIR }} + oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + - name: Update pip index + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + env: + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + run: | + python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + python3 -m pip install oss2 beautifulsoup4 --user + python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html # test: # name: Test suite From bbf9f06fc0772780dc7df27366871385c0f8d6e3 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 15:52:19 +0800 Subject: [PATCH 070/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 38a77d6..b899df9 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -74,8 +74,8 @@ jobs: env: MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux-wheelhouse - # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - name: Fix permissions run: | From 0758893c5add67d0735ed4f6ce512971af3f0035 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 15:55:17 +0800 Subject: [PATCH 071/130] test --- .github/workflows/cnn_e2e.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index b899df9..78ae26c 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -123,7 +123,7 @@ jobs: - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux ${{ github.event.inputs.compute_platform }} id: build-cuda - if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh @@ -140,7 +140,7 @@ jobs: python-versions: | ${{ github.event.inputs.python_version }} - name: Upload wheel - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: ./.github/actions/upload_oss with: src_path: ${{ env.WHEELHOUSE_DIR }} @@ -148,7 +148,7 @@ jobs: oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From 5d7482dd3e7cf4b2d56ee6f75a93a9b534879a8e Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 15:59:57 +0800 Subject: [PATCH 072/130] test --- .github/workflows/cnn_e2e.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 78ae26c..2a1094d 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -24,7 +24,7 @@ on: required: false compute_platform: description: "compute_platform" - default: "cu112" + default: "cu102" required: false env: ONEFLOW_SRC: oneflow-src @@ -123,7 +123,7 @@ jobs: - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux ${{ github.event.inputs.compute_platform }} id: build-cuda - # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh @@ -132,7 +132,7 @@ jobs: wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} clear-wheelhouse-dir: true self-hosted: true - cuda-version: "11.2" + cuda-version: "10.2" manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} docker-run-use-system-http-proxy: false docker-run-use-lld: false @@ -140,7 +140,7 @@ jobs: python-versions: | ${{ github.event.inputs.python_version }} - name: Upload wheel - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: ./.github/actions/upload_oss with: src_path: ${{ env.WHEELHOUSE_DIR }} @@ -148,7 +148,7 @@ jobs: oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From 7943135cfdedb2c2bf1c1f465c9d324729eea54c Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 17:13:48 +0800 Subject: [PATCH 073/130] test --- .github/workflows/cnn_e2e.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 2a1094d..f1b619e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -115,15 +115,15 @@ jobs: entry: ${{ matrix.entry }} digest-type: build mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - - name: Check digest and fail if cache result not identical to matrix - if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - run: | - echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - exit 1 + # - name: Check digest and fail if cache result not identical to matrix + # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + # run: | + # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + # exit 1 - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux ${{ github.event.inputs.compute_platform }} id: build-cuda - if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh @@ -140,7 +140,7 @@ jobs: python-versions: | ${{ github.event.inputs.python_version }} - name: Upload wheel - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: ./.github/actions/upload_oss with: src_path: ${{ env.WHEELHOUSE_DIR }} @@ -148,7 +148,7 @@ jobs: oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From bc30a1075cb26228f32ba19b4e87a00675a0a30f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 18:06:00 +0800 Subject: [PATCH 074/130] test --- .github/workflows/cnn_e2e.yml | 206 +++++++++++++++------------------- 1 file changed, 89 insertions(+), 117 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index f1b619e..24e9db0 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -104,6 +104,8 @@ jobs: oss_dir=${oss_branch_dir}/${oneflow_commit} echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }}/${oneflow_commit} + echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV cd $current_dir set +x - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 @@ -155,123 +157,93 @@ jobs: run: | python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple python3 -m pip install oss2 beautifulsoup4 --user - python3 tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html - # test: - # name: Test suite - # # needs: [find-test-cache]build-manylinux - # needs: [build-manylinux] - # # runs-on: ['self-hosted', 'linux', 'provision'] - # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] - # env: - # TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" - # TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae - # # strategy: - # # fail-fast: true - # # max-parallel: 5 - # # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} - # steps: - # - name: Fix permissions - # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - # run: | - # set -x - # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - # - name: Checkout Oneflow-Inc/OneFlow-Benchmark - # uses: actions/checkout@v2 - # - name: Checkout Oneflow-Inc/oneflow - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/oneflow - # ref: ${{ github.event.inputs.of_branch_or_commit }} - # path: ${{ env.ONEFLOW_SRC }} - # - name: Remove container - # timeout-minutes: 45 - # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - # run: | - # docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - # - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Save cache if successful - # id: save-cache - # timeout-minutes: 5 - # with: - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # entry: ${{ github.event.inputs.compute_platform }} - # digest-type: test - # mark-as-completed: true - # # - name: Check digest and fail if cache result not identical to matrix - # # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - # # run: | - # # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - # # exit 1 - # - name: Download wheel and binary - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # uses: Oneflow-Inc/get-oneflow/degist/download@2a9efceab8d45b725a687e73f870f9b75a15e472 - # id: download-digest - # timeout-minutes: 10 - # with: - # digest: ${{ steps.save-cache.outputs.build-digest }} - # entry: ${{ github.event.inputs.compute_platform }} - # ssh-tank-host: ${{ env.SSH_TANK_HOST }} - # ssh-tank-path: ${{ env.SSH_TANK_PATH }} - # digest-cache-dir: '~/digest-cache-bm' - # - name: Enable Pytorch container - # run: | - # echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV - # - name: Set environment variables - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # run: | - # set -x - # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV - # echo "ONEFLOW_WHEEL_PATH=${{ steps.download-digest.outputs.entry-dir }}/whl" >> $GITHUB_ENV - # - name: Start container - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # # working-directory: ${{ env.ONEFLOW_SRC }} - # env: - # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin - # run: | - # docker pull ${{ env.TEST_IMG_TAG }} - # docker run -d --rm --privileged --network host --shm-size=8g \ - # --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ - # --runtime=nvidia \ - # -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \ - # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ - # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ - # -v $HOME/test-container-cache/dot-local:/root/.local \ - # -v $HOME/test-container-cache/dot-cache:/root/.cache \ - # -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \ - # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ - # -e ONEFLOW_CI=1 \ - # -v $PWD:$PWD \ - # -w $PWD \ - # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ - # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ - # -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ - # -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ - # -e E2E_NODE_NUM=1 \ - # -e E2E_BATCH_SIZE=32 \ - # -e E2E_LEARNING_RATE=1.536 \ - # -e E2E_SRC_ROOT=Classification/cnns \ - # -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ - # --name ${TEST_CONTAINER_NAME} \ - # ${{ env.TEST_IMG_TAG }} \ - # sleep 3600 - # - name: Install OneFlow - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} - # run: | - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow - # - name: Test container - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # run: | - # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - # - name: Upload log - # # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} - # uses: ./.github/actions/upload_oss - # with: - # src_path: log - # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # upload_core: false + test: + name: Test suite + # needs: [find-test-cache]build-manylinux + needs: [build-manylinux] + # runs-on: ['self-hosted', 'linux', 'provision'] + runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] + env: + TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae + # strategy: + # fail-fast: true + # max-parallel: 5 + # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} + steps: + - name: Fix permissions + # if: ${{ contains(matrix.runs-on, 'self-hosted') }} + run: | + set -x + docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + - name: Checkout Oneflow-Inc/OneFlow-Benchmark + uses: actions/checkout@v2 + - name: Remove container + timeout-minutes: 45 + # if: ${{ contains(matrix.runs-on, 'self-hosted') }} + run: | + docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true + - name: Enable Pytorch container + run: | + echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV + # - name: Set environment variables + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # run: | + # set -x + # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV + # echo "ONEFLOW_WHEEL_PATH=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu102/41b06bf56daaa5ea0087998399d5980e9fc5ab59" >> $GITHUB_ENV + - name: Start container + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # working-directory: ${{ env.ONEFLOW_SRC }} + # env: + # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin + run: | + docker pull ${{ env.TEST_IMG_TAG }} + docker run -d --rm --privileged --network host --shm-size=8g \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + --runtime=nvidia \ + -v /DATA/disk1:/dataset:ro \ + -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \ + -v $PWD:$PWD \ + -w $PWD \ + -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ + -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ + -e E2E_NODE_NUM=1 \ + -e E2E_BATCH_SIZE=32 \ + -e E2E_LEARNING_RATE=1.536 \ + -e E2E_SRC_ROOT=Classification/cnns \ + -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ + --name ${TEST_CONTAINER_NAME} \ + ${{ env.TEST_IMG_TAG }} \ + sleep 3600 + # -e ONEFLOW_CI=1 \ + # -v /model_zoo:/model_zoo:ro \ + # -v $HOME/test-container-cache/dot-local:/root/.local \ + # -v $HOME/test-container-cache/dot-cache:/root/.cache \ + # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ + # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ + # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ + # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ + # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + - name: Install OneFlow + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + run: | + docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + - name: Test container + # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + run: | + docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + - name: Upload log + # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} + uses: ./.github/actions/upload_oss + with: + src_path: log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + upload_core: false From 47b824567e3be50827c0a0156f01331d13ec516d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 18:45:50 +0800 Subject: [PATCH 075/130] test --- .github/workflows/cnn_e2e.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 24e9db0..098e923 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -125,7 +125,7 @@ jobs: - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 name: Build manylinux ${{ github.event.inputs.compute_platform }} id: build-cuda - # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh @@ -142,7 +142,7 @@ jobs: python-versions: | ${{ github.event.inputs.python_version }} - name: Upload wheel - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} uses: ./.github/actions/upload_oss with: src_path: ${{ env.WHEELHOUSE_DIR }} @@ -150,7 +150,7 @@ jobs: oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} @@ -205,7 +205,7 @@ jobs: --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ --runtime=nvidia \ -v /DATA/disk1:/dataset:ro \ - -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \ + -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ -v $PWD:$PWD \ -w $PWD \ -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ From 82439ddc62bb3e550932781f8bd1b04445b13d5d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 18:49:14 +0800 Subject: [PATCH 076/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 098e923..c33dcd9 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -231,7 +231,7 @@ jobs: # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} run: | docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ env.ONEFLOW_WHEEL_PATH }} oneflow - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | From 4e7b97855a1a20a05d27b788fe56a5734f86f94a Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 19:01:44 +0800 Subject: [PATCH 077/130] test --- .github/workflows/cnn_e2e.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index c33dcd9..58a9749 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -205,7 +205,7 @@ jobs: --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ --runtime=nvidia \ -v /DATA/disk1:/dataset:ro \ - -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ + -e ONEFLOW_WHEEL_PATH=${{ jobs.build-manylinux.env.ONEFLOW_WHEEL_PATH }} \ -v $PWD:$PWD \ -w $PWD \ -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ @@ -230,8 +230,9 @@ jobs: - name: Install OneFlow # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} run: | + docker exec ${TEST_CONTAINER_NAME} python3 --version docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ env.ONEFLOW_WHEEL_PATH }} oneflow + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | From 3ec4d9a6b66b82ff5bef093677a6e3f516a54eb1 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 21 Oct 2021 19:04:33 +0800 Subject: [PATCH 078/130] test --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 58a9749..fe8b164 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -64,6 +64,7 @@ jobs: ${{ github.event.inputs.compute_platform }} build-manylinux: name: "Build manylinux" + id: build-manylinux runs-on: ['self-hosted', 'linux', 'provision'] # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] needs: [find-build-cache] From d22b56650aa50c09990afad7238e7c9aad7640bd Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 22 Oct 2021 10:56:11 +0800 Subject: [PATCH 079/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index fe8b164..73e01ee 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -233,7 +233,7 @@ jobs: run: | docker exec ${TEST_CONTAINER_NAME} python3 --version docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ env.ONEFLOW_WHEEL_PATH }} oneflow - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | From 8fd681dfb7a81ab81a62310bf38c556cfe01f79a Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 22 Oct 2021 10:58:50 +0800 Subject: [PATCH 080/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 73e01ee..75afbb0 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -206,7 +206,7 @@ jobs: --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ --runtime=nvidia \ -v /DATA/disk1:/dataset:ro \ - -e ONEFLOW_WHEEL_PATH=${{ jobs.build-manylinux.env.ONEFLOW_WHEEL_PATH }} \ + -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ -v $PWD:$PWD \ -w $PWD \ -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ From 0277f49f8c4b1798ea25135ffc3ebe5f1de13a6e Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 22 Oct 2021 11:00:34 +0800 Subject: [PATCH 081/130] test --- .github/workflows/cnn_e2e.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 75afbb0..5a2e501 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -64,7 +64,6 @@ jobs: ${{ github.event.inputs.compute_platform }} build-manylinux: name: "Build manylinux" - id: build-manylinux runs-on: ['self-hosted', 'linux', 'provision'] # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] needs: [find-build-cache] From c45d0080a880f3c5ea47cff533d8a0c3c027a76d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 22 Oct 2021 11:27:52 +0800 Subject: [PATCH 082/130] test --- .github/workflows/cnn_e2e.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 5a2e501..7f6fbd4 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -76,6 +76,8 @@ jobs: WHEELHOUSE_DIR: manylinux-wheelhouse OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + outputs: + ONEFLOW_WHEEL_PATH: ${{ steps.set-env.outputs.ONEFLOW_WHEEL_PATH }} steps: - name: Fix permissions run: | @@ -92,6 +94,7 @@ jobs: ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - name: Set environment variables + id: set-env run: | set -x current_dir=$PWD @@ -105,7 +108,8 @@ jobs: echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV echo "oss_dir=${oss_dir}" >> $GITHUB_ENV oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }}/${oneflow_commit} - echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV + # echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV + echo "::set-output name=ONEFLOW_WHEEL_PATH::${oneflow_wheel_path}" cd $current_dir set +x - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 @@ -232,7 +236,7 @@ jobs: run: | docker exec ${TEST_CONTAINER_NAME} python3 --version docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ env.ONEFLOW_WHEEL_PATH }} oneflow + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-manylinux.outputs.ONEFLOW_WHEEL_PATH }} oneflow - name: Test container # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | From 9c0e4ce6cc5bda4a7e8a11d6c96c469a9a6d7e8a Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 18:55:54 +0800 Subject: [PATCH 083/130] test --- .github/workflows/cnn_e2e.yml | 417 ++++++++++++++++++---------------- 1 file changed, 222 insertions(+), 195 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 7f6fbd4..86f35bb 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -38,217 +38,244 @@ jobs: with: access_token: ${{ github.token }} all_but_latest: true - find-build-cache: - name: "Find build cache" + find-oss-wheel: + name: "Set env and Find wheel in oss" runs-on: ubuntu-latest outputs: - matrix: ${{ steps.find-cache.outputs.matrix }} + found: ${{ steps.find-wheel.outputs.found }} + ONEFLOW_WHEEL_PATH: ${{ steps.set-wheel-path.outputs.ONEFLOW_WHEEL_PATH }} steps: - - uses: actions/checkout@v2 - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: find cache - id: find-cache - with: - runner-labels: | - self-hosted - linux - build - oneflow-src: ${{ env.ONEFLOW_SRC }} - entries: | - ${{ github.event.inputs.compute_platform }} - build-manylinux: - name: "Build manylinux" - runs-on: ['self-hosted', 'linux', 'provision'] - # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] - needs: [find-build-cache] - strategy: - fail-fast: true - max-parallel: 5 - matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} - env: - MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} - WHEELHOUSE_DIR: manylinux-wheelhouse - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - outputs: - ONEFLOW_WHEEL_PATH: ${{ steps.set-env.outputs.ONEFLOW_WHEEL_PATH }} - steps: - - name: Fix permissions + - name: Get Oneflow Wheel Path + id: set-wheel-path run: | set -x - docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - - name: Remove leftover cuda-installer.log - run: | - docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - - uses: actions/checkout@v2 - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/oneflow - ref: ${{ github.event.inputs.of_branch_or_commit }} - path: ${{ env.ONEFLOW_SRC }} - - name: Set environment variables - id: set-env - run: | - set -x - current_dir=$PWD cd ${{ env.ONEFLOW_SRC }} - oneflow_branch=`git rev-parse --abbrev-ref HEAD` - # oneflow_branch=`git branch --show-current` - # oneflow_branch=`git symbolic-ref --short HEAD` oneflow_commit=`git rev-parse HEAD` - oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - oss_dir=${oss_branch_dir}/${oneflow_commit} - echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV - echo "oss_dir=${oss_dir}" >> $GITHUB_ENV - oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }}/${oneflow_commit} - # echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV + echo "oneflow_commit=${oneflow_commit}" >> $GITHUB_ENV + oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/commit/${oneflow_commit}/${{ github.event.inputs.compute_platform }} echo "::set-output name=ONEFLOW_WHEEL_PATH::${oneflow_wheel_path}" - cd $current_dir set +x - - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Save cache if successful - id: save-cache - timeout-minutes: 5 - with: - oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: ${{ matrix.entry }} - digest-type: build - mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - # - name: Check digest and fail if cache result not identical to matrix - # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - # run: | - # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - # exit 1 - - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Build manylinux ${{ github.event.inputs.compute_platform }} - id: build-cuda - if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + - name: Check if wheel available + id: find-wheel + uses: Oneflow-Inc/get-oneflow/find-wheel@ee5b8d83dfc4645d0e67ae603d31f78fd4b1c1a7 with: - cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake - build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh - oneflow-src: ${{ env.ONEFLOW_SRC }} - oneflow-build-env: manylinux - wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - clear-wheelhouse-dir: true - self-hosted: true - cuda-version: "10.2" - manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - docker-run-use-system-http-proxy: false - docker-run-use-lld: false - retry-failed-build: true - python-versions: | - ${{ github.event.inputs.python_version }} - - name: Upload wheel - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - uses: ./.github/actions/upload_oss - with: - src_path: ${{ env.WHEELHOUSE_DIR }} - oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} - oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - - name: Update pip index - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - env: - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - run: | - python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - python3 -m pip install oss2 beautifulsoup4 --user - python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + ref: ${{ env.oneflow_commit }} + platform: ${{ github.event.inputs.compute_platform }} + # find-build-cache: + # name: "Find build wheel" + # runs-on: ubuntu-latest + # outputs: + # matrix: ${{ steps.find-cache.outputs.matrix }} + # steps: + # - uses: actions/checkout@v2 + # - name: Checkout Oneflow-Inc/oneflow + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/oneflow + # ref: ${{ github.event.inputs.of_branch_or_commit }} + # path: ${{ env.ONEFLOW_SRC }} + # - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: find cache + # id: find-cache + # with: + # runner-labels: | + # self-hosted + # linux + # build + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # entries: | + # ${{ github.event.inputs.compute_platform }} + # build-manylinux: + # name: "Build manylinux" + # runs-on: ['self-hosted', 'linux', 'provision'] + # # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] + # needs: [find-build-cache] + # strategy: + # fail-fast: true + # max-parallel: 5 + # matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} + # env: + # MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} + # WHEELHOUSE_DIR: manylinux-wheelhouse + # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # steps: + # - name: Fix permissions + # run: | + # set -x + # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + # - name: Remove leftover cuda-installer.log + # run: | + # docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log + # - uses: actions/checkout@v2 + # - name: Checkout Oneflow-Inc/oneflow + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/oneflow + # ref: ${{ github.event.inputs.of_branch_or_commit }} + # path: ${{ env.ONEFLOW_SRC }} + # - name: Set environment variables + # id: set-env + # run: | + # set -x + # current_dir=$PWD + # cd ${{ env.ONEFLOW_SRC }} + # oneflow_branch=`git rev-parse --abbrev-ref HEAD` + # # oneflow_branch=`git branch --show-current` + # # oneflow_branch=`git symbolic-ref --short HEAD` + # oneflow_commit=`git rev-parse HEAD` + # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + # oss_dir=${oss_branch_dir}/${oneflow_commit} + # echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV + # echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + # oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/commit/${oneflow_commit}/${{ github.event.inputs.compute_platform }} + # # echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV + # echo "::set-output name=ONEFLOW_WHEEL_PATH::${oneflow_wheel_path}" + # cd $current_dir + # set +x + # - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: Save cache if successful + # id: save-cache + # timeout-minutes: 5 + # with: + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # entry: ${{ matrix.entry }} + # digest-type: build + # mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + # # - name: Check digest and fail if cache result not identical to matrix + # # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} + # # run: | + # # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" + # # exit 1 + # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: Build manylinux ${{ github.event.inputs.compute_platform }} + # id: build-cuda + # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + # with: + # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # oneflow-build-env: manylinux + # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + # clear-wheelhouse-dir: true + # self-hosted: true + # cuda-version: "10.2" + # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + # docker-run-use-system-http-proxy: false + # docker-run-use-lld: false + # retry-failed-build: true + # python-versions: | + # ${{ github.event.inputs.python_version }} + # - name: Upload wheel + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # uses: ./.github/actions/upload_oss + # with: + # src_path: ${{ env.WHEELHOUSE_DIR }} + # oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # - name: Update pip index + # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + # env: + # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # run: | + # python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + # python3 -m pip install oss2 beautifulsoup4 --user + # python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html - test: - name: Test suite - # needs: [find-test-cache]build-manylinux - needs: [build-manylinux] - # runs-on: ['self-hosted', 'linux', 'provision'] - runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] - env: - TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" - TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae - # strategy: - # fail-fast: true - # max-parallel: 5 - # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} - steps: - - name: Fix permissions - # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - run: | - set -x - docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - - name: Checkout Oneflow-Inc/OneFlow-Benchmark - uses: actions/checkout@v2 - - name: Remove container - timeout-minutes: 45 - # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - run: | - docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - - name: Enable Pytorch container - run: | - echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV - # - name: Set environment variables - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # run: | - # set -x - # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV - # echo "ONEFLOW_WHEEL_PATH=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu102/41b06bf56daaa5ea0087998399d5980e9fc5ab59" >> $GITHUB_ENV - - name: Start container - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # working-directory: ${{ env.ONEFLOW_SRC }} - # env: - # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin - run: | - docker pull ${{ env.TEST_IMG_TAG }} - docker run -d --rm --privileged --network host --shm-size=8g \ - --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ - --runtime=nvidia \ - -v /DATA/disk1:/dataset:ro \ - -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ - -v $PWD:$PWD \ - -w $PWD \ - -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ - -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ - -e E2E_NODE_NUM=1 \ - -e E2E_BATCH_SIZE=32 \ - -e E2E_LEARNING_RATE=1.536 \ - -e E2E_SRC_ROOT=Classification/cnns \ - -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ - --name ${TEST_CONTAINER_NAME} \ - ${{ env.TEST_IMG_TAG }} \ - sleep 3600 - # -e ONEFLOW_CI=1 \ - # -v /model_zoo:/model_zoo:ro \ - # -v $HOME/test-container-cache/dot-local:/root/.local \ - # -v $HOME/test-container-cache/dot-cache:/root/.cache \ - # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ - # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ - # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ - # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ - # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ - - name: Install OneFlow - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} - run: | - docker exec ${TEST_CONTAINER_NAME} python3 --version - docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-manylinux.outputs.ONEFLOW_WHEEL_PATH }} oneflow - - name: Test container - # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - - name: Upload log - # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} - uses: ./.github/actions/upload_oss - with: - src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log - oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - upload_core: false + # test: + # name: Test suite + # # needs: [find-test-cache]build-manylinux + # needs: [build-manylinux] + # # runs-on: ['self-hosted', 'linux', 'provision'] + # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] + # env: + # TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + # TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae + # # strategy: + # # fail-fast: true + # # max-parallel: 5 + # # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} + # steps: + # - name: Fix permissions + # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} + # run: | + # set -x + # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + # - name: Checkout Oneflow-Inc/OneFlow-Benchmark + # uses: actions/checkout@v2 + # - name: Remove container + # timeout-minutes: 45 + # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} + # run: | + # docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true + # - name: Enable Pytorch container + # run: | + # echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV + # # - name: Set environment variables + # # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # # run: | + # # set -x + # # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV + # # echo "ONEFLOW_WHEEL_PATH=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu102/41b06bf56daaa5ea0087998399d5980e9fc5ab59" >> $GITHUB_ENV + # - name: Start container + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # # working-directory: ${{ env.ONEFLOW_SRC }} + # # env: + # # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin + # run: | + # docker pull ${{ env.TEST_IMG_TAG }} + # docker run -d --rm --privileged --network host --shm-size=8g \ + # --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + # --runtime=nvidia \ + # -v /DATA/disk1:/dataset:ro \ + # -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ + # -v $PWD:$PWD \ + # -w $PWD \ + # -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ + # -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ + # -e E2E_NODE_NUM=1 \ + # -e E2E_BATCH_SIZE=32 \ + # -e E2E_LEARNING_RATE=1.536 \ + # -e E2E_SRC_ROOT=Classification/cnns \ + # -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ + # --name ${TEST_CONTAINER_NAME} \ + # ${{ env.TEST_IMG_TAG }} \ + # sleep 3600 + # # -e ONEFLOW_CI=1 \ + # # -v /model_zoo:/model_zoo:ro \ + # # -v $HOME/test-container-cache/dot-local:/root/.local \ + # # -v $HOME/test-container-cache/dot-cache:/root/.cache \ + # # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ + # # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ + # # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ + # # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ + # # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + # - name: Install OneFlow + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} + # run: | + # docker exec ${TEST_CONTAINER_NAME} python3 --version + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-manylinux.outputs.ONEFLOW_WHEEL_PATH }} oneflow + # - name: Test container + # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} + # run: | + # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + # - name: Upload log + # # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} + # uses: ./.github/actions/upload_oss + # with: + # src_path: log + # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # upload_core: false From 1df9cebb996114b0e3e3ea0b904ec8d1c585b5b7 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 18:58:26 +0800 Subject: [PATCH 084/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 86f35bb..c2cb3de 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -66,7 +66,7 @@ jobs: uses: Oneflow-Inc/get-oneflow/find-wheel@ee5b8d83dfc4645d0e67ae603d31f78fd4b1c1a7 with: ref: ${{ env.oneflow_commit }} - platform: ${{ github.event.inputs.compute_platform }} + entry: ${{ github.event.inputs.compute_platform }} # find-build-cache: # name: "Find build wheel" # runs-on: ubuntu-latest From 775a26361aa6861ee508a74762831899d2aa2380 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 20:43:06 +0800 Subject: [PATCH 085/130] test --- .github/workflows/cnn_e2e.yml | 55 ++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index c2cb3de..14b2ae3 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -42,7 +42,7 @@ jobs: name: "Set env and Find wheel in oss" runs-on: ubuntu-latest outputs: - found: ${{ steps.find-wheel.outputs.found }} + find-wheel-hit: ${{ steps.find-wheel.outputs.find-wheel-hit }} ONEFLOW_WHEEL_PATH: ${{ steps.set-wheel-path.outputs.ONEFLOW_WHEEL_PATH }} steps: - name: Checkout Oneflow-Inc/oneflow @@ -67,34 +67,37 @@ jobs: with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} - # find-build-cache: - # name: "Find build wheel" - # runs-on: ubuntu-latest - # outputs: - # matrix: ${{ steps.find-cache.outputs.matrix }} - # steps: - # - uses: actions/checkout@v2 - # - name: Checkout Oneflow-Inc/oneflow - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/oneflow - # ref: ${{ github.event.inputs.of_branch_or_commit }} - # path: ${{ env.ONEFLOW_SRC }} - # - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: find cache - # id: find-cache - # with: - # runner-labels: | - # self-hosted - # linux - # build - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # entries: | - # ${{ github.event.inputs.compute_platform }} + find-build-cache: + name: "Find build wheel" + needs: [find-oss-wheel] + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.find-cache.outputs.matrix }} + steps: + - uses: actions/checkout@v2 + if: ${{ fromJSON(needs.find-wheel.outputs.find-wheel-hit) == 1 }} + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + if: ${{ fromJSON(needs.find-wheel.outputs.find-wheel-hit) == 1 }} + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 + if: ${{ fromJSON(needs.find-wheel.outputs.find-wheel-hit) == 1 }} + name: find cache + id: find-cache + with: + runner-labels: | + self-hosted + linux + build + oneflow-src: ${{ env.ONEFLOW_SRC }} + entries: | + ${{ github.event.inputs.compute_platform }} # build-manylinux: # name: "Build manylinux" # runs-on: ['self-hosted', 'linux', 'provision'] - # # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] # needs: [find-build-cache] # strategy: # fail-fast: true From 4da6c124a8b191e9d43a40bf47592b5460f4f206 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 20:48:29 +0800 Subject: [PATCH 086/130] test --- .github/workflows/cnn_e2e.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 14b2ae3..2fe1868 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -75,16 +75,16 @@ jobs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: - uses: actions/checkout@v2 - if: ${{ fromJSON(needs.find-wheel.outputs.find-wheel-hit) == 1 }} + if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 - if: ${{ fromJSON(needs.find-wheel.outputs.find-wheel-hit) == 1 }} + if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - if: ${{ fromJSON(needs.find-wheel.outputs.find-wheel-hit) == 1 }} + if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} name: find cache id: find-cache with: From 48e3e907871734d6c0dafdae006bd3bafd3adadd Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 20:53:05 +0800 Subject: [PATCH 087/130] test --- .github/workflows/cnn_e2e.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 2fe1868..a6f7cc7 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -74,6 +74,11 @@ jobs: outputs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: + steps: + - name: test only + run: | + set -x + echo 'find-wheel-hit = ${{ needs.find-wheel.outputs.find-wheel-hit }}'' - uses: actions/checkout@v2 if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} - name: Checkout Oneflow-Inc/oneflow From 801ceb552e61c4f6fad67b7fe06f6a16ffbc120d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 20:54:18 +0800 Subject: [PATCH 088/130] test --- .github/workflows/cnn_e2e.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index a6f7cc7..83aacf2 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -73,7 +73,6 @@ jobs: runs-on: ubuntu-latest outputs: matrix: ${{ steps.find-cache.outputs.matrix }} - steps: steps: - name: test only run: | From 428b30ac4d07f80651240b69ead771727ce7a4d0 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 20:55:57 +0800 Subject: [PATCH 089/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 83aacf2..d2a1e96 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -77,7 +77,7 @@ jobs: - name: test only run: | set -x - echo 'find-wheel-hit = ${{ needs.find-wheel.outputs.find-wheel-hit }}'' + echo 'find-wheel-hit = ${{ needs.find-wheel.outputs.find-wheel-hit }}' - uses: actions/checkout@v2 if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} - name: Checkout Oneflow-Inc/oneflow From 776b6513f1087d496a163d1c97c6098ede1e9ce2 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:04:49 +0800 Subject: [PATCH 090/130] test --- .github/workflows/cnn_e2e.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index d2a1e96..7c1889e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -67,6 +67,8 @@ jobs: with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} + - name: step-2 + run: echo "find-wheel.outputs.find-wheel-hit = ${{ steps.find-wheel.outputs.find-wheel-hit }}" find-build-cache: name: "Find build wheel" needs: [find-oss-wheel] From 897f976947d1c0a30de612811eae3aacb5b511f5 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:08:33 +0800 Subject: [PATCH 091/130] test --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 7c1889e..5fa6a49 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -79,6 +79,7 @@ jobs: - name: test only run: | set -x + echo 'find-wheel-hit = ${{ needs.find-oss-wheel.outputs.find-wheel-hit }}' echo 'find-wheel-hit = ${{ needs.find-wheel.outputs.find-wheel-hit }}' - uses: actions/checkout@v2 if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} From a0c015b32dd26b4dfd0fb72cb9fd97b207663fb2 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:11:02 +0800 Subject: [PATCH 092/130] test --- .github/workflows/cnn_e2e.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 5fa6a49..e3f4458 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -80,18 +80,17 @@ jobs: run: | set -x echo 'find-wheel-hit = ${{ needs.find-oss-wheel.outputs.find-wheel-hit }}' - echo 'find-wheel-hit = ${{ needs.find-wheel.outputs.find-wheel-hit }}' - uses: actions/checkout@v2 - if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 - if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - if: ${{ needs.find-wheel.outputs.find-wheel-hit == 1 }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} name: find cache id: find-cache with: From fc47d1b025c3be6d026f1a059d3ef6bca84f0289 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:17:15 +0800 Subject: [PATCH 093/130] test --- .github/workflows/cnn_e2e.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index e3f4458..8fb4fb4 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -72,25 +72,22 @@ jobs: find-build-cache: name: "Find build wheel" needs: [find-oss-wheel] + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} runs-on: ubuntu-latest outputs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: - - name: test only - run: | - set -x - echo 'find-wheel-hit = ${{ needs.find-oss-wheel.outputs.find-wheel-hit }}' - uses: actions/checkout@v2 - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} + # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} + # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} + # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} name: find cache id: find-cache with: From 238f649371ccefd7643b01e11434d5352328d9ee Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:28:55 +0800 Subject: [PATCH 094/130] test --- .github/workflows/cnn_e2e.yml | 195 ++++++++++++++++------------------ 1 file changed, 92 insertions(+), 103 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 8fb4fb4..548a12e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -78,16 +78,13 @@ jobs: matrix: ${{ steps.find-cache.outputs.matrix }} steps: - uses: actions/checkout@v2 - # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 - # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} name: find cache id: find-cache with: @@ -98,108 +95,100 @@ jobs: oneflow-src: ${{ env.ONEFLOW_SRC }} entries: | ${{ github.event.inputs.compute_platform }} - # build-manylinux: - # name: "Build manylinux" - # runs-on: ['self-hosted', 'linux', 'provision'] - # needs: [find-build-cache] - # strategy: - # fail-fast: true - # max-parallel: 5 - # matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} - # env: - # MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} - # WHEELHOUSE_DIR: manylinux-wheelhouse - # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # steps: - # - name: Fix permissions - # run: | - # set -x - # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - # - name: Remove leftover cuda-installer.log - # run: | - # docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - # - uses: actions/checkout@v2 - # - name: Checkout Oneflow-Inc/oneflow - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/oneflow - # ref: ${{ github.event.inputs.of_branch_or_commit }} - # path: ${{ env.ONEFLOW_SRC }} - # - name: Set environment variables - # id: set-env - # run: | - # set -x - # current_dir=$PWD - # cd ${{ env.ONEFLOW_SRC }} - # oneflow_branch=`git rev-parse --abbrev-ref HEAD` - # # oneflow_branch=`git branch --show-current` - # # oneflow_branch=`git symbolic-ref --short HEAD` - # oneflow_commit=`git rev-parse HEAD` - # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - # oss_dir=${oss_branch_dir}/${oneflow_commit} - # echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV - # echo "oss_dir=${oss_dir}" >> $GITHUB_ENV - # oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/commit/${oneflow_commit}/${{ github.event.inputs.compute_platform }} - # # echo "ONEFLOW_WHEEL_PATH=${oneflow_wheel_path}" >> $GITHUB_ENV - # echo "::set-output name=ONEFLOW_WHEEL_PATH::${oneflow_wheel_path}" - # cd $current_dir - # set +x - # - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Save cache if successful - # id: save-cache - # timeout-minutes: 5 - # with: - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # entry: ${{ matrix.entry }} - # digest-type: build - # mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - # # - name: Check digest and fail if cache result not identical to matrix - # # if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }} - # # run: | - # # echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" - # # exit 1 - # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Build manylinux ${{ github.event.inputs.compute_platform }} - # id: build-cuda - # if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} - # with: - # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake - # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # oneflow-build-env: manylinux - # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - # clear-wheelhouse-dir: true - # self-hosted: true - # cuda-version: "10.2" - # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - # docker-run-use-system-http-proxy: false - # docker-run-use-lld: false - # retry-failed-build: true - # python-versions: | - # ${{ github.event.inputs.python_version }} - # - name: Upload wheel - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - # uses: ./.github/actions/upload_oss - # with: - # src_path: ${{ env.WHEELHOUSE_DIR }} - # oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # - name: Update pip index - # if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} - # env: - # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # run: | - # python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # python3 -m pip install oss2 beautifulsoup4 --user - # python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + build-oneflow: + name: "Build OneFlow ${{ github.event.inputs.compute_platform }}" + runs-on: ['self-hosted', 'linux', 'provision'] + needs: [find-build-cache] + strategy: + fail-fast: true + max-parallel: 5 + matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} + env: + MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} + WHEELHOUSE_DIR: manylinux-wheelhouse + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + steps: + - name: Fix permissions + run: | + set -x + docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + - name: Remove leftover cuda-installer.log + run: | + docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log + - uses: actions/checkout@v2 + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} + - name: Set environment variables + id: set-env + run: | + set -x + current_dir=$PWD + cd ${{ env.ONEFLOW_SRC }} + oneflow_branch=`git rev-parse --abbrev-ref HEAD` + # oneflow_branch=`git branch --show-current` + # oneflow_branch=`git symbolic-ref --short HEAD` + oneflow_commit=`git rev-parse HEAD` + oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + oss_dir=${oss_branch_dir}/${oneflow_commit} + echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV + echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + cd $current_dir + set +x + - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Save cache if successful + id: save-cache + timeout-minutes: 5 + with: + oneflow-src: ${{ env.ONEFLOW_SRC }} + entry: ${{ matrix.entry }} + digest-type: build + mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} + - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Build manylinux ${{ github.event.inputs.compute_platform }} + id: build-cuda + if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + with: + cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + oneflow-src: ${{ env.ONEFLOW_SRC }} + oneflow-build-env: manylinux + wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + clear-wheelhouse-dir: true + self-hosted: true + cuda-version: "10.2" + manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + docker-run-use-system-http-proxy: false + docker-run-use-lld: false + retry-failed-build: true + python-versions: | + ${{ github.event.inputs.python_version }} + - name: Upload wheel + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + uses: ./.github/actions/upload_oss + with: + src_path: ${{ env.WHEELHOUSE_DIR }} + oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + - name: Update pip index + if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + env: + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + run: | + python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + python3 -m pip install oss2 beautifulsoup4 --user + python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html # test: # name: Test suite - # # needs: [find-test-cache]build-manylinux - # needs: [build-manylinux] + # # needs: [find-test-cache]build-oneflow + # needs: [build-oneflow] # # runs-on: ['self-hosted', 'linux', 'provision'] # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] # env: @@ -269,7 +258,7 @@ jobs: # run: | # docker exec ${TEST_CONTAINER_NAME} python3 --version # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-manylinux.outputs.ONEFLOW_WHEEL_PATH }} oneflow + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow # - name: Test container # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} # run: | From c9d9e8dff9b8cd66c140406d8e210b4621b16164 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:37:00 +0800 Subject: [PATCH 095/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 548a12e..eacfe19 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -72,7 +72,7 @@ jobs: find-build-cache: name: "Find build wheel" needs: [find-oss-wheel] - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} runs-on: ubuntu-latest outputs: matrix: ${{ steps.find-cache.outputs.matrix }} @@ -183,7 +183,7 @@ jobs: run: | python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple python3 -m pip install oss2 beautifulsoup4 --user - python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html + python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html --index_key=${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }}/index.html # test: # name: Test suite From 6c76e33d034733a902fa483e3ff7592f7a95e81a Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 21:46:54 +0800 Subject: [PATCH 096/130] test --- .github/workflows/cnn_e2e.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index eacfe19..58699fc 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -109,6 +109,10 @@ jobs: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: + - name: Test only + run: | + set -x + echo '${{ matrix }} == ${{ github.event.inputs.compute_platform }}' - name: Fix permissions run: | set -x @@ -133,7 +137,8 @@ jobs: # oneflow_branch=`git branch --show-current` # oneflow_branch=`git symbolic-ref --short HEAD` oneflow_commit=`git rev-parse HEAD` - oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + oss_branch_dir=branch/${oneflow_branch}/${{ matrix.entry }} oss_dir=${oss_branch_dir}/${oneflow_commit} echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV echo "oss_dir=${oss_dir}" >> $GITHUB_ENV @@ -145,11 +150,11 @@ jobs: timeout-minutes: 5 with: oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: ${{ matrix.entry }} + entry: digest-type: build mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Build manylinux ${{ github.event.inputs.compute_platform }} + name: Build OneFlow ${{ github.event.inputs.compute_platform }} id: build-cuda if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} with: From e8a6aaf4def5b1a0960acd984f8f1b4b16797a5d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 22:09:03 +0800 Subject: [PATCH 097/130] test --- .github/workflows/cnn_e2e.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 58699fc..3503a31 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -67,8 +67,6 @@ jobs: with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} - - name: step-2 - run: echo "find-wheel.outputs.find-wheel-hit = ${{ steps.find-wheel.outputs.find-wheel-hit }}" find-build-cache: name: "Find build wheel" needs: [find-oss-wheel] @@ -112,7 +110,7 @@ jobs: - name: Test only run: | set -x - echo '${{ matrix }} == ${{ github.event.inputs.compute_platform }}' + echo '${{ matrix.entry }} == ${{ github.event.inputs.compute_platform }}' - name: Fix permissions run: | set -x From 358c0b86835b2921bd4f3870098c6edeb62f97fe Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 22:12:08 +0800 Subject: [PATCH 098/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 3503a31..328f5b0 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -148,7 +148,7 @@ jobs: timeout-minutes: 5 with: oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: + entry: ${{ matrix.entry }} digest-type: build mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 From 843012a9afd9cf597fd108681562eca8f09aff31 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Mon, 25 Oct 2021 22:14:11 +0800 Subject: [PATCH 099/130] test --- .github/workflows/cnn_e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 328f5b0..91a6f06 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -152,9 +152,9 @@ jobs: digest-type: build mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Build OneFlow ${{ github.event.inputs.compute_platform }} + name: Build OneFlow ${{ matrix.entry }} id: build-cuda - if: ${{ matrix.entry =='${{ github.event.inputs.compute_platform }}' && !matrix.cache-hit }} + if: ${{ matrix.entry == ${{ github.event.inputs.compute_platform }} && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh From 27222533df7d5ead31380c883ba5ee73522a045f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:13:50 +0800 Subject: [PATCH 100/130] test --- .github/workflows/cnn_e2e.yml | 56 ++++------------------------------- 1 file changed, 6 insertions(+), 50 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 91a6f06..dccd4c5 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -67,50 +67,16 @@ jobs: with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} - find-build-cache: - name: "Find build wheel" - needs: [find-oss-wheel] - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.find-cache.outputs.matrix }} - steps: - - uses: actions/checkout@v2 - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/oneflow - ref: ${{ github.event.inputs.of_branch_or_commit }} - path: ${{ env.ONEFLOW_SRC }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: find cache - id: find-cache - with: - runner-labels: | - self-hosted - linux - build - oneflow-src: ${{ env.ONEFLOW_SRC }} - entries: | - ${{ github.event.inputs.compute_platform }} build-oneflow: name: "Build OneFlow ${{ github.event.inputs.compute_platform }}" + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} runs-on: ['self-hosted', 'linux', 'provision'] - needs: [find-build-cache] - strategy: - fail-fast: true - max-parallel: 5 - matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }} env: - MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }} + MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ github.event.inputs.compute_platform }} WHEELHOUSE_DIR: manylinux-wheelhouse OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - - name: Test only - run: | - set -x - echo '${{ matrix.entry }} == ${{ github.event.inputs.compute_platform }}' - name: Fix permissions run: | set -x @@ -136,25 +102,15 @@ jobs: # oneflow_branch=`git symbolic-ref --short HEAD` oneflow_commit=`git rev-parse HEAD` # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - oss_branch_dir=branch/${oneflow_branch}/${{ matrix.entry }} + oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} oss_dir=${oss_branch_dir}/${oneflow_commit} echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV echo "oss_dir=${oss_dir}" >> $GITHUB_ENV cd $current_dir set +x - - uses: Oneflow-Inc/get-oneflow/cache-complete@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Save cache if successful - id: save-cache - timeout-minutes: 5 - with: - oneflow-src: ${{ env.ONEFLOW_SRC }} - entry: ${{ matrix.entry }} - digest-type: build - mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Build OneFlow ${{ matrix.entry }} + name: Build OneFlow ${{ github.event.inputs.compute_platform }} id: build-cuda - if: ${{ matrix.entry == ${{ github.event.inputs.compute_platform }} && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh @@ -171,7 +127,7 @@ jobs: python-versions: | ${{ github.event.inputs.python_version }} - name: Upload wheel - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ steps.build-cuda.outcome == 'success' }} uses: ./.github/actions/upload_oss with: src_path: ${{ env.WHEELHOUSE_DIR }} @@ -179,7 +135,7 @@ jobs: oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - if: ${{ !fromJson(matrix.cache-hit) && steps.build-cuda.outcome == 'success' }} + if: ${{ steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From c03fdfc4aee037c782729f7d142e1618adbb4148 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:15:58 +0800 Subject: [PATCH 101/130] test --- .github/workflows/cnn_e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index dccd4c5..767eeea 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -69,6 +69,7 @@ jobs: entry: ${{ github.event.inputs.compute_platform }} build-oneflow: name: "Build OneFlow ${{ github.event.inputs.compute_platform }}" + needs: [find-oss-wheel] if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} runs-on: ['self-hosted', 'linux', 'provision'] env: From 4cfd7ac0f42f1c4198f5892bde3ef17f3b39ce16 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:18:31 +0800 Subject: [PATCH 102/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 767eeea..375ff06 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -70,7 +70,7 @@ jobs: build-oneflow: name: "Build OneFlow ${{ github.event.inputs.compute_platform }}" needs: [find-oss-wheel] - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit != 1 }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} runs-on: ['self-hosted', 'linux', 'provision'] env: MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ github.event.inputs.compute_platform }} From f7ba24ea026815a3c0a61cd6a34cd2e3bcc9dcad Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:43:27 +0800 Subject: [PATCH 103/130] test --- .github/workflows/cnn_e2e.yml | 58 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 375ff06..fdf0368 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -58,7 +58,7 @@ jobs: cd ${{ env.ONEFLOW_SRC }} oneflow_commit=`git rev-parse HEAD` echo "oneflow_commit=${oneflow_commit}" >> $GITHUB_ENV - oneflow_wheel_path=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/commit/${oneflow_commit}/${{ github.event.inputs.compute_platform }} + oneflow_wheel_path=commit/${oneflow_commit}/${{ github.event.inputs.compute_platform }} echo "::set-output name=ONEFLOW_WHEEL_PATH::${oneflow_wheel_path}" set +x - name: Check if wheel available @@ -109,34 +109,34 @@ jobs: echo "oss_dir=${oss_dir}" >> $GITHUB_ENV cd $current_dir set +x - - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - name: Build OneFlow ${{ github.event.inputs.compute_platform }} - id: build-cuda - with: - cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake - build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh - oneflow-src: ${{ env.ONEFLOW_SRC }} - oneflow-build-env: manylinux - wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - clear-wheelhouse-dir: true - self-hosted: true - cuda-version: "10.2" - manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - docker-run-use-system-http-proxy: false - docker-run-use-lld: false - retry-failed-build: true - python-versions: | - ${{ github.event.inputs.python_version }} - - name: Upload wheel - if: ${{ steps.build-cuda.outcome == 'success' }} - uses: ./.github/actions/upload_oss - with: - src_path: ${{ env.WHEELHOUSE_DIR }} - oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} - oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + # name: Build OneFlow ${{ github.event.inputs.compute_platform }} + # id: build-cuda + # with: + # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + # oneflow-src: ${{ env.ONEFLOW_SRC }} + # oneflow-build-env: manylinux + # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + # clear-wheelhouse-dir: true + # self-hosted: true + # cuda-version: "10.2" + # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + # docker-run-use-system-http-proxy: false + # docker-run-use-lld: false + # retry-failed-build: true + # python-versions: | + # ${{ github.event.inputs.python_version }} + # - name: Upload wheel + # if: ${{ steps.build-cuda.outcome == 'success' }} + # uses: ./.github/actions/upload_oss + # with: + # src_path: ${{ env.WHEELHOUSE_DIR }} + # oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - if: ${{ steps.build-cuda.outcome == 'success' }} + # if: ${{ steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} @@ -218,7 +218,7 @@ jobs: # run: | # docker exec ${TEST_CONTAINER_NAME} python3 --version # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow # - name: Test container # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} # run: | From 0ccfdd71a4d8e28dcee3eca58c4bf798bb0a7dc6 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:51:02 +0800 Subject: [PATCH 104/130] test --- .github/workflows/cnn_e2e.yml | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index fdf0368..074132a 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -78,37 +78,37 @@ jobs: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - - name: Fix permissions - run: | - set -x - docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - - name: Remove leftover cuda-installer.log - run: | - docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - - uses: actions/checkout@v2 - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - with: - repository: Oneflow-Inc/oneflow - ref: ${{ github.event.inputs.of_branch_or_commit }} - path: ${{ env.ONEFLOW_SRC }} - - name: Set environment variables - id: set-env - run: | - set -x - current_dir=$PWD - cd ${{ env.ONEFLOW_SRC }} - oneflow_branch=`git rev-parse --abbrev-ref HEAD` - # oneflow_branch=`git branch --show-current` - # oneflow_branch=`git symbolic-ref --short HEAD` - oneflow_commit=`git rev-parse HEAD` - # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - oss_dir=${oss_branch_dir}/${oneflow_commit} - echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV - echo "oss_dir=${oss_dir}" >> $GITHUB_ENV - cd $current_dir - set +x + # - name: Fix permissions + # run: | + # set -x + # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + # - name: Remove leftover cuda-installer.log + # run: | + # docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log + # - uses: actions/checkout@v2 + # - name: Checkout Oneflow-Inc/oneflow + # uses: actions/checkout@v2 + # with: + # repository: Oneflow-Inc/oneflow + # ref: ${{ github.event.inputs.of_branch_or_commit }} + # path: ${{ env.ONEFLOW_SRC }} + # - name: Set environment variables + # id: set-env + # run: | + # set -x + # current_dir=$PWD + # cd ${{ env.ONEFLOW_SRC }} + # oneflow_branch=`git rev-parse --abbrev-ref HEAD` + # # oneflow_branch=`git branch --show-current` + # # oneflow_branch=`git symbolic-ref --short HEAD` + # oneflow_commit=`git rev-parse HEAD` + # # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + # oss_dir=${oss_branch_dir}/${oneflow_commit} + # echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV + # echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + # cd $current_dir + # set +x # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 # name: Build OneFlow ${{ github.event.inputs.compute_platform }} # id: build-cuda From 8f3bb98aa0533a6e5d08cd6ebe11081420855196 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:53:04 +0800 Subject: [PATCH 105/130] test --- .github/workflows/cnn_e2e.yml | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 074132a..fdf0368 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -78,37 +78,37 @@ jobs: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - # - name: Fix permissions - # run: | - # set -x - # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - # - name: Remove leftover cuda-installer.log - # run: | - # docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - # - uses: actions/checkout@v2 - # - name: Checkout Oneflow-Inc/oneflow - # uses: actions/checkout@v2 - # with: - # repository: Oneflow-Inc/oneflow - # ref: ${{ github.event.inputs.of_branch_or_commit }} - # path: ${{ env.ONEFLOW_SRC }} - # - name: Set environment variables - # id: set-env - # run: | - # set -x - # current_dir=$PWD - # cd ${{ env.ONEFLOW_SRC }} - # oneflow_branch=`git rev-parse --abbrev-ref HEAD` - # # oneflow_branch=`git branch --show-current` - # # oneflow_branch=`git symbolic-ref --short HEAD` - # oneflow_commit=`git rev-parse HEAD` - # # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} - # oss_dir=${oss_branch_dir}/${oneflow_commit} - # echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV - # echo "oss_dir=${oss_dir}" >> $GITHUB_ENV - # cd $current_dir - # set +x + - name: Fix permissions + run: | + set -x + docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + - name: Remove leftover cuda-installer.log + run: | + docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log + - uses: actions/checkout@v2 + - name: Checkout Oneflow-Inc/oneflow + uses: actions/checkout@v2 + with: + repository: Oneflow-Inc/oneflow + ref: ${{ github.event.inputs.of_branch_or_commit }} + path: ${{ env.ONEFLOW_SRC }} + - name: Set environment variables + id: set-env + run: | + set -x + current_dir=$PWD + cd ${{ env.ONEFLOW_SRC }} + oneflow_branch=`git rev-parse --abbrev-ref HEAD` + # oneflow_branch=`git branch --show-current` + # oneflow_branch=`git symbolic-ref --short HEAD` + oneflow_commit=`git rev-parse HEAD` + # oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + oss_branch_dir=branch/${oneflow_branch}/${{ github.event.inputs.compute_platform }} + oss_dir=${oss_branch_dir}/${oneflow_commit} + echo "oss_branch_dir=${oss_branch_dir}" >> $GITHUB_ENV + echo "oss_dir=${oss_dir}" >> $GITHUB_ENV + cd $current_dir + set +x # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 # name: Build OneFlow ${{ github.event.inputs.compute_platform }} # id: build-cuda From 42b9dda5ef51580474629d198c296f614aaec14f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 15:57:41 +0800 Subject: [PATCH 106/130] test --- .github/workflows/cnn_e2e.yml | 54 +++++++++++++++++------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index fdf0368..359bbfc 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -109,34 +109,34 @@ jobs: echo "oss_dir=${oss_dir}" >> $GITHUB_ENV cd $current_dir set +x - # - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - # name: Build OneFlow ${{ github.event.inputs.compute_platform }} - # id: build-cuda - # with: - # cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake - # build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh - # oneflow-src: ${{ env.ONEFLOW_SRC }} - # oneflow-build-env: manylinux - # wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - # clear-wheelhouse-dir: true - # self-hosted: true - # cuda-version: "10.2" - # manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - # docker-run-use-system-http-proxy: false - # docker-run-use-lld: false - # retry-failed-build: true - # python-versions: | - # ${{ github.event.inputs.python_version }} - # - name: Upload wheel - # if: ${{ steps.build-cuda.outcome == 'success' }} - # uses: ./.github/actions/upload_oss - # with: - # src_path: ${{ env.WHEELHOUSE_DIR }} - # oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + name: Build OneFlow ${{ github.event.inputs.compute_platform }} + id: build-cuda + with: + cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake + build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh + oneflow-src: ${{ env.ONEFLOW_SRC }} + oneflow-build-env: manylinux + wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} + clear-wheelhouse-dir: true + self-hosted: true + cuda-version: "10.2" + manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} + docker-run-use-system-http-proxy: false + docker-run-use-lld: false + retry-failed-build: true + python-versions: | + ${{ github.event.inputs.python_version }} + - name: Upload wheel + if: ${{ steps.build-cuda.outcome == 'success' }} + uses: ./.github/actions/upload_oss + with: + src_path: ${{ env.WHEELHOUSE_DIR }} + oss_dst_path: oss://oneflow-staging/${{ env.oss_dir }} + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index - # if: ${{ steps.build-cuda.outcome == 'success' }} + if: ${{ steps.build-cuda.outcome == 'success' }} env: OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} From fb605e05e81fcfac1b20e290d7e278e300e95ed6 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 19:50:02 +0800 Subject: [PATCH 107/130] test find wheel --- .github/workflows/cnn_e2e.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 359bbfc..4d6f303 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -63,14 +63,15 @@ jobs: set +x - name: Check if wheel available id: find-wheel - uses: Oneflow-Inc/get-oneflow/find-wheel@ee5b8d83dfc4645d0e67ae603d31f78fd4b1c1a7 + uses: Oneflow-Inc/get-oneflow/find-wheel@ecd186e8c13383f0a2b7eabe6ea37c53a8660572 with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} + python-version: ${{ github.event.inputs.python_version }} build-oneflow: name: "Build OneFlow ${{ github.event.inputs.compute_platform }}" needs: [find-oss-wheel] - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 1 }} + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} runs-on: ['self-hosted', 'linux', 'provision'] env: MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ github.event.inputs.compute_platform }} From 409f18904d29cd33f87813b0c0556d0b9953c7d4 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 26 Oct 2021 20:11:09 +0800 Subject: [PATCH 108/130] update find-wheel commit --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 4d6f303..5208bee 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -63,7 +63,7 @@ jobs: set +x - name: Check if wheel available id: find-wheel - uses: Oneflow-Inc/get-oneflow/find-wheel@ecd186e8c13383f0a2b7eabe6ea37c53a8660572 + uses: Oneflow-Inc/get-oneflow/find-wheel@5ef34358307145c688d820531b512b467fc01fe2 with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} From 7d3dfdb59738ce891381f340d146db487077ae13 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 12:59:05 +0800 Subject: [PATCH 109/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 5208bee..5f663fd 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -63,7 +63,7 @@ jobs: set +x - name: Check if wheel available id: find-wheel - uses: Oneflow-Inc/get-oneflow/find-wheel@5ef34358307145c688d820531b512b467fc01fe2 + uses: Oneflow-Inc/get-oneflow/find-wheel@addc8720b3780fd132a94059bc3cb2e216f4d7d4 with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} From 48115dcf91ca3171cd1011449c61c699ace37a15 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 14:20:14 +0800 Subject: [PATCH 110/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 5f663fd..68d36ae 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -63,7 +63,7 @@ jobs: set +x - name: Check if wheel available id: find-wheel - uses: Oneflow-Inc/get-oneflow/find-wheel@addc8720b3780fd132a94059bc3cb2e216f4d7d4 + uses: Oneflow-Inc/get-oneflow/find-wheel@d55788aea9c9d81a61da980327de839f0ad2a733 with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} From ee3233f3c41fc8b84993c043de547fe1e7f83107 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 15:15:43 +0800 Subject: [PATCH 111/130] test --- .github/workflows/cnn_e2e.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 68d36ae..e029e14 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -41,6 +41,9 @@ jobs: find-oss-wheel: name: "Set env and Find wheel in oss" runs-on: ubuntu-latest + env: + OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} outputs: find-wheel-hit: ${{ steps.find-wheel.outputs.find-wheel-hit }} ONEFLOW_WHEEL_PATH: ${{ steps.set-wheel-path.outputs.ONEFLOW_WHEEL_PATH }} From 02d5c5528443f65f0958fb28d470f3e622a8e424 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 15:28:20 +0800 Subject: [PATCH 112/130] test --- .github/workflows/cnn_e2e.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index e029e14..c32ff45 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -74,7 +74,6 @@ jobs: build-oneflow: name: "Build OneFlow ${{ github.event.inputs.compute_platform }}" needs: [find-oss-wheel] - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} runs-on: ['self-hosted', 'linux', 'provision'] env: MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ github.event.inputs.compute_platform }} @@ -83,20 +82,26 @@ jobs: OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - name: Fix permissions + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | set -x + echo ${OSS_ACCESS_KEY_ID} docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - uses: actions/checkout@v2 + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} - name: Checkout Oneflow-Inc/oneflow + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} uses: actions/checkout@v2 with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - name: Set environment variables + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} id: set-env run: | set -x @@ -114,6 +119,7 @@ jobs: cd $current_dir set +x - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} name: Build OneFlow ${{ github.event.inputs.compute_platform }} id: build-cuda with: @@ -141,9 +147,9 @@ jobs: oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index if: ${{ steps.build-cuda.outcome == 'success' }} - env: - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # env: + # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} + # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} run: | python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple python3 -m pip install oss2 beautifulsoup4 --user From eb9d9bde8b63391107721c33f54e75c1569f9796 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 15:53:09 +0800 Subject: [PATCH 113/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index c32ff45..ae8a0b7 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -82,7 +82,7 @@ jobs: OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - name: Fix permissions - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == false }} run: | set -x echo ${OSS_ACCESS_KEY_ID} From e89ec03ac986a08eeedb4de3cab1e3f8856628ea Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 15:57:38 +0800 Subject: [PATCH 114/130] test --- .github/workflows/cnn_e2e.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index ae8a0b7..dc42353 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -82,10 +82,11 @@ jobs: OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} steps: - name: Fix permissions - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == false }} + # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == false }} run: | set -x echo ${OSS_ACCESS_KEY_ID} + echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} From 1b1ba0e135b22b2a40811ab334a89cf26109b51d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:01:58 +0800 Subject: [PATCH 115/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index dc42353..088c7cd 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -89,7 +89,7 @@ jobs: echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 'false'}} run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - uses: actions/checkout@v2 From 1fe0540ca8f2997cf7603b034a3e0257039a5188 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:18:56 +0800 Subject: [PATCH 116/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 088c7cd..5e7bfab 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -66,7 +66,7 @@ jobs: set +x - name: Check if wheel available id: find-wheel - uses: Oneflow-Inc/get-oneflow/find-wheel@d55788aea9c9d81a61da980327de839f0ad2a733 + uses: Oneflow-Inc/get-oneflow/find-wheel@bcfc1a961b25e48cb4744e8682079c0569961f12 with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} From ba45bceecf4fca5759b65c4b5cf7482e804ab237 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:21:25 +0800 Subject: [PATCH 117/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 5e7bfab..fc4d47c 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -89,7 +89,7 @@ jobs: echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == 'false'}} + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit }} run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - uses: actions/checkout@v2 From 115bd545620ec2265d3cc2308f8befe42926a3e6 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:24:00 +0800 Subject: [PATCH 118/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index fc4d47c..3e7a4e4 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -80,9 +80,9 @@ jobs: WHEELHOUSE_DIR: manylinux-wheelhouse OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} steps: - name: Fix permissions - # if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit == false }} run: | set -x echo ${OSS_ACCESS_KEY_ID} From 399e20e25e2ee11def71bd2ff7babf4b877c9537 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:26:00 +0800 Subject: [PATCH 119/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 3e7a4e4..0a30099 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -89,7 +89,7 @@ jobs: echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit }} + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - uses: actions/checkout@v2 From 12ecef359d8b8127ba8f705fad93d911942bd35f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:31:21 +0800 Subject: [PATCH 120/130] test --- .github/workflows/cnn_e2e.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 0a30099..f7a780a 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -80,13 +80,22 @@ jobs: WHEELHOUSE_DIR: manylinux-wheelhouse OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} steps: + - name: test needs.find-oss-wheel.outputs.find-wheel-hit + if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit }} + run: | + echo test 1: ${OSS_ACCESS_KEY_ID} + echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} + - name: test !needs.find-oss-wheel.outputs.find-wheel-hit + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + run: | + echo test 2: ${OSS_ACCESS_KEY_ID} + echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} - name: Fix permissions + if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | set -x - echo ${OSS_ACCESS_KEY_ID} - echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} From 78617f16a6ef29c540e58f556028477968c6fd13 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:43:22 +0800 Subject: [PATCH 121/130] test --- .github/workflows/cnn_e2e.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index f7a780a..5e6c4b6 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -80,15 +80,15 @@ jobs: WHEELHOUSE_DIR: manylinux-wheelhouse OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} steps: - name: test needs.find-oss-wheel.outputs.find-wheel-hit - if: ${{ needs.find-oss-wheel.outputs.find-wheel-hit }} + if: ${{ fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} run: | echo test 1: ${OSS_ACCESS_KEY_ID} echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} - name: test !needs.find-oss-wheel.outputs.find-wheel-hit - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} run: | echo test 2: ${OSS_ACCESS_KEY_ID} echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} From cdfaebe10a3396a417ebdc6af07d02b7cd922e9c Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 16:47:41 +0800 Subject: [PATCH 122/130] test --- .github/workflows/cnn_e2e.yml | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 5e6c4b6..2edcd6b 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -80,38 +80,38 @@ jobs: WHEELHOUSE_DIR: manylinux-wheelhouse OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} + if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} steps: - - name: test needs.find-oss-wheel.outputs.find-wheel-hit - if: ${{ fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} - run: | - echo test 1: ${OSS_ACCESS_KEY_ID} - echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} - - name: test !needs.find-oss-wheel.outputs.find-wheel-hit - if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} - run: | - echo test 2: ${OSS_ACCESS_KEY_ID} - echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} + # - name: test needs.find-oss-wheel.outputs.find-wheel-hit + # if: ${{ fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} + # run: | + # echo test 1: ${OSS_ACCESS_KEY_ID} + # echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} + # - name: test !needs.find-oss-wheel.outputs.find-wheel-hit + # if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} + # run: | + # echo test 2: ${OSS_ACCESS_KEY_ID} + # echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} - name: Fix permissions - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | set -x docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - uses: actions/checkout@v2 - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} - name: Checkout Oneflow-Inc/oneflow - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} uses: actions/checkout@v2 with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - name: Set environment variables - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} id: set-env run: | set -x @@ -129,7 +129,7 @@ jobs: cd $current_dir set +x - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} + # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} name: Build OneFlow ${{ github.event.inputs.compute_platform }} id: build-cuda with: From 5f2cc6d4a3df21a3b708c54d576cb2da67cdd35f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 17:53:25 +0800 Subject: [PATCH 123/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 2edcd6b..dbc1e70 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -66,7 +66,7 @@ jobs: set +x - name: Check if wheel available id: find-wheel - uses: Oneflow-Inc/get-oneflow/find-wheel@bcfc1a961b25e48cb4744e8682079c0569961f12 + uses: Oneflow-Inc/get-oneflow/find-wheel@ae140da7d4e5ea983b6bbd8dc5f3621557cb472f with: ref: ${{ env.oneflow_commit }} entry: ${{ github.event.inputs.compute_platform }} From bd26486750b1d588e2d7875492b80a73f02822d0 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 18:47:38 +0800 Subject: [PATCH 124/130] test --- .github/workflows/cnn_e2e.yml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index dbc1e70..9cbffe4 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -82,36 +82,21 @@ jobs: OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} steps: - # - name: test needs.find-oss-wheel.outputs.find-wheel-hit - # if: ${{ fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} - # run: | - # echo test 1: ${OSS_ACCESS_KEY_ID} - # echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} - # - name: test !needs.find-oss-wheel.outputs.find-wheel-hit - # if: ${{ !fromJson(needs.find-oss-wheel.outputs.find-wheel-hit) }} - # run: | - # echo test 2: ${OSS_ACCESS_KEY_ID} - # echo needs.find-oss-wheel.outputs.find-wheel-hit=${{ needs.find-oss-wheel.outputs.find-wheel-hit }} - name: Fix permissions - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | set -x docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - name: Remove leftover cuda-installer.log - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} run: | docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log - uses: actions/checkout@v2 - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} - name: Checkout Oneflow-Inc/oneflow - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} uses: actions/checkout@v2 with: repository: Oneflow-Inc/oneflow ref: ${{ github.event.inputs.of_branch_or_commit }} path: ${{ env.ONEFLOW_SRC }} - name: Set environment variables - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} id: set-env run: | set -x @@ -129,7 +114,6 @@ jobs: cd $current_dir set +x - uses: Oneflow-Inc/get-oneflow@2a9efceab8d45b725a687e73f870f9b75a15e472 - # if: ${{ !needs.find-oss-wheel.outputs.find-wheel-hit }} name: Build OneFlow ${{ github.event.inputs.compute_platform }} id: build-cuda with: @@ -157,9 +141,6 @@ jobs: oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index if: ${{ steps.build-cuda.outcome == 'success' }} - # env: - # OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - # OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} run: | python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple python3 -m pip install oss2 beautifulsoup4 --user From ce08ec2bacae234253cd4e72a146e46443c9d69f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 27 Oct 2021 18:59:56 +0800 Subject: [PATCH 125/130] test --- .github/workflows/cnn_e2e.yml | 159 +++++++++++++++------------------- 1 file changed, 72 insertions(+), 87 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 9cbffe4..d1afb6e 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -146,92 +146,77 @@ jobs: python3 -m pip install oss2 beautifulsoup4 --user python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html --index_key=${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }}/index.html - # test: - # name: Test suite - # # needs: [find-test-cache]build-oneflow - # needs: [build-oneflow] - # # runs-on: ['self-hosted', 'linux', 'provision'] - # runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] - # env: - # TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" - # TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae - # # strategy: - # # fail-fast: true - # # max-parallel: 5 - # # matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }} - # steps: - # - name: Fix permissions - # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - # run: | - # set -x - # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - # - name: Checkout Oneflow-Inc/OneFlow-Benchmark - # uses: actions/checkout@v2 - # - name: Remove container - # timeout-minutes: 45 - # # if: ${{ contains(matrix.runs-on, 'self-hosted') }} - # run: | - # docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - # - name: Enable Pytorch container - # run: | - # echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV - # # - name: Set environment variables - # # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # # run: | - # # set -x - # # echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV - # # echo "ONEFLOW_WHEEL_PATH=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu102/41b06bf56daaa5ea0087998399d5980e9fc5ab59" >> $GITHUB_ENV - # - name: Start container - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # # working-directory: ${{ env.ONEFLOW_SRC }} - # # env: - # # ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin - # run: | - # docker pull ${{ env.TEST_IMG_TAG }} - # docker run -d --rm --privileged --network host --shm-size=8g \ - # --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ - # --runtime=nvidia \ - # -v /DATA/disk1:/dataset:ro \ - # -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ - # -v $PWD:$PWD \ - # -w $PWD \ - # -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ - # -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ - # -e E2E_NODE_NUM=1 \ - # -e E2E_BATCH_SIZE=32 \ - # -e E2E_LEARNING_RATE=1.536 \ - # -e E2E_SRC_ROOT=Classification/cnns \ - # -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ - # --name ${TEST_CONTAINER_NAME} \ - # ${{ env.TEST_IMG_TAG }} \ - # sleep 3600 - # # -e ONEFLOW_CI=1 \ - # # -v /model_zoo:/model_zoo:ro \ - # # -v $HOME/test-container-cache/dot-local:/root/.local \ - # # -v $HOME/test-container-cache/dot-cache:/root/.cache \ - # # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ - # # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ - # # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ - # # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ - # # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ - # - name: Install OneFlow - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }} - # run: | - # docker exec ${TEST_CONTAINER_NAME} python3 --version - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow - # - name: Test container - # # if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - # run: | - # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - # - name: Upload log - # # if: ${{ always() && (steps.distributed_try_3.outcome=='failure' || steps.new_interface_distributed_try_3.outcome=='failure') && github.event.pull_request.head.repo.full_name == github.repository }} - # uses: ./.github/actions/upload_oss - # with: - # src_path: log - # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # upload_core: false + test: + name: Test suite + needs: [build-oneflow] + if: always() + runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] + env: + TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae + steps: + - name: just a test + run: | + echo ${TEST_CONTAINER_NAME} + echo ${TEST_WITH_TORCH_IMG_TAG} + echo ${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} + # - name: Fix permissions + # run: | + # set -x + # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + # - name: Checkout Oneflow-Inc/OneFlow-Benchmark + # uses: actions/checkout@v2 + # - name: Remove container + # timeout-minutes: 45 + # run: | + # docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true + # - name: Enable Pytorch container + # run: | + # echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV + # - name: Start container + # run: | + # docker pull ${{ env.TEST_IMG_TAG }} + # docker run -d --rm --privileged --network host --shm-size=8g \ + # --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + # --runtime=nvidia \ + # -v /DATA/disk1:/dataset:ro \ + # -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ + # -v $PWD:$PWD \ + # -w $PWD \ + # -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ + # -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ + # -e E2E_NODE_NUM=1 \ + # -e E2E_BATCH_SIZE=32 \ + # -e E2E_LEARNING_RATE=1.536 \ + # -e E2E_SRC_ROOT=Classification/cnns \ + # -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ + # --name ${TEST_CONTAINER_NAME} \ + # ${{ env.TEST_IMG_TAG }} \ + # sleep 3600 + # # -e ONEFLOW_CI=1 \ + # # -v /model_zoo:/model_zoo:ro \ + # # -v $HOME/test-container-cache/dot-local:/root/.local \ + # # -v $HOME/test-container-cache/dot-cache:/root/.cache \ + # # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ + # # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ + # # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ + # # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ + # # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + # - name: Install OneFlow + # run: | + # docker exec ${TEST_CONTAINER_NAME} python3 --version + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow + # - name: Test container + # run: | + # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + # - name: Upload log + # uses: ./.github/actions/upload_oss + # with: + # src_path: log + # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # upload_core: false From 64939ea9d886d375923ad5721cbc10e96f0e8eb9 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 28 Oct 2021 09:50:54 +0800 Subject: [PATCH 126/130] test --- .github/workflows/cnn_e2e.yml | 123 ++++++++++++++++------------------ 1 file changed, 59 insertions(+), 64 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index d1afb6e..1655bc8 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -147,7 +147,7 @@ jobs: python3 ${{ env.ONEFLOW_SRC }}/tools/create_pip_index.py --dir_key ${oss_dir} -b oneflow-staging --index_key=${oss_branch_dir}/index.html --index_key=${oss_dir}/index.html --index_key=${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }}/index.html test: - name: Test suite + name: Test ResNet50 needs: [build-oneflow] if: always() runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] @@ -155,68 +155,63 @@ jobs: TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae steps: - - name: just a test - run: | - echo ${TEST_CONTAINER_NAME} - echo ${TEST_WITH_TORCH_IMG_TAG} - echo ${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} - # - name: Fix permissions - # run: | - # set -x - # docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . - # - name: Checkout Oneflow-Inc/OneFlow-Benchmark - # uses: actions/checkout@v2 - # - name: Remove container - # timeout-minutes: 45 - # run: | - # docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - # - name: Enable Pytorch container - # run: | - # echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV - # - name: Start container - # run: | - # docker pull ${{ env.TEST_IMG_TAG }} - # docker run -d --rm --privileged --network host --shm-size=8g \ - # --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ - # --runtime=nvidia \ - # -v /DATA/disk1:/dataset:ro \ - # -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ - # -v $PWD:$PWD \ - # -w $PWD \ - # -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ - # -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ - # -e E2E_NODE_NUM=1 \ - # -e E2E_BATCH_SIZE=32 \ - # -e E2E_LEARNING_RATE=1.536 \ - # -e E2E_SRC_ROOT=Classification/cnns \ - # -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ - # --name ${TEST_CONTAINER_NAME} \ - # ${{ env.TEST_IMG_TAG }} \ - # sleep 3600 - # # -e ONEFLOW_CI=1 \ - # # -v /model_zoo:/model_zoo:ro \ - # # -v $HOME/test-container-cache/dot-local:/root/.local \ - # # -v $HOME/test-container-cache/dot-cache:/root/.cache \ - # # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ - # # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ - # # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ - # # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ - # # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ - # - name: Install OneFlow - # run: | - # docker exec ${TEST_CONTAINER_NAME} python3 --version - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - # docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow - # - name: Test container - # run: | - # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - # - name: Upload log - # uses: ./.github/actions/upload_oss - # with: - # src_path: log - # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # upload_core: false + - name: Fix permissions + run: | + set -x + docker run --rm -v $PWD:/p -w /p busybox chown -R $(id -u):$(id -g) . + - name: Checkout Oneflow-Inc/OneFlow-Benchmark + uses: actions/checkout@v2 + - name: Remove container + timeout-minutes: 45 + run: | + docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true + - name: Enable Pytorch container + run: | + echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV + - name: Start container + run: | + docker pull ${{ env.TEST_IMG_TAG }} + docker run -d --rm --privileged --network host --shm-size=8g \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + --runtime=nvidia \ + -v /DATA/disk1:/dataset:ro \ + -e ONEFLOW_WHEEL_PATH=${{ env.ONEFLOW_WHEEL_PATH }} \ + -v $PWD:$PWD \ + -w $PWD \ + -e E2E_NUM_EPOCHS=${{ github.event.inputs.num_epochs }} \ + -e E2E_GPU_NUM_PER_NODE=${{ github.event.inputs.gpu_num_per_node }} \ + -e E2E_NODE_NUM=1 \ + -e E2E_BATCH_SIZE=32 \ + -e E2E_LEARNING_RATE=1.536 \ + -e E2E_SRC_ROOT=Classification/cnns \ + -e E2E_DATA_ROOT=/dataset/ImageNet/ofrecord \ + --name ${TEST_CONTAINER_NAME} \ + ${{ env.TEST_IMG_TAG }} \ + sleep 3600 + # -e ONEFLOW_CI=1 \ + # -v /model_zoo:/model_zoo:ro \ + # -v $HOME/test-container-cache/dot-local:/root/.local \ + # -v $HOME/test-container-cache/dot-cache:/root/.cache \ + # -e ONEFLOW_BIN_PATH=${ONEFLOW_BIN_PATH} \ + # -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \ + # -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \ + # -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \ + # -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \ + - name: Install OneFlow + run: | + docker exec ${TEST_CONTAINER_NAME} python3 --version + docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow + - name: Test container + run: | + docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + - name: Upload log + uses: ./.github/actions/upload_oss + with: + src_path: log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + upload_core: false From 8c1388071951f1ae3eea4206fe4eda6ca910db7e Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 28 Oct 2021 10:08:14 +0800 Subject: [PATCH 127/130] test --- .github/workflows/cnn_e2e.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 1655bc8..84474be 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -148,7 +148,7 @@ jobs: test: name: Test ResNet50 - needs: [build-oneflow] + needs: [build-oneflow, find-oss-wheel] if: always() runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] env: @@ -201,17 +201,17 @@ jobs: run: | docker exec ${TEST_CONTAINER_NAME} python3 --version docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.build-oneflow.outputs.ONEFLOW_WHEEL_PATH }} oneflow - - name: Test container - run: | - docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - - name: Upload log - uses: ./.github/actions/upload_oss - with: - src_path: log - oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log - oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - upload_core: false + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }} oneflow + # - name: Test container + # run: | + # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + # - name: Upload log + # uses: ./.github/actions/upload_oss + # with: + # src_path: log + # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + # upload_core: false From f36513a9f359c48356efe79e479d43b1f11320c7 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 28 Oct 2021 11:07:26 +0800 Subject: [PATCH 128/130] test --- .github/workflows/cnn_e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 84474be..bc17e71 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -201,7 +201,7 @@ jobs: run: | docker exec ${TEST_CONTAINER_NAME} python3 --version docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=http://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }} oneflow + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=https://staging.oneflow.info/${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }} oneflow # - name: Test container # run: | # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh From 3c533d1a0e03ff4f8171e2cb2611f9049f3230ae Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 28 Oct 2021 12:41:32 +0800 Subject: [PATCH 129/130] test --- .github/workflows/cnn_e2e.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index bc17e71..8c4b6d8 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -202,16 +202,16 @@ jobs: docker exec ${TEST_CONTAINER_NAME} python3 --version docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=https://staging.oneflow.info/${{ needs.find-oss-wheel.outputs.ONEFLOW_WHEEL_PATH }} oneflow - # - name: Test container - # run: | - # docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh - # - name: Upload log - # uses: ./.github/actions/upload_oss - # with: - # src_path: log - # oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log - # oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} - # oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - # upload_core: false + - name: Run Test + run: | + docker exec ${{ env.TEST_CONTAINER_NAME }} bash ci/test/resnet50_e2e.sh + - name: Upload log + uses: ./.github/actions/upload_oss + with: + src_path: log + oss_dst_path: oss://oneflow-log/OneFlow-Benchmark/${{ github.ref }}.${GITHUB_SHA::7}/oneflow/${{ github.event.inputs.of_branch_or_commit }}/${{github.run_id}}/log + oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} + oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} + upload_core: false From 4b7984b45b09bc5a551c7605b75862ece67b3663 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 28 Oct 2021 14:34:43 +0800 Subject: [PATCH 130/130] test on KS --- .github/workflows/cnn_e2e.yml | 2 +- ci/test/resnet50_e2e.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cnn_e2e.yml b/.github/workflows/cnn_e2e.yml index 8c4b6d8..8dc3ee0 100644 --- a/.github/workflows/cnn_e2e.yml +++ b/.github/workflows/cnn_e2e.yml @@ -152,7 +152,7 @@ jobs: if: always() runs-on: ['self-hosted', 'linux', 'x64', 'gpu-8-titan-v'] env: - TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" + TEST_CONTAINER_NAME: "oneflow_benchmark-run-id-${{ github.run_id }}-${{ github.event.inputs.compute_platform }}-test" TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae steps: - name: Fix permissions diff --git a/ci/test/resnet50_e2e.sh b/ci/test/resnet50_e2e.sh index eec78e4..4cf63f6 100755 --- a/ci/test/resnet50_e2e.sh +++ b/ci/test/resnet50_e2e.sh @@ -31,7 +31,7 @@ python3 ${E2E_SRC_ROOT}/of_cnn_train_val.py \ --nccl_fusion_max_ops=24 \ --gpu_image_decoder=True \ --num_epoch=$E2E_NUM_EPOCHS \ - --num_examples=1024 \ + --num_examples=1281167 \ --model=${model} 2>&1 | tee ${LOGFILE} echo "Writting log to ${LOGFILE}"