diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 6397d07..a0f4e74 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -2,22 +2,37 @@ name: benchmark
 
 on:
   push:
-    branches:
-      - main
+    branches: [main]
   pull_request:
   workflow_dispatch:
-
 
 concurrency:
-  # Cancel concurrent flows
   group: ci-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 
 jobs:
+  # 1. SETUP: Dynamically find the samples
+  setup:
+    runs-on: ubuntu-latest
+    outputs:
+      samples: ${{ steps.set-matrix.outputs.samples }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: set-matrix
+        run: |
+          # Auto-discover benchmarks from resources/regions/, but only keep
+          # those that include a target regions BED (required by the workflow).
+          SAMPLES=$(for d in resources/regions/*/; do [ -f "$d/target-regions.bed" ] && basename "$d"; done | jq -R -s -c 'split("\n")[:-1]')
+          echo "samples=$SAMPLES" >> "$GITHUB_OUTPUT"
+
+  # 2. EVALUATE: The matrix-based analysis
   evaluate:
+    needs: setup
     runs-on: ubuntu-latest
-    permissions:
-      actions: write
+    strategy:
+      fail-fast: false
+      matrix:
+        sample: ${{ fromJson(needs.setup.outputs.samples) }}
     env:
       FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }}
       ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }}
@@ -27,26 +42,12 @@ jobs:
       BO_CORE_UNIT_TOKEN: ${{ secrets.BO_CORE_UNIT_TOKEN }}
       BENCHMARK_GIAB_NA12878_TWIST_TOKEN: ${{ secrets.BENCHMARK_GIAB_NA12878_TWIST_TOKEN }}
       SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }}
-    steps:
-      - uses: 8BitJonny/gh-get-current-pr@3.0.0
-        id: pr
-        with:
-          sha: ${{ github.event.pull_request.head.sha }}
-          filterOutClosed: true
-
-      - run: |
-          echo "is PR: ${{ steps.pr.outputs.pr_found }}"
-          echo "current branch: ${{ github.ref }}"
 
+    steps:
       - name: Free Disk Space (Ubuntu)
         uses: jlumbroso/free-disk-space@v1.3.1
         with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
           tool-cache: false
-
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
           android: true
           dotnet: true
           haskell: true
@@ -58,7 +59,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Download reference genome
-        uses: snakemake/snakemake-github-action@v2
+        uses: snakemake/snakemake-github-action@v2.0.3
         with:
           directory: "."
           snakefile: "workflow/Snakefile"
@@ -68,25 +69,22 @@ jobs:
             pip install snakemake-storage-plugin-http
 
       - name: Download truthsets
-        uses: snakemake/snakemake-github-action@v2
+        uses: snakemake/snakemake-github-action@v2.0.3
         with:
           directory: "."
           snakefile: "workflow/Snakefile"
-          args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth"
+          args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }}"
           stagein: |
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
 
-      # This step is necessary (after downloading the truthsets above) to ensure
-      # that the files coming from the git repo are not triggering reruns
-      # because their modification dates are too new or too old.
-      # (as git does not preserve modification dates)
       - name: Fix modification dates
-        uses: snakemake/snakemake-github-action@v2
+        uses: snakemake/snakemake-github-action@v2.0.3
         with:
           directory: "."
           snakefile: "workflow/Snakefile"
-          args: "--cores 1 --sdm conda --touch resources/regions/*/test-regions.cov-*.bed"
+          # Only touch the files for THIS matrix sample
+          args: "--cores 1 --sdm conda --touch resources/regions/${{ matrix.sample }}/test-regions.cov-*.bed"
           stagein: |
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
@@ -97,11 +95,39 @@ jobs:
           directory: "."
           snakefile: "workflow/Snakefile"
           args: >
+            benchmark_all
+            --config benchmark=${{ matrix.sample }}
             --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp
           stagein: |
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
 
+      - name: Upload results
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.sample }}
+          # Single-path upload: the artifact stores the CONTENTS of results/ (the directory name itself is not kept).
+          path: results/
+          retention-days: 1
+
+  # 3. REPORT: Combine and Deploy
+  report:
+    needs: evaluate
+    runs-on: ubuntu-latest
+    # "Trigger homepage build" dispatches another workflow; with the default token this needs actions: write.
+    permissions:
+      actions: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download all results
+        uses: actions/download-artifact@v4
+        with:
+          # Artifacts hold the contents of results/ without that prefix, so extract into results/ to restore the layout.
+          path: results
+          merge-multiple: true
+
       - name: Create report
         uses: snakemake/snakemake-github-action@v2
         with:
@@ -112,15 +138,14 @@ jobs:
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
 
-      - name: Upload report as artifact
-
+      - name: Upload final report
         uses: actions/upload-artifact@v4
         with:
           name: report
           path: report.zip
 
       - name: Trigger homepage build
-        if: ${{ (steps.pr.outputs.pr_found != 'true') && (github.ref == 'refs/heads/main') }}
+        if: github.ref == 'refs/heads/main'
         uses: benc-uk/workflow-dispatch@v1
         with:
           workflow: deploy-page
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 7c07716..62ef004 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -18,7 +18,7 @@ module benchmark:
         github(
             "snakemake-workflows/dna-seq-benchmark",
             path="workflow/Snakefile",
-            tag="v1.13.0",
+            tag="v1.14.1",
         )
     config:
         config
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 8ebc811..2795bf3 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -5,6 +5,26 @@ def get_zenodo_tag(entry):
         return "zenodo"
 
 
+# Filter callsets by subcategory if specified via --config subcategory=...
+# This allows running benchmarks in parallel across multiple CI runners
+if "subcategory" in config:
+    config["variant-calls"] = {
+        key: callset
+        for key, callset in config["variant-calls"].items()
+        if callset.get("subcategory") == config["subcategory"]
+    }
+
+
+# Filter callsets by benchmark if specified via --config benchmark=...
+# This allows splitting CI work further (e.g. giab-NA12878-agilent-75M vs 200M).
+if "benchmark" in config:
+    config["variant-calls"] = {
+        key: callset
+        for key, callset in config["variant-calls"].items()
+        if callset.get("benchmark") == config["benchmark"]
+    }
+
+
 # add path to callsets
 for key, callset in config["variant-calls"].items():
     if "zenodo" in callset: