From 19beb5304433285a52ef1c20460b33005bb3ba05 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:35:18 +0000 Subject: [PATCH 1/6] Initial plan From ebac6b2d18a27e1c208a04b7b52af85897cd11ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:37:41 +0000 Subject: [PATCH 2/6] fix: remove problematic "Fix modification dates" step causing network failures Co-authored-by: famosab <45968370+famosab@users.noreply.github.com> --- .github/workflows/main.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a0f4e74..edad6ce 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -78,17 +78,6 @@ jobs: pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http - - name: Fix modification dates - uses: snakemake/snakemake-github-action@v2.0.3 - with: - directory: "." 
- snakefile: "workflow/Snakefile" - # Only touch the files for THIS matrix sample - args: "--cores 1 --sdm conda --touch resources/regions/${{ matrix.sample }}/test-regions.cov-*.bed" - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http - - name: Run analysis uses: snakemake/snakemake-github-action@v2 with: From 29687ef791ec2f2451cde805819452ff9462a084 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:42:06 +0000 Subject: [PATCH 3/6] feat: split large benchmarks into chunks to avoid workflow timeouts Co-authored-by: famosab <45968370+famosab@users.noreply.github.com> --- .github/workflows/main.yml | 83 ++++++++++++++++++++++++++++++++------ workflow/rules/common.smk | 21 ++++++++++ 2 files changed, 91 insertions(+), 13 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index edad6ce..1b60600 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,28 +11,85 @@ concurrency: cancel-in-progress: true jobs: - # 1. SETUP: Dynamically find the samples + # 1. SETUP: Dynamically find the samples and create chunks for large benchmarks setup: runs-on: ubuntu-latest outputs: - samples: ${{ steps.set-matrix.outputs.samples }} + matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - uses: actions/checkout@v4 - id: set-matrix run: | - # Auto-discover benchmarks from resources/regions/, but only keep - # those that include a target regions BED (required by the workflow). - SAMPLES=$(for d in resources/regions/*/; do [ -f "$d/target-regions.bed" ] && basename "$d"; done | jq -R -s -c 'split("\n")[:-1]') - echo "samples=$SAMPLES" >> $GITHUB_OUTPUT + # Create a matrix that splits large benchmarks into chunks to avoid timeouts. + # Benchmarks with many variant calls are split into smaller groups. + cat > /tmp/generate_matrix.py << 'EOF' + import json + import yaml + import os - # 2. 
EVALUATE: The matrix-based analysis + # Load config + with open('config/config.yaml', 'r') as f: + config = yaml.safe_load(f) + + # Find samples (benchmarks with target-regions.bed) + samples = [] + for entry in os.listdir('resources/regions'): + path = os.path.join('resources/regions', entry, 'target-regions.bed') + if os.path.isfile(path): + samples.append(entry) + + # Group variant calls by benchmark + benchmarks = {} + for key, callset in config['variant-calls'].items(): + benchmark = callset.get('benchmark') + if benchmark and benchmark in samples: + if benchmark not in benchmarks: + benchmarks[benchmark] = [] + benchmarks[benchmark].append(key) + + # Create matrix entries, splitting large benchmarks into chunks + matrix = [] + CHUNK_SIZE = 8 # Max variant calls per job to avoid timeouts + + for sample in sorted(samples): + calls = benchmarks.get(sample, []) + num_calls = len(calls) + + if num_calls <= CHUNK_SIZE: + # Small benchmark - process as a single job + matrix.append({ + 'sample': sample, + 'chunk': 'all', + 'chunk_index': 0 + }) + else: + # Large benchmark - split into chunks + num_chunks = (num_calls + CHUNK_SIZE - 1) // CHUNK_SIZE + for i in range(num_chunks): + matrix.append({ + 'sample': sample, + 'chunk': f'{i+1}of{num_chunks}', + 'chunk_index': i + }) + + print(json.dumps({'include': matrix})) + EOF + + python3 /tmp/generate_matrix.py > /tmp/matrix.json + MATRIX=$(cat /tmp/matrix.json) + echo "matrix=$MATRIX" >> $GITHUB_OUTPUT + echo "Generated matrix:" >> $GITHUB_STEP_SUMMARY + echo '```json' >> $GITHUB_STEP_SUMMARY + cat /tmp/matrix.json | jq . >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + # 2. 
EVALUATE: The matrix-based analysis (split by sample and chunk) evaluate: needs: setup runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - sample: ${{ fromJson(needs.setup.outputs.samples) }} + matrix: ${{fromJson(needs.setup.outputs.matrix)}} env: FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }} ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }} @@ -73,7 +130,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }}" + args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http @@ -85,7 +142,7 @@ jobs: snakefile: "workflow/Snakefile" args: > benchmark_all - --config benchmark=${{ matrix.sample }} + --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8 --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp stagein: | pip install snakemake-storage-plugin-zenodo @@ -94,8 +151,8 @@ jobs: - name: Upload results uses: actions/upload-artifact@v4 with: - name: ${{ matrix.sample }} - # Upload the workflow outputs (keeps the on-disk paths Snakemake expects). + name: ${{ matrix.sample }}-${{ matrix.chunk }} + # Upload the workflow outputs (keeps the on-disk paths Snakemake sees `results/`. path: results/ retention-days: 1 diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 2795bf3..0a7c00c 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -25,6 +25,27 @@ if "benchmark" in config: } +# Filter callsets by chunk if specified via --config chunk_index=... and chunk_size=... +# This allows splitting large benchmarks into smaller groups to avoid timeouts. 
+if "chunk_index" in config and "chunk_size" in config: + # Get all variant calls for this benchmark (already filtered above) + all_calls = sorted(config["variant-calls"].keys()) + chunk_index = int(config["chunk_index"]) + chunk_size = int(config["chunk_size"]) + + # Select the chunk + start_idx = chunk_index * chunk_size + end_idx = start_idx + chunk_size + chunk_calls = all_calls[start_idx:end_idx] + + # Filter to only this chunk + config["variant-calls"] = { + key: callset + for key, callset in config["variant-calls"].items() + if key in chunk_calls + } + + # add path to callsets for key, callset in config["variant-calls"].items(): if "zenodo" in callset: From b8da1aff247c4595a2d78d7f95820b6c76bb2cb0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:43:19 +0000 Subject: [PATCH 4/6] fix: correct comment typo in workflow file Co-authored-by: famosab <45968370+famosab@users.noreply.github.com> --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1b60600..5747efa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -152,7 +152,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: ${{ matrix.sample }}-${{ matrix.chunk }} - # Upload the workflow outputs (keeps the on-disk paths Snakemake sees `results/`. + # Upload the workflow outputs (keeps the on-disk paths Snakemake expects). 
path: results/ retention-days: 1 From 2592ac00527964a8379bf690390cf00aba08c2e5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:16:14 +0000 Subject: [PATCH 5/6] fix: reduce chunk size to 4 and make report job tolerant of partial failures Co-authored-by: famosab <45968370+famosab@users.noreply.github.com> --- .github/workflows/main.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5747efa..aeba330 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -49,7 +49,7 @@ jobs: # Create matrix entries, splitting large benchmarks into chunks matrix = [] - CHUNK_SIZE = 8 # Max variant calls per job to avoid timeouts + CHUNK_SIZE = 4 # Max variant calls per job to avoid timeouts for sample in sorted(samples): calls = benchmarks.get(sample, []) @@ -130,7 +130,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8" + args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=4" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http @@ -142,7 +142,7 @@ jobs: snakefile: "workflow/Snakefile" args: > benchmark_all - --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8 + --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=4 --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp stagein: | pip install snakemake-storage-plugin-zenodo @@ -159,6 +159,8 @@ jobs: # 3. 
REPORT: Combine and Deploy report: needs: evaluate + # Run report even if some evaluate jobs fail (but not when cancelled); this does not check that any job succeeded + if: ${{ !cancelled() && (success() || failure()) }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -166,7 +168,8 @@ jobs: - name: Download all results uses: actions/download-artifact@v4 with: - # Merge all benchmark artifacts back into the workspace so Snakemake sees `results/`. + # Download all available artifacts (some may be missing if jobs failed) + pattern: "*-*" path: . merge-multiple: true From 4d671f6d4c30eb549aee445351bd138980c03259 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:17:19 +0000 Subject: [PATCH 6/6] fix: use more specific artifact pattern in report job Co-authored-by: famosab <45968370+famosab@users.noreply.github.com> --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index aeba330..373c694 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -168,8 +168,8 @@ jobs: - name: Download all results uses: actions/download-artifact@v4 with: - # Download all available artifacts (some may be missing if jobs failed) - pattern: "*-*" + # Download only benchmark artifacts whose names start with "giab-" (pattern: giab-*) + pattern: "giab-*" path: . merge-multiple: true