From 19beb5304433285a52ef1c20460b33005bb3ba05 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 14:35:18 +0000
Subject: [PATCH 1/6] Initial plan


From ebac6b2d18a27e1c208a04b7b52af85897cd11ab Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 14:37:41 +0000
Subject: [PATCH 2/6] fix: remove problematic "Fix modification dates" step
 causing network failures

Co-authored-by: famosab <45968370+famosab@users.noreply.github.com>
---
 .github/workflows/main.yml | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a0f4e74..edad6ce 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -78,17 +78,6 @@ jobs:
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
 
-      - name: Fix modification dates
-        uses: snakemake/snakemake-github-action@v2.0.3
-        with:
-          directory: "."
-          snakefile: "workflow/Snakefile"
-          # Only touch the files for THIS matrix sample
-          args: "--cores 1 --sdm conda --touch resources/regions/${{ matrix.sample }}/test-regions.cov-*.bed"
-          stagein: |
-            pip install snakemake-storage-plugin-zenodo
-            pip install snakemake-storage-plugin-http
-
       - name: Run analysis
         uses: snakemake/snakemake-github-action@v2
         with:

From 29687ef791ec2f2451cde805819452ff9462a084 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 15:42:06 +0000
Subject: [PATCH 3/6] feat: split large benchmarks into chunks to avoid
 workflow timeouts

Co-authored-by: famosab <45968370+famosab@users.noreply.github.com>
---
 .github/workflows/main.yml | 83 ++++++++++++++++++++++++++++++++------
 workflow/rules/common.smk  | 21 ++++++++++
 2 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index edad6ce..1b60600 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -11,28 +11,85 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  # 1. SETUP: Dynamically find the samples
+  # 1. SETUP: Dynamically find the samples and create chunks for large benchmarks
   setup:
     runs-on: ubuntu-latest
     outputs:
-      samples: ${{ steps.set-matrix.outputs.samples }}
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
       - uses: actions/checkout@v4
       - id: set-matrix
         run: |
-          # Auto-discover benchmarks from resources/regions/, but only keep
-          # those that include a target regions BED (required by the workflow).
-          SAMPLES=$(for d in resources/regions/*/; do [ -f "$d/target-regions.bed" ] && basename "$d"; done | jq -R -s -c 'split("\n")[:-1]')
-          echo "samples=$SAMPLES" >> $GITHUB_OUTPUT
+          # Create a matrix that splits large benchmarks into chunks to avoid timeouts.
+          # Benchmarks with many variant calls are split into smaller groups.
+          cat > /tmp/generate_matrix.py << 'EOF'
+          import json
+          import yaml
+          import os
 
-  # 2. EVALUATE: The matrix-based analysis
+          # Load config
+          with open('config/config.yaml', 'r') as f:
+              config = yaml.safe_load(f)
+
+          # Find samples (benchmarks with target-regions.bed)
+          samples = []
+          for entry in os.listdir('resources/regions'):
+              path = os.path.join('resources/regions', entry, 'target-regions.bed')
+              if os.path.isfile(path):
+                  samples.append(entry)
+
+          # Group variant calls by benchmark
+          benchmarks = {}
+          for key, callset in config['variant-calls'].items():
+              benchmark = callset.get('benchmark')
+              if benchmark and benchmark in samples:
+                  if benchmark not in benchmarks:
+                      benchmarks[benchmark] = []
+                  benchmarks[benchmark].append(key)
+
+          # Create matrix entries, splitting large benchmarks into chunks
+          matrix = []
+          CHUNK_SIZE = 8  # Max variant calls per job to avoid timeouts
+          
+          for sample in sorted(samples):
+              calls = benchmarks.get(sample, [])
+              num_calls = len(calls)
+              
+              if num_calls <= CHUNK_SIZE:
+                  # Small benchmark - process as a single job
+                  matrix.append({
+                      'sample': sample,
+                      'chunk': 'all',
+                      'chunk_index': 0
+                  })
+              else:
+                  # Large benchmark - split into chunks
+                  num_chunks = (num_calls + CHUNK_SIZE - 1) // CHUNK_SIZE
+                  for i in range(num_chunks):
+                      matrix.append({
+                          'sample': sample,
+                          'chunk': f'{i+1}of{num_chunks}',
+                          'chunk_index': i
+                      })
+
+          print(json.dumps({'include': matrix}))
+          EOF
+
+          python3 /tmp/generate_matrix.py > /tmp/matrix.json
+          MATRIX=$(cat /tmp/matrix.json)
+          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
+          echo "Generated matrix:" >> $GITHUB_STEP_SUMMARY
+          echo '```json' >> $GITHUB_STEP_SUMMARY
+          cat /tmp/matrix.json | jq . >> $GITHUB_STEP_SUMMARY
+          echo '```' >> $GITHUB_STEP_SUMMARY
+
+  # 2. EVALUATE: The matrix-based analysis (split by sample and chunk)
   evaluate:
     needs: setup
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      matrix:
-        sample: ${{ fromJson(needs.setup.outputs.samples) }}
+      matrix: ${{fromJson(needs.setup.outputs.matrix)}}
     env:
       FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }}
       ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }}
@@ -73,7 +130,7 @@ jobs:
         with:
           directory: "."
           snakefile: "workflow/Snakefile"
-          args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }}"
+          args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8"
           stagein: |
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
@@ -85,7 +142,7 @@ jobs:
           snakefile: "workflow/Snakefile"
           args: >
             benchmark_all
-            --config benchmark=${{ matrix.sample }}
+            --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8
             --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp
           stagein: |
             pip install snakemake-storage-plugin-zenodo
@@ -94,8 +151,8 @@ jobs:
       - name: Upload results
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.sample }}
-          # Upload the workflow outputs (keeps the on-disk paths Snakemake expects).
+          name: ${{ matrix.sample }}-${{ matrix.chunk }}
+          # Upload the workflow outputs (keeps the on-disk paths Snakemake sees `results/`.
           path: results/
           retention-days: 1
 
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 2795bf3..0a7c00c 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -25,6 +25,27 @@ if "benchmark" in config:
     }
 
 
+# Filter callsets by chunk if specified via --config chunk_index=... and chunk_size=...
+# This allows splitting large benchmarks into smaller groups to avoid timeouts.
+if "chunk_index" in config and "chunk_size" in config:
+    # Get all variant calls for this benchmark (already filtered above)
+    all_calls = sorted(config["variant-calls"].keys())
+    chunk_index = int(config["chunk_index"])
+    chunk_size = int(config["chunk_size"])
+    
+    # Select the chunk
+    start_idx = chunk_index * chunk_size
+    end_idx = start_idx + chunk_size
+    chunk_calls = all_calls[start_idx:end_idx]
+    
+    # Filter to only this chunk
+    config["variant-calls"] = {
+        key: callset
+        for key, callset in config["variant-calls"].items()
+        if key in chunk_calls
+    }
+
+
 # add path to callsets
 for key, callset in config["variant-calls"].items():
     if "zenodo" in callset:

From b8da1aff247c4595a2d78d7f95820b6c76bb2cb0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 15:43:19 +0000
Subject: [PATCH 4/6] fix: correct comment typo in workflow file

Co-authored-by: famosab <45968370+famosab@users.noreply.github.com>
---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1b60600..5747efa 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -152,7 +152,7 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.sample }}-${{ matrix.chunk }}
-          # Upload the workflow outputs (keeps the on-disk paths Snakemake sees `results/`.
+          # Upload the workflow outputs (keeps the on-disk paths Snakemake expects).
           path: results/
           retention-days: 1
 

From 2592ac00527964a8379bf690390cf00aba08c2e5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:16:14 +0000
Subject: [PATCH 5/6] fix: reduce chunk size to 4 and make report job tolerant
 of partial failures

Co-authored-by: famosab <45968370+famosab@users.noreply.github.com>
---
 .github/workflows/main.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 5747efa..aeba330 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,7 +49,7 @@ jobs:
 
           # Create matrix entries, splitting large benchmarks into chunks
           matrix = []
-          CHUNK_SIZE = 8  # Max variant calls per job to avoid timeouts
+          CHUNK_SIZE = 4  # Max variant calls per job to avoid timeouts; keep in sync with chunk_size=4 in the evaluate job args
           
           for sample in sorted(samples):
               calls = benchmarks.get(sample, [])
@@ -130,7 +130,7 @@ jobs:
         with:
           directory: "."
           snakefile: "workflow/Snakefile"
-          args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8"
+          args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=4"
           stagein: |
             pip install snakemake-storage-plugin-zenodo
             pip install snakemake-storage-plugin-http
@@ -142,7 +142,7 @@ jobs:
           snakefile: "workflow/Snakefile"
           args: >
             benchmark_all
-            --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=8
+            --config benchmark=${{ matrix.sample }} chunk_index=${{ matrix.chunk_index }} chunk_size=4
             --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp
           stagein: |
             pip install snakemake-storage-plugin-zenodo
@@ -159,6 +159,8 @@ jobs:
   # 3. REPORT: Combine and Deploy
   report:
     needs: evaluate
+    # Run report even if some evaluate jobs fail (but not when cancelled); this does not check that any job succeeded
+    if: ${{ !cancelled() && (success() || failure()) }}
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -166,7 +168,8 @@ jobs:
       - name: Download all results
         uses: actions/download-artifact@v4
         with:
-          # Merge all benchmark artifacts back into the workspace so Snakemake sees `results/`.
+          # Download all available artifacts (some may be missing if jobs failed)
+          pattern: "*-*"
           path: .
           merge-multiple: true
 

From 4d671f6d4c30eb549aee445351bd138980c03259 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:17:19 +0000
Subject: [PATCH 6/6] fix: use more specific artifact pattern in report job

Co-authored-by: famosab <45968370+famosab@users.noreply.github.com>
---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index aeba330..373c694 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -168,8 +168,8 @@ jobs:
       - name: Download all results
         uses: actions/download-artifact@v4
         with:
-          # Download all available artifacts (some may be missing if jobs failed)
-          pattern: "*-*"
+          # Download all available benchmark artifacts (names matching giab-*)
+          pattern: "giab-*"
           path: .
           merge-multiple: true