From 331114a31f5bf17984c4dc2098b81ac80ebc356d Mon Sep 17 00:00:00 2001
From: Maximilien Colange <maximilien@epigenelabs.com>
Date: Wed, 19 Mar 2025 14:01:33 +0100
Subject: [PATCH 1/3] add combat-seq method

---
 CHANGELOG.md                           |  2 +
 src/methods/combat-seq/config.vsh.yaml | 51 ++++++++++++++++++++++++++
 src/methods/combat-seq/script.py       | 42 +++++++++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 src/methods/combat-seq/config.vsh.yaml
 create mode 100644 src/methods/combat-seq/script.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 859869e4..5c0af83f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,8 @@ A major update to the OpenProblems framework, switching from a Python-based fram
 
 * Added scGPT fine-tuned (PR #17).
 
+* Added ComBat-Seq method (PR #55).
+
 
 ## Major changes
 
diff --git a/src/methods/combat-seq/config.vsh.yaml b/src/methods/combat-seq/config.vsh.yaml
new file mode 100644
index 00000000..250dd4a3
--- /dev/null
+++ b/src/methods/combat-seq/config.vsh.yaml
@@ -0,0 +1,51 @@
+__merge__: ../../api/comp_method.yaml
+name: combat_seq
+label: ComBat-Seq
+summary: Adjusting batch effects in RNA-Seq expression data using empirical Bayes
+  methods
+description: |
+  ComBat-Seq extends the ComBat method for batch correction in RNA-Seq data.
+  While ComBat assumes normally distributed data, ComBat-Seq uses a negative
+  binomial distribution to model the data.  While initially developed for
+  RNA-Seq data, ComBat-Seq can be applied to single-cell RNA-Seq data as well.
+
+  The method is implemented in Python as a part of the inmoose package.  It is
+  based on the original R implementation, distributed through the sva package.
+
+references:
+  doi:
+    - 10.1093/nargab/lqaa078
+    - 10.1186/s12859-023-05578-5
+
+links:
+  documentation: https://inmoose.readthedocs.io/en/stable/pycombatseq.html
+  repository: https://github.com/epigenelabs/inmoose
+
+# Metadata for your component
+info:
+  # Which normalisation method this component prefers to use (required).
+  preferred_normalization: counts
+
+# Resources required to run the component
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+
+engines:
+  # Specifications for the Docker image for this component.
+  - type: docker
+    image: openproblems/base_python:1.0.0
+    # Add custom dependencies here (optional). For more information, see
+    # https://viash.io/reference/config/engines/docker/#setup .
+    setup:
+      - type: python
+        pip: inmoose
+
+runners:
+  # This runner allows running the component natively
+  - type: executable
+  # Allows turning the component into a Nextflow module / pipeline.
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/combat-seq/script.py b/src/methods/combat-seq/script.py
new file mode 100644
index 00000000..9ab4c759
--- /dev/null
+++ b/src/methods/combat-seq/script.py
@@ -0,0 +1,42 @@
+import sys
+
+import anndata as ad
+import numpy as np
+from inmoose.pycombat import pycombat_seq
+from scipy.sparse import csr_matrix
+
+# VIASH START
+# Note: this section is auto-generated by viash at runtime. To edit it, make changes
+# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
+par = {"input": "resources_test/.../input.h5ad", "output": "output.h5ad"}
+meta = {"name": "combat_seq", "resources_dir": "src/utils"}
+# VIASH END
+
+# read_anndata_partial.py is listed as a component resource in config.vsh.yaml.
+sys.path.append(meta["resources_dir"])
+from read_anndata_partial import read_anndata
+
+print("Read input", flush=True)
+# ComBat-Seq models raw counts, so load the counts layer rather than normalized.
+adata = read_anndata(par["input"], X="layers/counts", obs="obs", var="var", uns="uns")
+
+print("Run Combat-Seq", flush=True)
+counts = adata.T.to_df().astype(np.double).values
+corrected_counts = pycombat_seq(adata.X, adata.obs["batch"])
+
+print("Store output", flush=True)
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    },
+    layers={
+        "corrected_counts": csr_matrix(corrected_counts.T),
+    },
+)
+
+print("Store outputs", flush=True)
+output.write_h5ad(par["output"], compression="gzip")

From a7c6851347584a7b9a1541a39fbc39761ebe7ed4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michaela=20M=C3=BCller?=
 <51025211+mumichae@users.noreply.github.com>
Date: Thu, 25 Sep 2025 10:47:10 +0200
Subject: [PATCH 2/3] add method_types

---
 src/methods/combat-seq/config.vsh.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/methods/combat-seq/config.vsh.yaml b/src/methods/combat-seq/config.vsh.yaml
index 250dd4a3..346c238d 100644
--- a/src/methods/combat-seq/config.vsh.yaml
+++ b/src/methods/combat-seq/config.vsh.yaml
@@ -25,6 +25,7 @@ links:
 info:
   # Which normalisation method this component prefers to use (required).
   preferred_normalization: counts
+  method_types: [feature]
 
 # Resources required to run the component
 resources:

From 84d752468fbe209103d21465412f2ec70e5b9bfc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michaela=20M=C3=BCller?=
 <51025211+mumichae@users.noreply.github.com>
Date: Thu, 25 Sep 2025 10:49:43 +0200
Subject: [PATCH 3/3] use counts df for method

---
 src/methods/combat-seq/script.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/methods/combat-seq/script.py b/src/methods/combat-seq/script.py
index 9ab4c759..9826adfd 100644
--- a/src/methods/combat-seq/script.py
+++ b/src/methods/combat-seq/script.py
@@ -22,7 +22,7 @@
 
 print("Run Combat-Seq", flush=True)
 counts = adata.T.to_df().astype(np.double).values
-corrected_counts = pycombat_seq(adata.X, adata.obs["batch"])
+corrected_counts = pycombat_seq(counts, adata.obs["batch"])
 
 print("Store output", flush=True)
 output = ad.AnnData(