From 331114a31f5bf17984c4dc2098b81ac80ebc356d Mon Sep 17 00:00:00 2001 From: Maximilien Colange Date: Wed, 19 Mar 2025 14:01:33 +0100 Subject: [PATCH 1/3] add combat-seq method --- CHANGELOG.md | 2 + src/methods/combat-seq/config.vsh.yaml | 51 ++++++++++++++++++++++++++ src/methods/combat-seq/script.py | 42 +++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 src/methods/combat-seq/config.vsh.yaml create mode 100644 src/methods/combat-seq/script.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 859869e4..5c0af83f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,8 @@ A major update to the OpenProblems framework, switching from a Python-based fram * Added scGPT fine-tuned (PR #17). +* Added ComBat-Seq method (PR #55). + ## Major changes diff --git a/src/methods/combat-seq/config.vsh.yaml b/src/methods/combat-seq/config.vsh.yaml new file mode 100644 index 00000000..250dd4a3 --- /dev/null +++ b/src/methods/combat-seq/config.vsh.yaml @@ -0,0 +1,51 @@ +__merge__: ../../api/comp_method.yaml +name: combat_seq +label: ComBat-Seq +summary: Adjusting batch effects in RNA-Seq expression data using empirical Bayes + methods +description: | + ComBat-Seq extends the ComBat method for batch correction in RNA-Seq data. + While ComBat assumes normally distributed data, ComBat-Seq uses a negative + binomial distribution to model the data. While initially developed for + RNA-Seq data, ComBat-Seq can be applied to single-cell RNA-Seq data as well. + + The method is implemented in Python as a part of the inmoose package. It is + based on the original R implementation, distributed through the sva package. + +references: + doi: + - 10.1093/nargab/lqaa078 + - 10.1186/s12859-023-05578-5 + +links: + documentation: https://inmoose.readthedocs.io/en/stable/pycombatseq.html + repository: https://github.com/epigenelabs/inmoose + +# Metadata for your component +info: + # Which normalisation method this component prefers to use (required). + preferred_normalization: counts + +# Resources required to run the component +resources: + - type: python_script + path: script.py + - path: /src/utils/read_anndata_partial.py + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: openproblems/base_python:1.0.0 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + setup: + - type: python + pip: inmoose + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. + - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/methods/combat-seq/script.py b/src/methods/combat-seq/script.py new file mode 100644 index 00000000..9ab4c759 --- /dev/null +++ b/src/methods/combat-seq/script.py @@ -0,0 +1,42 @@ +import sys + +import anndata as ad +import numpy as np +from inmoose.pycombat import pycombat_seq +from scipy.sparse import csr_matrix + +# VIASH START +# Note: this section is auto-generated by viash at runtime. To edit it, make changes +# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. +par = {"input": "resources_test/.../input.h5ad", "output": "output.h5ad"} +meta = {"name": "combat-seq"} +# VIASH END + +sys.path.append(meta["resources_dir"]) +from read_anndata_partial import read_anndata + +print("Read input", flush=True) +adata = read_anndata( + par["input"], X="layers/normalized", obs="obs", var="var", uns="uns" +) + +print("Run Combat-Seq", flush=True) +counts = adata.T.to_df().astype(np.double).values +corrected_counts = pycombat_seq(adata.X, adata.obs["batch"]) + +print("Store output", flush=True) +output = ad.AnnData( + obs=adata.obs[[]], + var=adata.var[[]], + uns={ + "dataset_id": adata.uns["dataset_id"], + "normalization_id": adata.uns["normalization_id"], + "method_id": meta["name"], + }, + layers={ + "corrected_counts": csr_matrix(corrected_counts.T), + }, +) + +print("Store outputs", flush=True) +output.write_h5ad(par["output"], compression="gzip") From a7c6851347584a7b9a1541a39fbc39761ebe7ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michaela=20M=C3=BCller?= <51025211+mumichae@users.noreply.github.com> Date: Thu, 25 Sep 2025 10:47:10 +0200 Subject: [PATCH 2/3] add methods_types --- src/methods/combat-seq/config.vsh.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/methods/combat-seq/config.vsh.yaml b/src/methods/combat-seq/config.vsh.yaml index 250dd4a3..346c238d 100644 --- a/src/methods/combat-seq/config.vsh.yaml +++ b/src/methods/combat-seq/config.vsh.yaml @@ -25,6 +25,7 @@ links: info: # Which normalisation method this component prefers to use (required). preferred_normalization: counts + method_types: [feature] # Resources required to run the component resources: From 84d752468fbe209103d21465412f2ec70e5b9bfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michaela=20M=C3=BCller?= <51025211+mumichae@users.noreply.github.com> Date: Thu, 25 Sep 2025 10:49:43 +0200 Subject: [PATCH 3/3] use counts df for method --- src/methods/combat-seq/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/combat-seq/script.py b/src/methods/combat-seq/script.py index 9ab4c759..9826adfd 100644 --- a/src/methods/combat-seq/script.py +++ b/src/methods/combat-seq/script.py @@ -22,7 +22,7 @@ print("Run Combat-Seq", flush=True) counts = adata.T.to_df().astype(np.double).values -corrected_counts = pycombat_seq(adata.X, adata.obs["batch"]) +corrected_counts = pycombat_seq(counts, adata.obs["batch"]) print("Store output", flush=True) output = ad.AnnData(