From 0c574ad48804517a247892cdd4a0bfc33dfda3e1 Mon Sep 17 00:00:00 2001
From: liuwd15 <1045561474@qq.com>
Date: Wed, 5 Nov 2025 11:00:22 -0600
Subject: [PATCH] Add method fadvi

---
 CHANGELOG.md                                |   2 +
 src/methods/fadvi/config.vsh.yaml           | 100 ++++++++++++++++++++
 src/methods/fadvi/script.py                 |  56 +++++++++++
 src/workflows/run_benchmark/config.vsh.yaml |   1 +
 src/workflows/run_benchmark/main.nf         |   1 +
 5 files changed, 160 insertions(+)
 create mode 100644 src/methods/fadvi/config.vsh.yaml
 create mode 100644 src/methods/fadvi/script.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 890c4eb7..4bc8b4c1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@
 * Added `methods/stacas` new method (PR #58).
     - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations.
 * Added `method/drvi` component (PR #61).
+* Added `method/fadvi` component.
+    - Add FActor Disentangled Variational Inference (FADVI) for dimensionality reduction.
 * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68).
 
 * Added `metrics/cilisi` new metric component (PR #57).
diff --git a/src/methods/fadvi/config.vsh.yaml b/src/methods/fadvi/config.vsh.yaml
new file mode 100644
index 00000000..80a8e7e0
--- /dev/null
+++ b/src/methods/fadvi/config.vsh.yaml
@@ -0,0 +1,100 @@
+# The API specifies which type of component this is.
+# It contains specifications for:
+#   - The input/output files
+#   - Common parameters
+#   - A unit test
+__merge__: /src/api/comp_method.yaml
+
+# A unique identifier for your component (required).
+# Can contain only lowercase letters or underscores.
+name: fadvi
+# A relatively short label, used when rendering visualisations (required)
+label: FADVI
+# A one sentence summary of how this method works (required). Used when 
+# rendering summary tables.
+summary: "FADVI is a disentangled representation learning VAE method for batch integration."
+# A multi-line description of how this component works (required). Used
+# when rendering reference documentation.
+description: |
+  FADVI (Factor Disentanglement Variational Inference) is a deep learning method for single-cell omics and spatial transcriptomics analysis that disentangles batch-related variation, label-related variation, and residual variation using adversarial training and cross-correlation penalties.
+references:
+  doi: 
+    - 10.1101/2025.11.03.683998
+  bibtex:
+    - |
+      @article{fadvi2025,
+        title={FADVI: disentangled representation learning for robust integration of single-cell and spatial omics data},
+        author={Wendao Liu and Gang Qu and Lukas M. Simon and Fabian J. Theis and Zhongming Zhao},
+        journal={bioRxiv},
+        year={2025}
+      }
+links:
+  # URL to the documentation for this method (required).
+  documentation: https://fadvi.readthedocs.io/en/latest/
+  # URL to the code repository for this method (required).
+  repository: https://github.com/liuwd15/fadvi
+
+
+
+# Metadata for your component
+info:
+  method_types: [embedding]
+  # Which normalisation method this component prefers to use (required).
+  preferred_normalization: counts
+
+# Component-specific parameters (optional)
+arguments:
+  - name: --n_hvg
+    type: integer
+    default: 2000
+    description: Number of highly variable genes to use.
+  - name: --n_latent_l
+    type: integer
+    default: 30
+    description: Number of latent dimensions for labels.
+  - name: --n_latent_b
+    type: integer
+    default: 30
+    description: Number of latent dimensions for batches.
+  - name: --lambda_l
+    type: integer
+    default: 10
+    description: Weight for the label classification loss.
+  - name: --n_layers
+    type: integer
+    default: 2
+    description: Number of layers.
+  - name: --max_epochs
+    type: integer
+    default: 30
+    description: Maximum number of training epochs for FADVI.
+
+# Resources required to run the component
+resources:
+  # The script of your component (required)
+  - type: python_script
+    path: script.py
+
+  # Additional resources your script needs (optional)
+  # - type: file
+  #   path: weights.pt
+
+engines:
+  # Specifications for the Docker image for this component.
+  - type: docker
+    image: openproblems/base_pytorch_nvidia:1
+    # Add custom dependencies here (optional). For more information, see
+    # https://viash.io/reference/config/engines/docker/#setup .
+    setup:
+      - type: python
+        pypi:
+          - scvi-tools>=1.3.0
+          - fadvi>=0.2.0
+
+runners:
+  # This platform allows running the component natively
+  - type: executable
+  # Allows turning the component into a Nextflow module / pipeline.
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/fadvi/script.py b/src/methods/fadvi/script.py
new file mode 100644
index 00000000..6e075d49
--- /dev/null
+++ b/src/methods/fadvi/script.py
@@ -0,0 +1,56 @@
+import anndata as ad
+from fadvi import FADVI
+
+## VIASH START
+# Note: this section is auto-generated by viash at runtime. To edit it, make changes
+# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
+par = {
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+    'output': 'output.h5ad',
+    'n_hvg': 2000,
+    'n_latent_l': 30,
+    'n_latent_b': 30,
+    'lambda_l': 10,
+    'n_layers': 2,
+    'max_epochs': 30
+}
+meta = {
+  'name': 'fadvi'
+}
+## VIASH END
+
+print('Reading input files', flush=True)
+adata = ad.read_h5ad(par['input'])
+
+if par["n_hvg"]:  # a falsy n_hvg (0 / None) keeps every gene
+    print(f"Select top {par['n_hvg']} high variable genes", flush=True)
+    idx = adata.var["hvg_score"].to_numpy().argsort()[::-1][:par["n_hvg"]]  # highest scores first
+    adata = adata[:, idx].copy()  # materialise the subset instead of keeping a view
+
+print('Preprocess data', flush=True)
+FADVI.setup_anndata(adata, batch_key="batch", labels_key="cell_type",
+                    unlabeled_category='Unknown', layer='counts')
+# Forward every model hyperparameter declared in config.vsh.yaml, including --lambda_l.
+model = FADVI(adata, n_latent_l=par["n_latent_l"],
+              n_latent_b=par["n_latent_b"],
+              lambda_l=par["lambda_l"],
+              n_layers=par["n_layers"])
+
+print('Train model', flush=True)
+model.train(max_epochs=par["max_epochs"])
+
+print('Generate predictions', flush=True)
+embedding = model.get_latent_representation()  # stored below as the X_emb embedding
+
+print("Write output AnnData to file", flush=True)
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    obsm={"X_emb": embedding},
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    },
+)
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 09905ad0..f158dd35 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -92,6 +92,7 @@ dependencies:
   - name: methods/batchelor_mnn_correct
   - name: methods/bbknn
   - name: methods/combat
+  - name: methods/fadvi
   - name: methods/geneformer
   - name: methods/harmony
   - name: methods/harmonypy
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 6196f749..7e733aa4 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -20,6 +20,7 @@ methods = [
   batchelor_mnn_correct,
   bbknn,
   combat,
+  fadvi,
   geneformer,
   harmony,
   harmonypy,