From 0c574ad48804517a247892cdd4a0bfc33dfda3e1 Mon Sep 17 00:00:00 2001 From: liuwd15 <1045561474@qq.com> Date: Wed, 5 Nov 2025 11:00:22 -0600 Subject: [PATCH] Add method fadvi --- CHANGELOG.md | 2 + src/methods/fadvi/config.vsh.yaml | 100 ++++++++++++++++++++ src/methods/fadvi/script.py | 56 +++++++++++ src/workflows/run_benchmark/config.vsh.yaml | 1 + src/workflows/run_benchmark/main.nf | 1 + 5 files changed, 160 insertions(+) create mode 100644 src/methods/fadvi/config.vsh.yaml create mode 100644 src/methods/fadvi/script.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 890c4eb7..4bc8b4c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * Added `methods/stacas` new method (PR #58). - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. * Added `method/drvi` component (PR #61). +* Added `method/fadvi` component. + - Add FActor Disentanglement Variational Inference (FADVI) for dimensionality reduction. * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68). * Added `metrics/cilisi` new metric component (PR #57). diff --git a/src/methods/fadvi/config.vsh.yaml b/src/methods/fadvi/config.vsh.yaml new file mode 100644 index 00000000..80a8e7e0 --- /dev/null +++ b/src/methods/fadvi/config.vsh.yaml @@ -0,0 +1,100 @@ +# The API specifies which type of component this is. +# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: /src/api/comp_method.yaml + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. +name: fadvi +# A relatively short label, used when rendering visualisations (required) +label: FADVI +# A one sentence summary of how this method works (required). Used when +# rendering summary tables.
+summary: "FADVI is a disentangled representation learning VAE method for batch integration." +# A multi-line description of how this component works (required). Used +# when rendering reference documentation. +description: | + FADVI (Factor Disentanglement Variational Inference) is a deep learning method for single-cell omics and spatial transcriptomics analysis that disentangles batch-related variation, label-related variation, and residual variation using adversarial training and cross-correlation penalties. +references: + doi: + - 10.1101/2025.11.03.683998 + bibtex: + - | + @article{fadvi2025, + title={FADVI: disentangled representation learning for robust integration of single-cell and spatial omics data}, + author={Wendao Liu and Gang Qu and Lukas M. Simon and Fabian J. Theis and Zhongming Zhao}, + journal={bioRxiv}, + year={2025} + } +links: + # URL to the documentation for this method (required). + documentation: https://fadvi.readthedocs.io/en/latest/ + # URL to the code repository for this method (required). + repository: https://github.com/liuwd15/fadvi + + + +# Metadata for your component +info: + method_types: [embedding] + # Which normalisation method this component prefers to use (required). + preferred_normalization: counts + +# Component-specific parameters (optional) +arguments: + - name: --n_hvg + type: integer + default: 2000 + description: Number of highly variable genes to use. + - name: --n_latent_l + type: integer + default: 30 + description: Number of latent dimensions for labels. + - name: --n_latent_b + type: integer + default: 30 + description: Number of latent dimensions for batches. + - name: --lambda_l + type: integer + default: 10 + description: Weight for label classification loss. + - name: --n_layers + type: integer + default: 2 + description: Number of layers. + - name: --max_epochs + type: integer + default: 30 + description: Maximum number of training epochs for FADVI.
+ +# Resources required to run the component +resources: + # The script of your component (required) + - type: python_script + path: script.py + + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: openproblems/base_pytorch_nvidia:1 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + setup: + - type: python + pypi: + - scvi-tools>=1.3.0 + - fadvi>=0.2.0 + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. + - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/methods/fadvi/script.py b/src/methods/fadvi/script.py new file mode 100644 index 00000000..6e075d49 --- /dev/null +++ b/src/methods/fadvi/script.py @@ -0,0 +1,56 @@ +import anndata as ad +from fadvi import FADVI + +## VIASH START +# Note: this section is auto-generated by viash at runtime. To edit it, make changes +# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. 
+par = { + 'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad', + 'output': 'output.h5ad', + 'n_hvg': 2000, + 'n_latent_l': 30, + 'n_latent_b': 30, 'lambda_l': 10, + 'n_layers': 2, + 'max_epochs': 30 +} +meta = { + 'name': 'fadvi' +} +## VIASH END + +print('Reading input files', flush=True) +adata = ad.read_h5ad(par['input']) + +if par["n_hvg"]: + print(f"Select top {par['n_hvg']} high variable genes", flush=True) + idx = adata.var["hvg_score"].to_numpy().argsort()[::-1][:par["n_hvg"]] + adata = adata[:, idx].copy() + + +print('Preprocess data', flush=True) +FADVI.setup_anndata(adata, batch_key="batch",labels_key="cell_type", + unlabeled_category='Unknown', layer='counts') +model = FADVI(adata, n_latent_l=par["n_latent_l"], + n_latent_b=par["n_latent_b"], + n_layers=par["n_layers"], lambda_l=par["lambda_l"]) + +print('Train model', flush=True) +model.train(max_epochs=par["max_epochs"]) + +print('Generate predictions', flush=True) +# The embedding is read from model.get_latent_representation() when the output AnnData is assembled below. + +print("Write output AnnData to file", flush=True) +output = ad.AnnData( + obs=adata.obs[[]], + var=adata.var[[]], + obsm={ + "X_emb": model.get_latent_representation(), + }, + uns={ + "dataset_id": adata.uns["dataset_id"], + "normalization_id": adata.uns["normalization_id"], + "method_id": meta["name"], + }, +) +output.write_h5ad(par['output'], compression='gzip') diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 09905ad0..f158dd35 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -92,6 +92,7 @@ dependencies: - name: methods/batchelor_mnn_correct - name: methods/bbknn - name: methods/combat + - name: methods/fadvi - name: methods/geneformer - name: methods/harmony - name: methods/harmonypy diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 6196f749..7e733aa4 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -20,6
+20,7 @@ methods = [ batchelor_mnn_correct, bbknn, combat, + fadvi, geneformer, harmony, harmonypy,