From ad22867418a9d5b6127b6113873c655b8efb4bd0 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 16 Apr 2025 12:17:19 +0200 Subject: [PATCH 1/8] add method STACAS --- src/methods/stacas/config.vsh.yaml | 81 ++++++++++++++++++++++++++++++ src/methods/stacas/script.R | 56 +++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 src/methods/stacas/config.vsh.yaml create mode 100644 src/methods/stacas/script.R diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml new file mode 100644 index 00000000..ab08bac7 --- /dev/null +++ b/src/methods/stacas/config.vsh.yaml @@ -0,0 +1,81 @@ +# The API specifies which type of component this is. +# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: ../../api/comp_method.yaml + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. +name: stacas +# A relatively short label, used when rendering visualisations (required) +label: STACAS +# A one sentence summary of how this method works (required). Used when +# rendering summary tables. +summary: Accurate semi-supervised integration of single-cell transcriptomics data +# A multi-line description of how this component works (required). Used +# when rendering reference documentation. +description: | + STACAS is a method for scRNA-seq integration, + especially suited to accurately integrate datasets with large cell type imbalance + (e.g. in terms of proportions of distinct cell populations). + Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform + semi-supervised integration, leading to increased preservation of biological variability + in the resulting integrated space. + STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks. 
+references: + doi: 10.1038/s41467-024-45240-z + # Andreatta M, Hérault L, Gueguen P, Gfeller D, Berenstein AJ, Carmona SJ. + # Semi-supervised integration of single-cell transcriptomics data. + # Nature Communications*. 2024;15(1):1-13. doi:10.1038/s41467-024-45240-z +links: + # URL to the documentation for this method (required). + documentation: https://carmonalab.github.io/STACAS.demo/STACAS.demo.html + # URL to the code repository for this method (required). + repository: https://github.com/carmonalab/STACAS +# Metadata for your component +info: + # Which normalisation method this component prefers to use (required). + preferred_normalization: log_cp10k + +# Component-specific parameters (optional) +# arguments: +# - name: "--n_neighbors" +# type: "integer" +# default: 5 +# description: Number of neighbors to use. + +# Resources required to run the component +resources: + # The script of your component (required) + - type: r_script + path: script.R + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: openproblems/base_r:1.0.0 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + setup: + - type: r + #github: https://github.com/carmonalab/STACAS.git@2.2.0 + cran: + - Seurat + - SeuratObject + - R.utils + bioc: + - BiocNeighbors + - BiocParallel + script: remotes::install_github("carmonalab/STACAS@2.2.0", dependencies = FALSE) + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. 
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/stacas/script.R b/src/methods/stacas/script.R
new file mode 100644
index 00000000..19fcaf31
--- /dev/null
+++ b/src/methods/stacas/script.R
@@ -0,0 +1,80 @@
+# STACAS batch integration for the batch integration task.
+# Reads an h5ad dataset, integrates its batches with STACAS and writes the
+# PCA embedding of the integrated object to a new h5ad file as obsm "X_emb".
+
+# anndata is only called through `anndata::`, so check that it is installed
+# and fail early with a clear message instead of attaching it.
+if (!requireNamespace("anndata", quietly = TRUE)) {
+  stop("Package 'anndata' is required to read and write h5ad files.",
+       call. = FALSE)
+}
+suppressPackageStartupMessages({
+  library(STACAS)
+  library(Matrix)
+  library(SeuratObject)
+  library(Seurat)
+})
+
+## VIASH START
+par <- list(
+  input = "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+  output = "output.h5ad"
+)
+meta <- list(
+  name = "stacas"
+)
+## VIASH END
+
+cat("Reading input file\n")
+adata <- anndata::read_h5ad(par[["input"]])
+
+cat("Create Seurat object\n")
+# Transpose because Seurat expects genes in rows, cells in columns
+counts_r <- Matrix::t(adata$layers[["counts"]])
+normalized_r <- Matrix::t(adata$layers[["normalized"]])
+# Convert to a regular sparse matrix first and then to dgCMatrix
+counts_c <- as(as(counts_r, "CsparseMatrix"), "dgCMatrix")
+normalized_c <- as(as(normalized_r, "CsparseMatrix"), "dgCMatrix")
+
+# Create Seurat object with raw counts, these are needed to compute
+# Variable Genes
+seurat_obj <- Seurat::CreateSeuratObject(
+  counts = counts_c,
+  meta.data = adata$obs
+)
+# Manually assign pre-normalized values to the "data" slot
+seurat_obj@assays$RNA$data <- normalized_c
+
+cat("Run STACAS\n")
+# One Seurat object per batch; Run.STACAS is called with its defaults, i.e.
+# no cell type labels are supplied.
+object_integrated <- seurat_obj |>
+  Seurat::SplitObject(split.by = "batch") |>
+  STACAS::Run.STACAS()
+
+cat("Store outputs\n")
+# Splitting by batch and merging again can return the cells in a different
+# order than the input, and obsm rows are presumably matched to obs by
+# position rather than by name (TODO confirm). Align the embedding with
+# adata$obs by cell name, and stop if the two cell sets differ.
+embedding <- object_integrated@reductions$pca@cell.embeddings
+cell_ids <- rownames(adata$obs)
+if (nrow(embedding) != length(cell_ids) ||
+    !all(cell_ids %in% rownames(embedding))) {
+  stop("Cells in the integrated object do not match the input dataset.",
+       call. = FALSE)
+}
+embedding <- embedding[cell_ids, , drop = FALSE]
+
+output <- anndata::AnnData(
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  ),
+  obs = adata$obs,
+  var = adata$var,
+  obsm = list(
+    X_emb = embedding
+  )
+)
+
+cat("Write output AnnData to file\n")
+output$write_h5ad(par[["output"]], compression = "gzip")
From a0e70f17582c85b829ffc1c90e74885f03e2b0c2 Mon Sep 17 00:00:00 2001
From: JGarnica22
Date: Wed, 16
Apr 2025 12:20:08 +0200 Subject: [PATCH 2/8] add method STACAS --- src/methods/stacas/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml index ab08bac7..502f38d2 100644 --- a/src/methods/stacas/config.vsh.yaml +++ b/src/methods/stacas/config.vsh.yaml @@ -21,7 +21,7 @@ description: | (e.g. in terms of proportions of distinct cell populations). Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform semi-supervised integration, leading to increased preservation of biological variability - in the resulting integrated space. + in the resulting integrated space. STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks. references: doi: 10.1038/s41467-024-45240-z From 3d8adcb32319b8c4f9e393e5c2d9aa953a0f84f2 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 16 Apr 2025 12:52:12 +0200 Subject: [PATCH 3/8] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09d672d0..22b80c7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # task_batch_integration devel +## New functionality +* Add `methods/stacas` new method. +Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. +This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. + ## New functionality * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52). 
From 06ca735018a8add2edc351475f80481e8501f262 Mon Sep 17 00:00:00 2001 From: Josep Garnica <61703467+JGarnica22@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:09:13 +0200 Subject: [PATCH 4/8] Update: base_r container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michaela Müller <51025211+mumichae@users.noreply.github.com> --- src/methods/stacas/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml index 502f38d2..b1fbacfd 100644 --- a/src/methods/stacas/config.vsh.yaml +++ b/src/methods/stacas/config.vsh.yaml @@ -57,7 +57,7 @@ resources: engines: # Specifications for the Docker image for this component. - type: docker - image: openproblems/base_r:1.0.0 + image: openproblems/base_r:1 # Add custom dependencies here (optional). For more information, see # https://viash.io/reference/config/engines/docker/#setup . setup: From ce4e3cc4580d1ce35068bc247db4bb2dce08a5a1 Mon Sep 17 00:00:00 2001 From: Josep Garnica <61703467+JGarnica22@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:15:23 +0200 Subject: [PATCH 5/8] Update: STACAS installation with dependencies, compatible with new R container base_r:1 Co-authored-by: Luke Zappia --- src/methods/stacas/config.vsh.yaml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml index b1fbacfd..a79c66a6 100644 --- a/src/methods/stacas/config.vsh.yaml +++ b/src/methods/stacas/config.vsh.yaml @@ -62,15 +62,7 @@ engines: # https://viash.io/reference/config/engines/docker/#setup . 
setup: - type: r - #github: https://github.com/carmonalab/STACAS.git@2.2.0 - cran: - - Seurat - - SeuratObject - - R.utils - bioc: - - BiocNeighbors - - BiocParallel - script: remotes::install_github("carmonalab/STACAS@2.2.0", dependencies = FALSE) + github: carmonalab/STACAS@2.3.0 runners: # This platform allows running the component natively From 1306cab38c96b6d0eee0b26702094a845f6db23a Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 17 Sep 2025 16:38:51 +0200 Subject: [PATCH 6/8] fix: move STACAS comment below the kBET on New functionality section --- CHANGELOG.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22b80c7c..ce48dbb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,10 @@ # task_batch_integration devel -## New functionality -* Add `methods/stacas` new method. -Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. -This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. - ## New functionality * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52). +* Added `methods/stacas` new method. + - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. 
## Minor changes From dddaf12c210a5f5b41dca7ff745f4ca1ed604fc0 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 17 Sep 2025 16:45:40 +0200 Subject: [PATCH 7/8] fix: remove boilerplate comments for better readability --- src/methods/stacas/config.vsh.yaml | 37 ------------------------------ 1 file changed, 37 deletions(-) diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml index a79c66a6..546b8eaa 100644 --- a/src/methods/stacas/config.vsh.yaml +++ b/src/methods/stacas/config.vsh.yaml @@ -1,20 +1,7 @@ -# The API specifies which type of component this is. -# It contains specifications for: -# - The input/output files -# - Common parameters -# - A unit test __merge__: ../../api/comp_method.yaml - -# A unique identifier for your component (required). -# Can contain only lowercase letters or underscores. name: stacas -# A relatively short label, used when rendering visualisations (required) label: STACAS -# A one sentence summary of how this method works (required). Used when -# rendering summary tables. summary: Accurate semi-supervised integration of single-cell transcriptomics data -# A multi-line description of how this component works (required). Used -# when rendering reference documentation. description: | STACAS is a method for scRNA-seq integration, especially suited to accurately integrate datasets with large cell type imbalance @@ -29,45 +16,21 @@ references: # Semi-supervised integration of single-cell transcriptomics data. # Nature Communications*. 2024;15(1):1-13. doi:10.1038/s41467-024-45240-z links: - # URL to the documentation for this method (required). documentation: https://carmonalab.github.io/STACAS.demo/STACAS.demo.html - # URL to the code repository for this method (required). repository: https://github.com/carmonalab/STACAS -# Metadata for your component info: - # Which normalisation method this component prefers to use (required). 
preferred_normalization: log_cp10k - -# Component-specific parameters (optional) -# arguments: -# - name: "--n_neighbors" -# type: "integer" -# default: 5 -# description: Number of neighbors to use. - -# Resources required to run the component resources: - # The script of your component (required) - type: r_script path: script.R - # Additional resources your script needs (optional) - # - type: file - # path: weights.pt - engines: - # Specifications for the Docker image for this component. - type: docker image: openproblems/base_r:1 - # Add custom dependencies here (optional). For more information, see - # https://viash.io/reference/config/engines/docker/#setup . setup: - type: r github: carmonalab/STACAS@2.3.0 - runners: - # This platform allows running the component natively - type: executable - # Allows turning the component into a Nextflow module / pipeline. - type: nextflow directives: label: [midtime,midmem,midcpu] From 000319eb3c1c7e87872fecb3bb1ea9d87a203210 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Thu, 18 Sep 2025 10:21:06 +0200 Subject: [PATCH 8/8] add: method_types configuration --- src/methods/stacas/config.vsh.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml index 546b8eaa..7e0c9735 100644 --- a/src/methods/stacas/config.vsh.yaml +++ b/src/methods/stacas/config.vsh.yaml @@ -20,6 +20,7 @@ links: repository: https://github.com/carmonalab/STACAS info: preferred_normalization: log_cp10k + method_types: [embedding] resources: - type: r_script path: script.R