diff --git a/CHANGELOG.md b/CHANGELOG.md
index 859869e4..9094ecc8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,8 +3,9 @@
 ## New functionality
 
 * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).
+* Added `methods/stacas` new method (PR #58).
+  - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations.
 * Added `method/drvi` component (PR #61).
-
 * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68).
 
 ## Minor changes
diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml
new file mode 100644
index 00000000..7e0c9735
--- /dev/null
+++ b/src/methods/stacas/config.vsh.yaml
@@ -0,0 +1,37 @@
+__merge__: ../../api/comp_method.yaml
+name: stacas
+label: STACAS
+summary: Accurate semi-supervised integration of single-cell transcriptomics data
+description: |
+  STACAS is a method for scRNA-seq integration,
+  especially suited to accurately integrate datasets with large cell type imbalance
+  (e.g. in terms of proportions of distinct cell populations).
+  Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform
+  semi-supervised integration, leading to increased preservation of biological variability
+  in the resulting integrated space.
+  STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks.
+references:
+  doi: 10.1038/s41467-024-45240-z
+  # Andreatta M, Hérault L, Gueguen P, Gfeller D, Berenstein AJ, Carmona SJ.
+  # Semi-supervised integration of single-cell transcriptomics data.
+  # Nature Communications. 2024;15(1):1-13. doi:10.1038/s41467-024-45240-z
+links:
+  documentation: https://carmonalab.github.io/STACAS.demo/STACAS.demo.html
+  repository: https://github.com/carmonalab/STACAS
+info:
+  preferred_normalization: log_cp10k
+  method_types: [embedding]
+resources:
+  - type: r_script
+    path: script.R
+engines:
+  - type: docker
+    image: openproblems/base_r:1
+    setup:
+      - type: r
+        github: carmonalab/STACAS@2.3.0
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/stacas/script.R b/src/methods/stacas/script.R
new file mode 100644
index 00000000..19fcaf31
--- /dev/null
+++ b/src/methods/stacas/script.R
@@ -0,0 +1,68 @@
+# anndata is only called via `anndata::`, so it is loaded but not attached
+requireNamespace("anndata", quietly = TRUE)
+suppressPackageStartupMessages({
+  library(STACAS)
+  library(Matrix)
+  library(SeuratObject)
+  library(Seurat)
+})
+
+## VIASH START
+par <- list(
+  input = "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+  output = "output.h5ad"
+)
+meta <- list(
+  name = "stacas"
+)
+## VIASH END
+
+cat("Reading input file\n")
+adata <- anndata::read_h5ad(par[["input"]])
+
+cat("Create Seurat object\n")
+# Transpose because Seurat expects genes in rows, cells in columns
+counts_r <- Matrix::t(adata$layers[["counts"]])
+normalized_r <- Matrix::t(adata$layers[["normalized"]])
+# Convert to a regular sparse matrix first and then to dgCMatrix
+counts_c <- as(as(counts_r, "CsparseMatrix"), "dgCMatrix")
+normalized_c <- as(as(normalized_r, "CsparseMatrix"), "dgCMatrix")
+
+# Create Seurat object with raw counts, these are needed to compute Variable Genes
+seurat_obj <- Seurat::CreateSeuratObject(
+  counts = counts_c,
+  meta.data = adata$obs
+)
+# Manually assign pre-normalized values to the "data" slot
+seurat_obj@assays$RNA$data <- normalized_c
+
+cat("Run STACAS\n")
+object_integrated <- seurat_obj |>
+  Seurat::SplitObject(split.by = "batch") |>
+  STACAS::Run.STACAS()
+
+# Splitting by batch and re-merging may not preserve the input cell order,
+# so index the embedding by the input cell names before pairing it with obs.
+embedding <- object_integrated@reductions$pca@cell.embeddings
+cell_idx <- match(rownames(adata$obs), rownames(embedding))
+if (anyNA(cell_idx)) {
+  stop("Integrated embedding is missing cells from the input", call. = FALSE)
+}
+embedding <- embedding[cell_idx, , drop = FALSE]
+
+cat("Store outputs\n")
+output <- anndata::AnnData(
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  ),
+  obs = adata$obs,
+  var = adata$var,
+  obsm = list(
+    X_emb = embedding
+  )
+)
+
+cat("Write output AnnData to file\n")
+output$write_h5ad(par[["output"]], compression = "gzip")