Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@
/output
trace-*
.ipynb_checkpoints
__pycache__
__pycache__
.Rproj.user
.Rhistory
*.Rproj
10 changes: 9 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
"common/schemas/task_config.yaml": "_viash.yaml",
"common/schemas/task_method.yaml": "**/methods/**/config.vsh.yaml",
"common/schemas/task_control_method.yaml": "**/control_methods/**/config.vsh.yaml",
"common/schemas/task_metric.yaml": "**/metrics/**/config.vsh.yaml"
"common/schemas/task_metric.yaml": "**/metrics/**/config.vsh.yaml",
"https://raw.githubusercontent.com/viash-io/viash-schemas/refs/heads/main/json_schemas/unknown/config.schema.json": [
"*.vsh.yaml",
"*.vsh.yml"
],
"https://raw.githubusercontent.com/viash-io/viash-schemas/refs/heads/main/json_schemas/unknown/package.schema.json": [
"_viash.yaml",
"_viash.yml"
]
}
}
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
## New functionality

* Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).
* Added `methods/ss_stacas` new method (PR #59).
- Add semi-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality leverages partial or imperfect knowledge of cell identity to improve integration quality by preserving biological variation while correcting for batch effects.
* Added `methods/stacas` new method (PR #58).
- Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations.
* Added `method/drvi` component (PR #61).
* Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68).

* Added `metrics/cilisi` new metric component (PR #57).
- ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring
Expand Down
2 changes: 1 addition & 1 deletion scripts/create_component/create_r_method.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
set -e

common/scripts/create_component \
--name my_r_method \
--name ss_stacas \
--language r \
--type method
37 changes: 37 additions & 0 deletions src/methods/ss_stacas/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
__merge__: ../../api/comp_method.yaml
name: ss_stacas
label: ssSTACAS
summary: Accurate semi-supervised integration of single-cell transcriptomics data
description: |
STACAS is a method for scRNA-seq integration,
especially suited to accurately integrate datasets with large cell type imbalance
(e.g. in terms of proportions of distinct cell populations).
Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform
semi-supervised integration, leading to increased preservation of biological variability
in the resulting integrated space.
STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks.
references:
doi: 10.1038/s41467-024-45240-z
# Andreatta M, Hérault L, Gueguen P, Gfeller D, Berenstein AJ, Carmona SJ.
# Semi-supervised integration of single-cell transcriptomics data.
# Nature Communications. 2024;15(1):1-13. doi:10.1038/s41467-024-45240-z
links:
documentation: https://carmonalab.github.io/STACAS.demo/STACAS.demo.html
repository: https://github.com/carmonalab/STACAS
info:
preferred_normalization: log_cp10k
method_types: [embedding]
resources:
- type: r_script
path: script.R
engines:
- type: docker
image: openproblems/base_r:1
setup:
- type: r
github: carmonalab/STACAS@2.3.0
runners:
- type: executable
- type: nextflow
directives:
label: [midtime,midmem,midcpu]
62 changes: 62 additions & 0 deletions src/methods/ss_stacas/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
requireNamespace("anndata", quietly = TRUE)
suppressPackageStartupMessages({
library(STACAS)
library(Matrix)
library(SeuratObject)
library(Seurat)
})

# Default parameters and metadata for running this script directly:
# `par` holds the input/output .h5ad paths, `meta` holds the component name.
# NOTE(review): the VIASH START/END markers presumably delimit a region that
# Viash replaces with the real values when the component is built - confirm.
## VIASH START
par <- list(
input = "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
output = "output.h5ad"
)
meta <- list(
name = "ss_stacas"
)
## VIASH END

cat("Reading input file\n")
adata <- anndata::read_h5ad(par[["input"]])

cat("Create Seurat object\n")
# Raw counts are not needed, so only the normalized layer is loaded.
# Seurat expects genes in rows and cells in columns, hence the transpose;
# the result is then coerced to a column-compressed sparse matrix (dgCMatrix).
norm_mat <- adata$layers[["normalized"]] |>
  Matrix::t() |>
  as("CsparseMatrix") |>
  as("dgCMatrix")

# Build the object with the normalized matrix passed in as counts ...
seurat_obj <- Seurat::CreateSeuratObject(
  counts = norm_mat,
  meta.data = adata$obs
)
# ... then store it as the "data" layer and drop the counts layer again
seurat_obj@assays$RNA$data <- norm_mat
seurat_obj@assays$RNA$counts <- NULL


# Obtain anchor features from the preprocessing pipeline: the 2000 features
# with the highest `hvg_score` in `adata$var`.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
hvg_rank <- order(adata$var$hvg_score, decreasing = TRUE)
anchor_features <- head(adata$var[hvg_rank, "feature_id"], 2000)

cat("Run STACAS\n")
# Split the object into one Seurat object per batch and integrate them.
# Cell type labels from the "cell_type" metadata column are passed to STACAS
# for semi-supervised integration.
object_integrated <- seurat_obj |>
  Seurat::SplitObject(split.by = "batch") |>
  STACAS::Run.STACAS(
    cell.labels = "cell_type",
    anchor.features = anchor_features
  )

cat("Store outputs\n")
# The object was split by batch before integration, so the rows of the
# integrated embedding are not guaranteed to follow the cell order of the
# input dataset. Re-align them by cell name so that row i of X_emb describes
# the same cell as row i of `obs`.
embedding <- object_integrated@reductions$pca@cell.embeddings
cell_order <- match(adata$obs_names, rownames(embedding))
if (anyNA(cell_order)) {
  stop(
    "Integrated embedding is missing ", sum(is.na(cell_order)),
    " cell(s) that are present in the input dataset",
    call. = FALSE
  )
}
embedding <- embedding[cell_order, , drop = FALSE]

# Output AnnData: dataset/normalization ids copied from the input, the method
# id taken from the component metadata, and the embedding stored as X_emb.
output <- anndata::AnnData(
  uns = list(
    dataset_id = adata$uns[["dataset_id"]],
    normalization_id = adata$uns[["normalization_id"]],
    method_id = meta$name
  ),
  obs = adata$obs,
  var = adata$var,
  obsm = list(
    X_emb = embedding
  )
)

cat("Write output AnnData to file\n")
output$write_h5ad(par[["output"]], compression = "gzip")