Skip to content

Commit ddfd26d

Browse files
can train Novae on a subset of vars
1 parent 376e876 commit ddfd26d

File tree

4 files changed

+30
-2
lines changed

4 files changed

+30
-2
lines changed

docs/tutorials/proteins.ipynb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,19 @@
195195
"novae.data.quantile_scaling(adatas)"
196196
]
197197
},
198+
{
199+
"cell_type": "markdown",
200+
"metadata": {},
201+
"source": [
202+
"## (Optional) Remove low quality proteins\n",
203+
"\n",
204+
"You can check the intensity of your proteins, e.g., using `sc.pl.spatial(adata, color=adata.var_names, vmax=\"p95\", spot_size=20)`.\n",
205+
"\n",
206+
"If you have proteins whose intensity **highly depends on the Field-Of-View (FOV)** (typically, when you see a grid-like pattern), then you should not provide these to Novae. Otherwise, since you'll have FOV-specific intensities, Novae will detect the FOVs as specific domains.\n",
207+
"\n",
208+
"> Practically, you can either (i) subset your `AnnData` object, or (ii) provide `var_names=<list-of-proteins-to-keep>` to `novae.Novae` when initializing the model (see next section). In the latter case, make sure to update `embedding_size` to be lower than the number of proteins you have chosen.\n"
209+
]
210+
},
198211
{
199212
"cell_type": "markdown",
200213
"metadata": {},

novae/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def __init__(
7878
sensitivity_noise_std: Standard deviation for the multiplicative factor for the noise augmentation.
7979
dropout_rate: Dropout rate for the genes during augmentation.
8080
scgpt_model_dir: Path to a directory containing a scGPT checkpoint, i.e. a `vocab.json` and a `best_model.pt` file.
81-
var_names: Only used when loading a pretrained model. Do not use it yourself.
81+
var_names: Used when loading a pretrained model. Can also be used to specify the names of the variables to train on, e.g. to not consider low quality proteins whose intensity highly depends on the FOV.
8282
"""
8383
super().__init__()
8484
### Initialize cell embedder and prepare adata(s) object(s)

novae/utils/_validate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ def prepare_adatas(
2828
2929
Args:
3030
adata: An `AnnData` object, or a list of `AnnData` objects. Optional if the model was initialized with `adata`.
31-
var_names: Only used when loading a pretrained model. Do not use it yourself.
31+
var_names: Used when loading a pretrained model. Can also be used to select a subset of variables to use.
3232
3333
Returns:
3434
A list of `AnnData` objects ready to be used by the model. If only one `adata` object is provided, it will be wrapped in a list.
3535
"""
3636
assert adata is not None or var_names is not None, "One of `adata` and `var_names` must not be None"
37+
var_names = lower_var_names(var_names) if var_names is not None else None
3738

3839
if adata is None:
3940
return None, var_names

tests/test_model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66
import pytest
77
import torch
8+
from anndata import AnnData
89

910
import novae
1011
from novae._constants import Keys
@@ -268,3 +269,16 @@ def test_init_prototypes():
268269
prototypes = model.swav_head.prototypes.data.clone()
269270
model.init_prototypes(adata)
270271
assert (model.swav_head.prototypes.data != prototypes).all()
272+
273+
274+
def test_var_name_subset():
275+
adata = AnnData(np.random.rand(10, 30))
276+
adata.var_names = [f"GENE{i}" for i in range(30)]
277+
adata.obsm["spatial"] = np.random.randn(10, 2)
278+
279+
novae.spatial_neighbors(adata)
280+
281+
selected_genes = ["gene1"] + [f"GENE{i}" for i in range(5, 25)] # check case insensitivity
282+
model = novae.Novae(adata, var_names=selected_genes, embedding_size=10)
283+
284+
assert model.hparams.var_names == [x.lower() for x in selected_genes]

0 commit comments

Comments
 (0)