Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
954 changes: 954 additions & 0 deletions CODE_REVIEW_ANALYSIS.md

Large diffs are not rendered by default.

39 changes: 24 additions & 15 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @param m2_matrix A matrix representing the exclusion matrix. Rows are events, columns are barcodes.
#' @param n_threads If the module OpenPM is available for your device, the function suggests using multi-thread processing for even faster computation.
#' @param min_row_sum A numeric value specifying the minimum row sum threshold for filtering events. Defaults to 50.
#' @param verbose Logical. If \code{TRUE} (default), prints progress and informational messages.
#' @param verbose Logical. If \code{TRUE}, prints progress and informational messages. Defaults to \code{FALSE}.
#' @param ... Additional arguments to be passed.
#'
#' @return A \code{data.table} containing the events and their corresponding sum deviance values.
Expand All @@ -19,22 +19,31 @@
#' # printing the results
#' print(HVE[order(-sum_deviance)])
#' @export
find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threads=1, verbose=TRUE, ...) {
find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threads=1, verbose=FALSE, ...) {

# Check if matrices are sparse
if (!(inherits(m1_matrix, "Matrix") && inherits(m2_matrix, "Matrix"))) {
stop("Both 'm1_matrix' and 'm2_matrix' must be sparse matrices of class 'Matrix'.")
stop("Both 'm1_matrix' and 'm2_matrix' must be sparse matrices of class 'Matrix'.", call. = FALSE)
}
# Check matrix compatibility
if (!identical(colnames(m1_matrix), colnames(m2_matrix))) {
stop("The colnames (barcodes) of inclusion and exclusion matrices are not identical.")
stop("The colnames (barcodes) of inclusion and exclusion matrices are not identical.", call. = FALSE)
}
if (!identical(rownames(m1_matrix), rownames(m2_matrix))) {
stop("The rownames (junction events) of inclusion and exclusion matrices are not identical.")
stop("The rownames (junction events) of inclusion and exclusion matrices are not identical.", call. = FALSE)
}

# Filter rows based on minimum row sum criteria
to_keep_events <- which(rowSums(m1_matrix) > min_row_sum & rowSums(m2_matrix) > min_row_sum)
m1_sums <- rowSums(m1_matrix)
m2_sums <- rowSums(m2_matrix)
to_keep_events <- which(m1_sums > min_row_sum & m2_sums > min_row_sum)

# Check if any events pass the threshold
if (length(to_keep_events) == 0) {
stop("No events pass the min_row_sum threshold of ", min_row_sum,
". Consider lowering the threshold or checking your data.", call. = FALSE)
}

m1_matrix <- m1_matrix[to_keep_events, , drop = FALSE]
m2_matrix <- m2_matrix[to_keep_events, , drop = FALSE]

Expand All @@ -58,7 +67,7 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
deviance_values <- tryCatch({
calcDeviances_ratio(M1_sub, M2_sub, n_threads)
}, error = function(e) {
stop("Error in calcDeviances_ratio function: ", e$message)
stop("Error in calcDeviances_ratio function: ", e$message, call. = FALSE)
})
deviance_values <- c(deviance_values)
names(deviance_values) <- rownames(M1_sub)
Expand Down Expand Up @@ -87,7 +96,7 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
#' \code{"vst"} uses a variance-stabilizing transformation to identify variable genes.
#' \code{"sum_deviance"} computes per-library deviances and combines them with a row variance metric.
#' @param n_threads If OpenMP is available for your device, the function suggests using multi-thread processing for even faster computation (only for sum_deviance method).
#' @param verbose Logical. If \code{TRUE} (default), prints progress and informational messages.
#' @param verbose Logical. If \code{TRUE}, prints progress and informational messages. Defaults to \code{FALSE}.
#' @param ... Additional arguments (currently unused).
#'
#' @return A \code{data.table} containing gene names (column \code{events}) and computed metrics.
Expand Down Expand Up @@ -116,24 +125,24 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
#' @import Matrix
#' @importClassesFrom Matrix dgCMatrix dsCMatrix dgTMatrix dsTMatrix
#' @export
find_variable_genes <- function(gene_expression_matrix, method = "vst", n_threads = 1, verbose = TRUE, ...) {
find_variable_genes <- function(gene_expression_matrix, method = "vst", n_threads = 1, verbose = FALSE, ...) {
# adding the vst method as the default
method <- match.arg(method, choices = c("vst", "sum_deviance"))

# Verify that gene_expression_matrix is a sparse Matrix
if (!inherits(gene_expression_matrix, "Matrix")) {
stop("The 'gene_expression_matrix' must be a sparse matrix of class 'Matrix'.")
stop("The 'gene_expression_matrix' must be a sparse matrix of class 'Matrix'.", call. = FALSE)
}

if (method == "vst") {
if(verbose) cat("The method we are using is vst (Seurat)...\n")
if (!exists("standardizeSparse_variance_vst")) {
stop("The function 'standardizeSparse_variance_vst' is not available. Check your C++ source files.")
stop("The function 'standardizeSparse_variance_vst' is not available. Check your C++ source files.", call. = FALSE)
}
rez_vector <- tryCatch({
standardizeSparse_variance_vst(matSEXP = gene_expression_matrix)
}, error = function(e) {
stop("Error in standardizeSparse_variance_vst: ", e$message)
stop("Error in standardizeSparse_variance_vst: ", e$message, call. = FALSE)
})
rez <- data.table::data.table(events = rownames(gene_expression_matrix),
standardize_variance = rez_vector)
Expand All @@ -143,7 +152,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
# Filter rows based on minimum row sum criteria
to_keep_features <- which(rowSums(gene_expression_matrix) > 0)
if (length(to_keep_features) == 0) {
stop("No genes with a positive row sum were found.")
stop("No genes with a positive row sum were found.", call. = FALSE)
}
gene_expression_matrix <- gene_expression_matrix[to_keep_features, , drop = FALSE]

Expand Down Expand Up @@ -174,7 +183,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
deviance_values <- tryCatch({
calcNBDeviancesWithThetaEstimation(as(gene_expression_matrix_sub, "dgCMatrix"), n_threads)
}, error = function(e) {
stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message)
stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message, call. = FALSE)
})
deviance_values <- c(deviance_values)
names(deviance_values) <- rownames(gene_expression_matrix_sub)
Expand All @@ -197,7 +206,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
row_var <- tryCatch({
splikit::get_rowVar(M = gene_expression_matrix)
}, error = function(e) {
stop("Error in splikit::get_rowVar: ", e$message)
stop("Error in splikit::get_rowVar: ", e$message, call. = FALSE)
})

row_var_cpp_dt <- data.table::data.table(events = rownames(gene_expression_matrix),
Expand Down
43 changes: 31 additions & 12 deletions R/general_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ NULL
#' @param m1_inclusion A numeric matrix (dense or sparse) of the same number of rows as `ZDB_matrix`, representing inclusion features.
#' @param m2_exclusion A numeric matrix (dense or sparse) of the same number of rows as `ZDB_matrix`, representing exclusion features.
#' @param metric Character string specifying which R² metric to compute. Options are "CoxSnell" (default) or "Nagelkerke".
#' @param suppress_warnings Logical. If \code{TRUE} (default), suppresses warnings during any warnings triggered during
#' computation (e.g., due to ill-conditioned inputs)
#' @param suppress_warnings Logical. If \code{TRUE}, suppresses warnings during computation
#' (e.g., due to ill-conditioned inputs). Defaults to \code{FALSE}.
#'
#' @return A `data.table` with the following columns:
#' \describe{
Expand Down Expand Up @@ -56,27 +56,36 @@ NULL
#' print(pseudo_r_square_nagel)
#'
#' @export
get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metric = "CoxSnell", suppress_warnings=TRUE) {
get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metric = "CoxSnell", suppress_warnings=FALSE) {

# Validate metric parameter
metric <- match.arg(metric, choices = c("CoxSnell", "Nagelkerke"))

# Check ZDB_matrix (must be dense)
if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.")
if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.", call. = FALSE)

# Check m1 and m2 (can be sparse or dense)
if (!is.matrix(m1_inclusion) && !inherits(m1_inclusion, "Matrix")) {
stop("m1_inclusion must be either a dense matrix or a sparse Matrix.")
stop("m1_inclusion must be either a dense matrix or a sparse Matrix.", call. = FALSE)
}
if (!is.matrix(m2_exclusion) && !inherits(m2_exclusion, "Matrix")) {
stop("m2_exclusion must be either a dense matrix or a sparse Matrix.")
stop("m2_exclusion must be either a dense matrix or a sparse Matrix.", call. = FALSE)
}

# Check row dimensions
if (nrow(m1_inclusion) != nrow(ZDB_matrix)) {
stop("m1_inclusion must have the same number of rows as ZDB_matrix.")
stop("m1_inclusion must have the same number of rows as ZDB_matrix.", call. = FALSE)
}
if (nrow(m2_exclusion) != nrow(ZDB_matrix)) {
stop("m2_exclusion must have the same number of rows as ZDB_matrix.")
stop("m2_exclusion must have the same number of rows as ZDB_matrix.", call. = FALSE)
}

# Check column dimensions
if (ncol(m1_inclusion) != ncol(ZDB_matrix)) {
stop("m1_inclusion must have the same number of columns as ZDB_matrix.", call. = FALSE)
}
if (ncol(m2_exclusion) != ncol(ZDB_matrix)) {
stop("m2_exclusion must have the same number of columns as ZDB_matrix.", call. = FALSE)
}

cat("Input dimension check passed. Proceeding with computation.\n")
Expand Down Expand Up @@ -151,7 +160,17 @@ get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metri
})
}

# Warn about NA removal
n_before <- nrow(results)
results <- na.omit(results)
n_after <- nrow(results)

if (n_before > n_after) {
n_removed <- n_before - n_after
message("Removed ", n_removed, " event(s) with NA values (",
round(100 * n_removed / n_before, 1), "% of total).")
message("Reasons for NA: insufficient data (n<2), no variation, or convergence failure.")
}

cat("Computation completed successfully.\n")
return(results)
Expand Down Expand Up @@ -185,7 +204,7 @@ get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metri
#' @export
get_rowVar <- function(M, verbose=FALSE) {
if (! (is.matrix(M) || inherits(M, "dgCMatrix")) ) {
stop("`M` must be either a dense numeric matrix or a dgCMatrix.")
stop("`M` must be either a dense numeric matrix or a dgCMatrix.", call. = FALSE)
}
if(verbose) message("[get_rowVar] Starting computation")
result <- rowVariance_cpp(M)
Expand All @@ -209,7 +228,7 @@ get_rowVar <- function(M, verbose=FALSE) {
#' # Preparing the inputs
#' set.seed(42)
#' pc_matrix <- matrix(data = rnorm(n = 10000 * 15, sd = 2), nrow = 10000, ncol = 15)
#' cluster_numbers <- runif(n = 10000, min = 1, max = 10) |> as.integer()
#' cluster_numbers <- as.integer(runif(n = 10000, min = 1, max = 10))
#'
#' # Getting the mean silhouette score
#' n_threads <- parallel::detectCores()
Expand Down
34 changes: 23 additions & 11 deletions R/star_solo_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -772,15 +772,15 @@ make_m2 <- function(m1_inclusion_matrix, eventdata, batch_size = 5000,

tryCatch({
# Convert m1_inclusion_matrix to data.table
m1 <- summary(m1_inclusion_matrix) |> data.table::as.data.table()
m1 <- data.table::as.data.table(summary(m1_inclusion_matrix))
data.table::setnames(m1, "x", "x_1")

if (verbose) cat("| |-- Converted matrix to data.table format\n")

# Merge group information
m1 <- merge(m1, group_ids, by = "i")
m1[, x_tot := sum(x_1), .(group_id, j)]
m_tot <- m1[, .(group_id, j, x_tot)] |> unique()
m_tot <- unique(m1[, .(group_id, j, x_tot)])

if (verbose) cat("| |-- Calculated group totals\n")

Expand Down Expand Up @@ -847,7 +847,7 @@ make_m2 <- function(m1_inclusion_matrix, eventdata, batch_size = 5000,

# Convert m1_inclusion_matrix to data.table for merging (once, outside the apply)
tryCatch({
m1 <- summary(m1_inclusion_matrix) |> data.table::as.data.table()
m1 <- data.table::as.data.table(summary(m1_inclusion_matrix))
data.table::setnames(m1, "x", "x_1")
m1 <- merge(m1, group_ids, by = "i")

Expand Down Expand Up @@ -982,15 +982,27 @@ make_m2 <- function(m1_inclusion_matrix, eventdata, batch_size = 5000,
#' @export
make_eventdata_plus <- function(eventdata, GTF_file_direction) {

# Validate GTF file path
if (!file.exists(GTF_file_direction)) {
stop("GTF file not found: ", GTF_file_direction, call. = FALSE)
}
if (!file.access(GTF_file_direction, mode = 4) == 0) {
stop("GTF file is not readable: ", GTF_file_direction, call. = FALSE)
}

# Read GTF file as plain text using fread
GTF <- data.table::fread(
GTF_file_direction,
col.names = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attribute"),
sep = "\t",
header = FALSE,
quote = "",
showProgress = FALSE
)
GTF <- tryCatch({
data.table::fread(
GTF_file_direction,
col.names = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attribute"),
sep = "\t",
header = FALSE,
quote = "",
showProgress = FALSE
)
}, error = function(e) {
stop("Error reading GTF file: ", e$message, call. = FALSE)
})

# Filter for 'gene' entries
ref_gtf <- GTF[type == "gene"]
Expand Down
24 changes: 19 additions & 5 deletions configure
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
#!/bin/bash

if [ "$(uname)" = "Darwin" ]; then
cp -f src/Makevars.mac src/Makevars
else
cp -f src/Makevars.linux src/Makevars
fi
# Detect operating system and copy appropriate Makevars file
# Note: Windows typically uses src/Makevars.win directly, but we handle it here for completeness

OS_TYPE="$(uname -s)"

case "${OS_TYPE}" in
Darwin*)
# macOS
cp -f src/Makevars.mac src/Makevars
;;
MINGW*|MSYS*|CYGWIN*)
# Windows (Git Bash, MSYS2, Cygwin)
cp -f src/Makevars.win src/Makevars
;;
*)
# Linux and other Unix-like systems
cp -f src/Makevars.linux src/Makevars
;;
esac

exit 0
Loading
Loading