From 9acf16e38a8dd2fa177c7e7ddae85edaae4b711f Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Wed, 27 Jun 2018 14:23:01 -0500 Subject: [PATCH 01/23] add back in deleted 'gene_from_gene' method to fix #190 --- NAMESPACE | 1 + R/sleuth.R | 46 +++++++++++++++++++++++++++++++++++++++++++ man/gene_from_gene.Rd | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 man/gene_from_gene.Rd diff --git a/NAMESPACE b/NAMESPACE index 01d495b..bb5f80d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(design_matrix) export(enclosed_brush) export(excluded_ids) export(extract_model) +export(gene_from_gene) export(get_bootstrap_summary) export(get_bootstraps) export(get_quantile) diff --git a/R/sleuth.R b/R/sleuth.R index fea2305..1c1e6bd 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -1150,6 +1150,52 @@ transcripts_from_gene <- function(obj, test, test_type, table$target_id[table[, 2] == gene_name] } +#' Get the gene ID using other gene identifiers +#' +#' Get the \code{target_id} of a gene using other gene identifiers. +#' The identifiers found under the \code{obj$gene_column} are often +#' difficult to remember (e.g. ensembl gene ID, ENSG00000111640). +#' This function allows a user to find that difficult-to-remember +#' identifier using more-easily-remembered identifiers, such as +#' gene symbol (e.g. "GAPDH"). +#' +#' @param obj a \code{sleuth} object +#' @param gene_colname the name of the column containing 'gene_name'. +#' This parameter refers to the name of the column that the gene you are searching for appears in. +#' Check the column names using \code{colnames(obj$target_mapping)}. +#' @param gene_name a string containing the name of the gene you are interested in. +#' @return a character vector containing the \code{target_id} of the gene, found under +#' \code{obj$gene_column} within \code{obj$target_mapping}. +#' If the column name provided is the same as \code{obj$gene_column}, and the +#' gene_name used is found, that gene_name will be returned. +#' @examples +#' \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")} +#' @export +gene_from_gene <- function(obj, gene_colname, gene_name) { + + if (!obj$gene_mode) { + stop("this sleuth object is in transcript mode. Please use 'transcripts_from_gene' instead.") + } + + table <- as.data.frame(obj$target_mapping) + if (gene_colname == obj$gene_column) { + if (!(gene_name %in% table[, eval(parse(text = obj$gene_column))])) { + stop("Couldn't find gene ", gene_name) + } else { + return(gene_name) + } + } + + table <- unique(dplyr::select_(table, obj$gene_column, gene_colname)) + if (!(gene_name %in% table[, 2])) { + stop("Couldn't find gene ", gene_name) + } + hits <- unique(table[table[,2] == gene_name, 1]) + if (length(hits) > 1) { + warning("there was more than one gene ID that matched this identifier; taking the first one") + } + hits[1] + } #' Change sleuth transform counts function #' diff --git a/man/gene_from_gene.Rd b/man/gene_from_gene.Rd new file mode 100644 index 0000000..0f7077e --- /dev/null +++ b/man/gene_from_gene.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sleuth.R +\name{gene_from_gene} +\alias{gene_from_gene} +\title{Get the gene ID using other gene identifiers} +\usage{ +gene_from_gene(obj, gene_colname, gene_name) +} +\arguments{ +\item{obj}{a \code{sleuth} object} + +\item{gene_colname}{the name of the column containing 'gene_name'. +This parameter refers to the name of the column that the gene you are searching for appears in. 
+Check the column names using \code{colnames(obj$target_mapping)}.}
+
+\item{gene_name}{a string containing the name of the gene you are interested in.}
+}
+\value{
+a character vector containing the \code{target_id} of the gene, found under
+  \code{obj$gene_column} within \code{obj$target_mapping}.
+  If the column name provided is the same as \code{obj$gene_column}, and the
+  gene_name used is found, that gene_name will be returned.
+}
+\description{
+Get the \code{target_id} of a gene using other gene identifiers.
+The identifiers found under the \code{obj$gene_column} are often
+difficult to remember (e.g. ensembl gene ID, ENSG00000111640).
+This function allows a user to find that difficult-to-remember
+identifier using more-easily-remembered identifiers, such as
+gene symbol (e.g. "GAPDH").
+}
+\examples{
+ \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")}
+}

From 50b35d0629c7f46fa8303b08b3ce30efce462b31 Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Wed, 27 Jun 2018 14:23:27 -0500
Subject: [PATCH 02/23] fix outdated documentation for 'sleuth_to_matrix'

---
 R/matrix.R              | 5 +++--
 man/sleuth_to_matrix.Rd | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/matrix.R b/R/matrix.R
index 4ffecf3..b0fff75 100644
--- a/R/matrix.R
+++ b/R/matrix.R
@@ -24,8 +24,9 @@
 #' @param which_df character vector of length one. Which type of data to use
 #' ("obs_norm" or "obs_raw")
 #' @param which_units character vector of length one. Which units to use ("tpm"
-#' or "est_counts")
-#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units}
+#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))
+#' @return a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
+#' Note this currently does not support returning raw values for gene-level counts or TPMs.
 #' @examples
 #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
 #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name
diff --git a/man/sleuth_to_matrix.Rd b/man/sleuth_to_matrix.Rd
index bc4bc30..c5e0f6d 100644
--- a/man/sleuth_to_matrix.Rd
+++ b/man/sleuth_to_matrix.Rd
@@ -13,16 +13,16 @@ sleuth_to_matrix(obj, which_df, which_units)
 ("obs_norm" or "obs_raw")}
 
 \item{which_units}{character vector of length one. Which units to use ("tpm"
-or "est_counts")}
+or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))}
 }
 \value{
-a \code{list} with an attribute 'data', which contains a matrix of target_ids
-  and transcript expression in \code{which_units}
+a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
+  Note this currently does not support returning raw values for gene-level counts or TPMs.
 }
 \description{
 Convert a sleuth object to a matrix with the condition names.
 }
 \examples{
 sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
-head(sleuth_matrix$data) # look at first 5 transcripts, sorted by name
+head(sleuth_matrix) # look at first 5 transcripts, sorted by name
 }

From 5b4a3308d69254c0fab838b188cd0a6bb75da54c Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Thu, 28 Jun 2018 16:48:32 -0500
Subject: [PATCH 03/23] fix bug found by @lmigueel in #190; switch condition to
 just check for obj$gene_column so that this function can be used with p-value
 aggregation mode

---
 R/sleuth.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/sleuth.R b/R/sleuth.R
index 1c1e6bd..4025979 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -1173,13 +1173,13 @@ transcripts_from_gene <- function(obj, test, test_type,
 #' @export
 gene_from_gene <- function(obj, gene_colname, gene_name) {
 
-  if (!obj$gene_mode) {
+  if (is.null(obj$gene_column)) {
     stop("this sleuth object is in transcript mode. Please use 'transcripts_from_gene' instead.")
   }
 
   table <- as.data.frame(obj$target_mapping)
   if (gene_colname == obj$gene_column) {
-    if (!(gene_name %in% table[, eval(parse(text = obj$gene_column))])) {
+    if (!(gene_name %in% table[, obj$gene_column])) {
       stop("Couldn't find gene ", gene_name)
     } else {
       return(gene_name)

From 4da1c6c567934a98ce738ca8fa3afad76cef19e8 Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Thu, 21 Jun 2018 17:51:10 -0500
Subject: [PATCH 04/23] speed up measurement model estimation:

+ this is done by using the whole matrix rather than modeling by row
+ an additional small change is to make target_id the first column, which
  means that the final summary table in the sleuth_fit object will have
  target_id first, instead of in the middle. This will ease readability if
  users decide to view the summary table directly for columns not selected
  in sleuth_results
+ add documentation for me_model
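
An illustrative sketch (not part of the patch) of the whole-matrix idea:
lm.fit() accepts a response matrix, so one call fits every feature at once
and replaces the old per-transcript loop. The toy data and "tx" target IDs
below are hypothetical.

    set.seed(42)
    X <- cbind("(Intercept)" = 1, conditionB = rep(0:1, each = 3))
    Y <- matrix(rnorm(6 * 4), nrow = 6, dimnames = list(NULL, paste0("tx", 1:4)))
    fit_all <- lm.fit(X, Y)                       # one fit for all four features
    fit_one <- lm.fit(X, Y[, 1])                  # the old one-feature-at-a-time fit
    all.equal(fit_all$coefficients[, 1], fit_one$coefficients)  # TRUE
    colSums(fit_all$residuals ^ 2)                # per-feature RSS, as in me_model()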
---
 R/measurement_error.R | 113 ++++++++++++++++++++----------------------
 man/me_model.Rd       |  43 ++++++++++++++++
 2 files changed, 98 insertions(+), 58 deletions(-)
 create mode 100644 man/me_model.Rd

diff --git a/R/measurement_error.R b/R/measurement_error.R
index 59cbddc..f0b80bd 100644
--- a/R/measurement_error.R
+++ b/R/measurement_error.R
@@ -151,19 +151,8 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
   A <- solve(t(X) %*% X)
 
   msg("fitting measurement error models")
-  mes <- me_model_by_row(obj, X, obj$bs_summary, which_var)
-  tid <- names(mes)
-
-  mes_df <- dplyr::bind_rows(lapply(mes,
-    function(x) {
-      data.frame(rss = x$rss, sigma_sq = x$sigma_sq, sigma_q_sq = x$sigma_q_sq,
-        mean_obs = x$mean_obs, var_obs = x$var_obs)
-    }))
-
-  mes_df$target_id <- tid
-  rm(tid)
-
-  mes_df <- dplyr::mutate(mes_df, sigma_sq_pmax = pmax(sigma_sq, 0))
+  mes <- me_model(obj, X, obj$bs_summary, which_var)
+  mes_df <- mes$mes_df
 
   # FIXME: sometimes when sigma is negative the shrinkage estimation becomes NA
   # this is for the few set of transcripts, but should be able to just do some
@@ -194,7 +183,7 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
   }
 
   obj$fits[[fit_name]] <- list(
-    models = mes,
+    models = mes$models,
     summary = l_smooth,
     beta_covars = beta_covars,
     formula = formula,
@@ -325,33 +314,6 @@ covar_beta <- function(sigma, X, A) {
   A %*% (t(X) %*% diag(sigma) %*% X) %*% A
 }
 
-# Measurement error model
-#
-# Fit the measurement error model across all samples
-#
-# @param obj a \code{sleuth} object
-# @param design a design matrix
-# @param bs_summary a list from \code{bs_sigma_summary}
-# @return a list with a bunch of objects that are useful for shrinking
-me_model_by_row <- function(obj, design, bs_summary, which_var = 'obs_counts') {
-  which_var <- match.arg(which_var, c('obs_counts', 'obs_tpm'))
-  if (which_var == "obs_counts")
-    sigma_var <- "sigma_q_sq"
-  else
-    sigma_var <- "sigma_q_sq_tpm"
-
-  stopifnot( all.equal(names(bs_summary[[sigma_var]]), rownames(bs_summary[[which_var]])) )
-  stopifnot( length(bs_summary[[sigma_var]]) == nrow(bs_summary[[which_var]]))
-
-  models <- lapply(1:nrow(bs_summary[[which_var]]),
-    function(i) {
-      me_model(design, bs_summary[[which_var]][i, ], bs_summary[[sigma_var]][i])
-    })
-  names(models) <- rownames(bs_summary[[which_var]])
-
-  models
-}
-
 # non-equal var
 #
 # word
@@ -565,24 +527,59 @@ gene_summary <- function(obj, which_column, transform = identity,
   list(obs_counts = obs_counts, sigma_q_sq = bs_sigma)
 }
 
-me_model <- function(X, y, sigma_q_sq) {
+#' Measurement error model
+#'
+#' Fit the measurement error model across all samples. This is an internal function
+#' called by \code{sleuth_fit}, and is not intended to be used directly by users.
+#'
+#' @param obj a \code{sleuth} object
+#' @param design a design matrix
+#' @param bs_summary a list from \code{bs_sigma_summary}
+#' @param which_var "obs_counts" (default) to model estimated counts;
+#'   "obs_tpm" to model TPMs
+#' @return a list with two items:
+#' \itemize{
+#'   \item models the list of values returned by lm.fit. At the end of \code{sleuth_fit},
+#'     this object is found here: obj$fits[[fit_name]]$models
+#'   \item mes_df this data frame contains all of the important variables that are used for
+#'     variance shrinkage estimation and for testing. The final result of this table
+#'     is found here: obj$fits[[fit_name]]$summary. This data.frame contains
+#'     the following columns:
+#'     \itemize{
+#'       \item \code{target_id}: the feature ID (transcript or gene, depending on
+#'         \code{obj$gene_mode})
+#'       \item \code{rss}: the residual sum of squares from the linear model
+#'       \item \code{sigma_sq}: the estimated biological variance from the linear model
+#'       \item \code{mean_obs}: the mean of the transformed observations
+#'       \item \code{var_obs}: the raw variance of the transformed observations
+#'       \item \code{sigma_sq_pmax}: the greater of sigma_sq or 0. This prevents
+#'         negative estimates for the biological variance
+#'     }
+#'   }
+me_model <- function (obj, design, bs_summary, which_var = "obs_counts") {
+  which_var <- match.arg(which_var, c("obs_counts", "obs_tpm"))
+  if (which_var == "obs_counts")
+    sigma_var <- "sigma_q_sq"
+  else sigma_var <- "sigma_q_sq_tpm"
+
+  stopifnot(all.equal(names(bs_summary[[sigma_var]]), rownames(bs_summary[[which_var]])))
+  stopifnot(length(bs_summary[[sigma_var]]) == nrow(bs_summary[[which_var]]))
+
+  sigma_q_sq <- bs_summary[[sigma_var]]
+  y <- t(bs_summary[[which_var]])
+  X <- design
   n <- nrow(X)
   degrees_free <- n - ncol(X)
-
   ols_fit <- lm.fit(X, y)
-  rss <- sum(ols_fit$residuals ^ 2)
-  sigma_sq <- rss / (degrees_free) - sigma_q_sq
-
-  mean_obs <- mean(y)
-  var_obs <- var(y)
-
-  list(
-    ols_fit = ols_fit,
-    b1 = ols_fit$coefficients[2],
-    rss = rss,
-    sigma_sq = sigma_sq,
-    sigma_q_sq = sigma_q_sq,
-    mean_obs = mean_obs,
-    var_obs = var_obs
-  )
+  rss <- colSums(ols_fit$residuals^2)
+  sigma_sq <- rss/(degrees_free) - sigma_q_sq
+  mean_obs <- colMeans(y)
+  var_obs <- matrixStats::colVars(y)
+  mes_df <- data.frame(rss = rss, sigma_sq = sigma_sq, sigma_q_sq = sigma_q_sq,
+    mean_obs = mean_obs, var_obs = var_obs)
+  mes_df <- data.frame(target_id = rownames(mes_df), mes_df)
+  mes_df$target_id <- as.character(mes_df$target_id)
+  rownames(mes_df) <- NULL
+  mes_df <- dplyr::mutate(mes_df, sigma_sq_pmax = pmax(sigma_sq, 0))
+  list(models = ols_fit, mes_df = mes_df)
 }
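
A toy numeric check (not part of the patch) of the variance decomposition used
above: the total residual variance is split into inferential variance from the
bootstraps (sigma_q_sq) and biological variance (sigma_sq), floored at zero.

    rss <- c(8.0, 1.2)          # per-feature residual sums of squares
    degrees_free <- 4           # n samples minus p coefficients
    sigma_q_sq <- c(0.5, 0.9)   # per-feature bootstrap (inferential) variance
    sigma_sq <- rss / degrees_free - sigma_q_sq   # 1.5 and -0.6
    pmax(sigma_sq, 0)                             # 1.5 and 0.0 -> sigma_sq_pmax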
diff --git a/man/me_model.Rd b/man/me_model.Rd
new file mode 100644
index 0000000..70989f9
--- /dev/null
+++ b/man/me_model.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measurement_error.R
+\name{me_model}
+\alias{me_model}
+\title{Measurement error model}
+\usage{
+me_model(obj, design, bs_summary, which_var = "obs_counts")
+}
+\arguments{
+\item{obj}{a \code{sleuth} object}
+
+\item{design}{a design matrix}
+
+\item{bs_summary}{a list from \code{bs_sigma_summary}}
+
+\item{which_var}{"obs_counts" (default) to model estimated counts;
+"obs_tpm" to model TPMs}
+}
+\value{
+a list with two items:
+  \itemize{
+  \item models the list of values returned by lm.fit. At the end of \code{sleuth_fit},
+    this object is found here: obj$fits[[fit_name]]$models
+  \item mes_df this data frame contains all of the important variables that are used for
+    variance shrinkage estimation and for testing. The final result of this table
+    is found here: obj$fits[[fit_name]]$summary. This data.frame contains
+    the following columns:
+    \itemize{
+      \item \code{target_id}: the feature ID (transcript or gene, depending on
+        \code{obj$gene_mode})
+      \item \code{rss}: the residual sum of squares from the linear model
+      \item \code{sigma_sq}: the estimated biological variance from the linear model
+      \item \code{mean_obs}: the mean of the transformed observations
+      \item \code{var_obs}: the raw variance of the transformed observations
+      \item \code{sigma_sq_pmax}: the greater of sigma_sq or 0. This prevents
+        negative estimates for the biological variance
+    }
+  }
+}
+\description{
+Fit the measurement error model across all samples. This is an internal function
+called by \code{sleuth_fit}, and is not intended to be used directly by users.
+} From a289b4dc0fb6feea32dbe1d018f3d568f9ea75ee Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Thu, 21 Jun 2018 18:17:47 -0500 Subject: [PATCH 05/23] remove redundant code; this is taken care of in sleuth_fit --- R/likelihood.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 675eb63..61ddff3 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -12,11 +12,6 @@ compute_likelihood <- function(obj, which_model) { # the observations can be recovered by: # obj$fits$full$models[[1]]$ols_fit$residuals + fitted values - # TODO: move this elsewhere - obj$fits[[which_model]]$summary <- obj$fits[[which_model]]$summary[ - match(names(obj$fits[[which_model]]$models), - obj$fits[[which_model]]$summary$target_id), ] - all_likelihood <- sapply(seq_along(obj$fits[[which_model]]$models), function( i ) { cur_model <- obj$fits[[which_model]]$models[[ i ]] From b7cdbc8686031072a93284f4ed2d9a433296231b Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Thu, 21 Jun 2018 18:31:11 -0500 Subject: [PATCH 06/23] change sleuth_lrt, sleuth_wt, and extract_model to handle the new sleuth_fit API: + the models slot for the sleuth_model object is now the one model with the full matrix + these functions have retained the old code to remain backwards compatible with older versions of sleuth --- R/likelihood.R | 48 ++++++++++++++++++++++++++----------------- R/measurement_error.R | 12 ++++++++--- R/model.R | 22 +++++++++++++++----- 3 files changed, 55 insertions(+), 27 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 61ddff3..63f751e 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -4,29 +4,39 @@ compute_likelihood <- function(obj, which_model) { stopifnot(is(obj, "sleuth")) model_exists(obj, which_model) - # we basically do lapply on all of the models - # # the fitted values are here: - # obj$fits[[which_model]]$models$ols_fit$fitted.values + # obj$fits[[which_model]]$models$fitted.values # # the observations can be recovered by: - # obj$fits$full$models[[1]]$ols_fit$residuals + fitted values - - all_likelihood <- sapply(seq_along(obj$fits[[which_model]]$models), - function( i ) { - cur_model <- obj$fits[[which_model]]$models[[ i ]] - cur_mu <- cur_model$ols_fit$fitted.values - obs <- cur_model$ols_fit$residuals + cur_mu - - cur_summary <- obj$fits[[which_model]]$summary - - cur_var <- cur_summary[i, "smooth_sigma_sq_pmax"] + - cur_summary[i, "sigma_q_sq"] - - sum(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE)) + # obj$fits[[which_model]]$models$residuals + fitted values + + models <- obj$fits[[which_model]]$models + if (names(models)[1] == "coefficients") { + cur_model <- models + cur_mu <- cur_model$fitted.values + obs <- cur_model$residuals + cur_mu + cur_summary <- obj$fits[[which_model]]$summary + cur_var <- cur_summary$smooth_sigma_sq_pmax + cur_summary$sigma_q_sq + cur_var <- matrix(rep(cur_var, nrow(obs)), nrow = nrow(obs), byrow = TRUE) + + all_likelihood <- colSums(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE)) + } else { + # This is retained for backward compatibility with older versions of sleuth + all_likelihood <- sapply(seq_along(models), + function(i) { + cur_model <- models[[i]] + cur_mu <- cur_model$ols_fit$fitted.values + obs <- cur_model$ols_fit$residuals + cur_mu + + cur_summary <- obj$fits[[which_model]]$summary + + cur_var <- cur_summary[i, "smooth_sigma_sq_pmax"] + + cur_summary[i, "sigma_q_sq"] + + sum(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE)) }) - - names(all_likelihood) <- 
names(obj$fits[[which_model]]$models) + names(all_likelihood) <- names(models) + } obj$fits[[which_model]]$likelihood <- all_likelihood diff --git a/R/measurement_error.R b/R/measurement_error.R index f0b80bd..96f1e4b 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -263,11 +263,17 @@ sleuth_wt <- function(obj, which_beta, which_model = 'full') { paste(colnames(d_matrix[beta_i]), collapse = ' '))) } - b <- sapply(obj$fits[[ which_model ]]$models, - function(x) { + fit <- obj$fits[[which_model]]$models + if (names(fit)[1] == "coefficients") { + b <- fit$coefficients[beta_i, ] + names(b) <- colnames(fit$coefficients) + } else { + # This is retained for backward compatibility with older versions of sleuth + b <- sapply(fit, function(x) { x$ols_fit$coefficients[ beta_i ] }) - names(b) <- names(obj$fits[[ which_model ]]$models) + names(b) <- names(obj$fits[[ which_model ]]$models) + } res <- obj$fits[[ which_model ]]$summary res$target_id <- as.character(res$target_id) diff --git a/R/model.R b/R/model.R index 3cd4bd4..e8f7abe 100644 --- a/R/model.R +++ b/R/model.R @@ -537,14 +537,26 @@ extract_model <- function(obj, which_model) { ". Please check models(", substitute(obj), ") for fitted models.") } - res <- lapply(seq_along(obj$fits[[which_model]]$models), + fit <- obj$fits[[which_model]] + if (names(fit$models)[1] == "coefficients") { + coefficients <- fit$models$coefficients + target_ids <- rep(colnames(coefficients), each = nrow(coefficients)) + terms <- rownames(coefficients) + sq_std_err <- as.numeric(sapply(fit$beta_covars, function(x) diag(x))) + res <- data.frame(target_id = as.character(target_ids), term = as.character(terms), + estimate = as.numeric(coefficients), std_error = sqrt(sq_std_err)) + res + } else { + # This is retained for backward compatibility with older versions of sleuth + res <- lapply(seq_along(fit$models), function(i) { - x <- obj$fits[[which_model]]$models[[i]] + x <- fit$models[[i]] coefficients <- coef(x$ols_fit) list( - target_id = rep_len(names(obj$fits[[which_model]]$models)[i], length(coefficients)), + target_id = rep_len(names(fit$models)[i], length(coefficients)), term = names(coefficients), estimate = coefficients, - std_error = sqrt(diag(obj$fits[[which_model]]$beta_covars[[i]]))) + std_error = sqrt(diag(fit$beta_covars[[i]]))) }) - dplyr::bind_rows(res) + as.data.frame(dplyr::bind_rows(res)) + } } From 0a8034c293585166e94585cd42b01fd27250e5b9 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Tue, 24 Jul 2018 13:58:51 -0500 Subject: [PATCH 07/23] update how beta_covars is calculated and retrieved for sleuth_model objects: + speed up the calculation using matrix multiplication rather than lapply + calculate beta covariances for later use + bonus that this reduces memory footprint of the sleuth_model object further + update 'extract_model' and 'sleuth_wt' to use the new matrix format --- R/measurement_error.R | 45 ++++++++++++++++++++++++++++--------------- R/model.R | 2 +- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/R/measurement_error.R b/R/measurement_error.R index 96f1e4b..0218f3d 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -169,14 +169,28 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) 
{ smooth_sigma_sq_pmax = pmax(smooth_sigma_sq, sigma_sq)) msg('computing variance of betas') - beta_covars <- lapply(1:nrow(l_smooth), + sigma <- rowSums(l_smooth[, c("smooth_sigma_sq_pmax", "sigma_q_sq")]) + beta_covars <- sigma %*% t(diag(A)) + # the full beta covariance matrix is symmetric + # so we just need one set of off-diagonals + off_diag_covars <- sigma %*% t(A[lower.tri(A)]) + # we set the column names of the off-diagonal covariances + # as (beta #1):(beta #2) + # for example, if the formula is ~condition and + # the experiment has two conditions -- "A" and "B" -- + # there would be one off-diagonal column with the label + # "(Intercept):conditionB" + colnames(off_diag_covars) <- unlist(lapply(1:(nrow(A)-1), function(i) { - row <- l_smooth[i, ] - with(row, - covar_beta(smooth_sigma_sq_pmax + sigma_q_sq, X, A) - ) + c_names <- colnames(A)[i:ncol(A)] + r_name <- rownames(A)[i] + if (r_name == "(Intercept)") r_name <- "Intercept" + paste0("(", r_name, "):(", c_names, ")") }) - names(beta_covars) <- l_smooth$target_id + ) + + beta_covars <- cbind(beta_covars, off_diag_covars) + rownames(beta_covars) <- l_smooth$target_id if ( is.null(obj$fits) ) { obj$fits <- list() @@ -267,29 +281,30 @@ sleuth_wt <- function(obj, which_beta, which_model = 'full') { if (names(fit)[1] == "coefficients") { b <- fit$coefficients[beta_i, ] names(b) <- colnames(fit$coefficients) + se <- obj$fits[[which_model]]$beta_covars[, beta_i] } else { # This is retained for backward compatibility with older versions of sleuth b <- sapply(fit, function(x) { x$ols_fit$coefficients[ beta_i ] }) names(b) <- names(obj$fits[[ which_model ]]$models) + se <- sapply(obj$fits[[ which_model ]]$beta_covars, + function(x) { + x[beta_i, beta_i] + }) } + se <- sqrt( se ) + se <- se[ names(b) ] + + stopifnot( all.equal(names(b), names(se)) ) + res <- obj$fits[[ which_model ]]$summary res$target_id <- as.character(res$target_id) res <- res[match(names(b), res$target_id), ] stopifnot( all.equal(res$target_id, names(b)) ) - se <- sapply(obj$fits[[ which_model ]]$beta_covars, - function(x) { - x[beta_i, beta_i] - }) - se <- sqrt( se ) - se <- se[ names(b) ] - - stopifnot( all.equal(names(b), names(se)) ) - res <- dplyr::mutate(res, b = b, se_b = se, diff --git a/R/model.R b/R/model.R index e8f7abe..c91ba82 100644 --- a/R/model.R +++ b/R/model.R @@ -542,7 +542,7 @@ extract_model <- function(obj, which_model) { coefficients <- fit$models$coefficients target_ids <- rep(colnames(coefficients), each = nrow(coefficients)) terms <- rownames(coefficients) - sq_std_err <- as.numeric(sapply(fit$beta_covars, function(x) diag(x))) + sq_std_err <- as.numeric(t(fit$beta_covars[, terms])) res <- data.frame(target_id = as.character(target_ids), term = as.character(terms), estimate = as.numeric(coefficients), std_error = sqrt(sq_std_err)) res From 4176b7a692bb18d251912717ddadff6c48e55d11 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Wed, 27 Jun 2018 11:17:35 -0500 Subject: [PATCH 08/23] move sleuth shrinkage procedure to a separate function: + this will allow the option to supply custom shrinkage procedures + the default is now 'basic_shrink_fun', which contains the old sleuth shrinkage procedure done by 'sliding_window_grouping' and then 'shrink_df' + any custom function must take the list produced by me_model, and then output the mes_df within that list modified with at least one new column: "smooth_sigma_sq", the smooth variances + add updated documentation to 'sleuth_fit' + add documentation for 'basic_shrink_fun' + shrink_fun is now an 
additional slot on the sleuth_model object, to provide provenance for how
  the variance estimation was done

---
 NAMESPACE               |   1 +
 R/measurement_error.R   | 124 ++++++++++++++++++++++++++++++----------
 man/basic_shrink_fun.Rd |  51 +++++++++++++++++
 man/sleuth_fit.Rd       |  16 +++++-
 4 files changed, 159 insertions(+), 33 deletions(-)
 create mode 100644 man/basic_shrink_fun.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 01d495b..1e04e93 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,6 +24,7 @@ S3method(transform_status,sleuth_model)
 export("transform_fun_counts<-")
 export("transform_fun_tpm<-")
 export(basic_filter)
+export(basic_shrink_fun)
 export(bias_table)
 export(counts_to_fpkm)
 export(counts_to_tpm)
diff --git a/R/measurement_error.R b/R/measurement_error.R
index 0218f3d..01a3c53 100644
--- a/R/measurement_error.R
+++ b/R/measurement_error.R
@@ -51,8 +51,19 @@
 #' the \code{'scaled_reads_per_base'} summary statistic.
 #' }
 #'
-#' Advanced options for the sliding window shrinkage procedure (these options are passed to
-#' \code{\link{sliding_window_grouping}}):
+#' Advanced options for the shrinkage procedure:
+#'
+#' \itemize{
+#'   \item \code{shrink_fun}: this function does the shrinkage procedure. It must take the list produced
+#'     by \code{\link{me_model}}, with the full model and the data.frame with means (the \code{'mean_obs'}
+#'     column) and variances (the \code{'sigma_sq_pmax'} column), as input, and must output a modified
+#'     data.frame from list$mes_df, with the required column \code{'smooth_sigma_sq'} containing the
+#'     smoothed variances. Additional columns are allowed, but will not be directly processed by
+#'     \code{\link{sleuth_results}}. The default for this option is \code{\link{basic_shrink_fun}},
+#'     which implements the procedure described in the original sleuth paper.
+#' }
+#'
+#' Advanced options for the default sleuth shrinkage procedure (see \code{\link{basic_shrink_fun}}):
 #'
 #' \itemize{
 #'   \item \code{n_bins}: the number of bins that the data should be split for the sliding window shrinkage
@@ -70,6 +81,7 @@
 #' so <- sleuth_fit(so, ~1, 'reduced')
 #' @seealso \code{\link{models}} for seeing which models have been fit,
 #'   \code{\link{sleuth_prep}} for creating a sleuth object,
+#'   \code{\link{basic_shrink_fun}} for the sleuth variance shrinkage procedure,
 #'   \code{\link{sleuth_wt}} to test whether a coefficient is zero,
 #'   \code{\link{sleuth_lrt}} to test nested models.
 #' @export
@@ -80,26 +92,10 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
   extra_opts <- list(...)
   if ('which_var' %in% names(extra_opts)) {
     which_var <- extra_opts$which_var
+    which_var <- match.arg(which_var, c('obs_counts', 'obs_tpm'))
   } else {
     which_var <- 'obs_counts'
   }
-  if ('n_bins' %in% names(extra_opts)) {
-    n_bins <- extra_opts$n_bins
-  } else {
-    n_bins <- 100
-  }
-  if ('lwr' %in% names(extra_opts)) {
-    lwr <- extra_opts$lwr
-  } else {
-    lwr <- 0.25
-  }
-  if ('upr' %in% names(extra_opts)) {
-    upr <- extra_opts$lwr
-  } else {
-    upr <- 0.75
-  }
-
-  which_var <- match.arg(which_var, c('obs_counts', 'obs_tpm'))
 
   if (!is.null(obj$fits) && any(sapply(obj$fits, function(x) !x$transform_synced))) {
     stop("Your sleuth has fits which are not based on the current transform function. ",
@@ -152,19 +148,9 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
   msg("fitting measurement error models")
   mes <- me_model(obj, X, obj$bs_summary, which_var)
-  mes_df <- mes$mes_df
 
-  # FIXME: sometimes when sigma is negative the shrinkage estimation becomes NA
-  # this is for the few set of transcripts, but should be able to just do some
-  # simple fix
   msg('shrinkage estimation')
-  swg <- sliding_window_grouping(mes_df, 'mean_obs', 'sigma_sq_pmax',
-    n_bins = n_bins, lwr = lwr, upr = upr, ignore_zeroes = TRUE)
-  l_smooth <- shrink_df(swg, sqrt(sqrt(sigma_sq_pmax)) ~ mean_obs, 'iqr')
-  l_smooth <- dplyr::select(
-    dplyr::mutate(l_smooth, smooth_sigma_sq = shrink ^ 4),
-    -shrink)
-
+  l_smooth <- shrink_fun(mes, ...)
   l_smooth <- dplyr::mutate(l_smooth,
     smooth_sigma_sq_pmax = pmax(smooth_sigma_sq, sigma_sq))
@@ -203,6 +189,7 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
     formula = formula,
     design_matrix = X,
     transform_synced = TRUE,
+    shrink_fun = shrink_fun,
     which_var = which_var)
 
   class(obj$fits[[fit_name]]) <- 'sleuth_model'
@@ -585,7 +572,6 @@ me_model <- function (obj, design, bs_summary, which_var = "obs_counts") {
   stopifnot(all.equal(names(bs_summary[[sigma_var]]), rownames(bs_summary[[which_var]])))
   stopifnot(length(bs_summary[[sigma_var]]) == nrow(bs_summary[[which_var]]))
-
   sigma_q_sq <- bs_summary[[sigma_var]]
   y <- t(bs_summary[[which_var]])
   X <- design
@@ -604,3 +590,79 @@ me_model <- function (obj, design, bs_summary, which_var = "obs_counts") {
   mes_df <- dplyr::mutate(mes_df, sigma_sq_pmax = pmax(sigma_sq, 0))
   list(models = ols_fit, mes_df = mes_df)
 }
+
+#' Default Sleuth Variance Shrinkage Estimation
+#'
+#' This function performs the default procedure to shrink variance estimation.
+#'
+#' @param me_list the list produced by \code{\link{me_model}}.
+#' @param ... advanced options to tweak the shrinkage procedure. See "details"
+#'   for more information.
+#'
+#' @return a modified data.frame with the following additional columns:
+#'
+#' \itemize{
+#'   \item \code{'iqr'}: boolean for whether this feature was within the interquartile
+#'     range and used for shrinkage estimation
+#'   \item \code{'failed_ise'}: boolean for whether this feature failed the initial
+#'     round of shrinkage estimation using interpolation from the LOESS curve. This feature's
+#'     shrunken variance was estimated using the 'exact' option for LOESS.
+#'   \item \code{'smooth_sigma_sq'}: the estimated biological variance after the shrinkage
+#'     procedure
+#' }
+#'
+#' @details The default procedure does the following steps: it first bins the
+#' features by the mean of each feature's observations. It then takes the
+#' interquartile range (25th-75th percentile) of variances within each bin.
+#' Those means and variances are then input into a LOESS to calculate
+#' a curve using the formula (variance)^(1/4) ~ mean. The LOESS fit is then used
+#' to interpolate the shrunken variance for all features. The larger of the raw
+#' variance estimate or the shrunken variance from this procedure is then used.
+#'
+#' Advanced options for the shrinkage procedure (these options are passed to
+#' \code{\link{sliding_window_grouping}}):
+#'
+#' \itemize{
+#'   \item \code{n_bins}: the number of bins that the data should be split for the sliding window shrinkage
+#'     using the mean-variance curve. The default is 100.
+#'   \item \code{lwr}: the lower range of variances within each bin that should be included for the shrinkage
+#'     procedure. The default is 0.25 (meaning the 25th percentile).
+#'   \item \code{upr}: the upper range of variances within each bin that should be included for the shrinkage
+#'     procedure. The default is 0.75 (meaning the 75th percentile).
+#' }
+#' @export
+basic_shrink_fun <- function(me_list, ...) {
+  extra_opts <- list(...)
+  mes_df <- me_list$mes_df
+  good_opts <- c('n_bins', 'lwr', 'upr')
+  if (any(!(names(extra_opts) %in% good_opts))) {
+    bad_opts <- names(extra_opts)[which(!(names(extra_opts) %in% good_opts))]
+    warning("Some unrecognized options were passed to 'basic_shrink_fun': ", paste(bad_opts, collapse = ", "),
+            ". These are being ignored.")
+  }
+
+  if ('n_bins' %in% names(extra_opts)) {
+    n_bins <- extra_opts$n_bins
+  } else {
+    n_bins <- 100
+  }
+  if ('lwr' %in% names(extra_opts)) {
+    lwr <- extra_opts$lwr
+  } else {
+    lwr <- 0.25
+  }
+  if ('upr' %in% names(extra_opts)) {
+    upr <- extra_opts$upr
+  } else {
+    upr <- 0.75
+  }
+
+  swg <- sliding_window_grouping(mes_df, 'mean_obs', 'sigma_sq_pmax',
+    n_bins = n_bins, lwr = lwr, upr = upr, ignore_zeroes = TRUE)
+  l_smooth <- shrink_df(swg, sqrt(sqrt(sigma_sq_pmax)) ~ mean_obs, 'iqr')
+  l_smooth <- dplyr::select(
+    dplyr::mutate(l_smooth, smooth_sigma_sq = shrink ^ 4),
+    -shrink)
+
+  l_smooth
+}
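
A minimal sketch (not part of the patch) of a custom 'shrink_fun' obeying the
contract above: take the list from me_model() and return its mes_df with a
'smooth_sigma_sq' column added. The toy rule below (shrink every raw variance
halfway toward the global median) and the function name are hypothetical.

    median_shrink_fun <- function(me_list, ...) {
      mes_df <- me_list$mes_df
      # shrink each feature's raw variance toward the median raw variance
      mes_df$smooth_sigma_sq <- (mes_df$sigma_sq_pmax + median(mes_df$sigma_sq_pmax)) / 2
      mes_df
    }
    # hypothetical usage, assuming a sleuth object 'so':
    # so <- sleuth_fit(so, ~condition, 'full', shrink_fun = median_shrink_fun)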
diff --git a/man/basic_shrink_fun.Rd b/man/basic_shrink_fun.Rd
new file mode 100644
index 0000000..bd15d77
--- /dev/null
+++ b/man/basic_shrink_fun.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measurement_error.R
+\name{basic_shrink_fun}
+\alias{basic_shrink_fun}
+\title{Default Sleuth Variance Shrinkage Estimation}
+\usage{
+basic_shrink_fun(me_list, ...)
+}
+\arguments{
+\item{me_list}{the list produced by \code{\link{me_model}}.}
+
+\item{...}{advanced options to tweak the shrinkage procedure. See "details"
+for more information.}
+}
+\value{
+a modified data.frame with the following additional columns:
+
+\itemize{
+  \item \code{'iqr'}: boolean for whether this feature was within the interquartile
+    range and used for shrinkage estimation
+  \item \code{'failed_ise'}: boolean for whether this feature failed the initial
+    round of shrinkage estimation using interpolation from the LOESS curve. This feature's
+    shrunken variance was estimated using the 'exact' option for LOESS.
+  \item \code{'smooth_sigma_sq'}: the estimated biological variance after the shrinkage
+    procedure
+}
+}
+\description{
+This function performs the default procedure to shrink variance estimation.
+}
+\details{
+The default procedure does the following steps: it first bins the
+features by the mean of each feature's observations. It then takes the
+interquartile range (25th-75th percentile) of variances within each bin.
+Those means and variances are then input into a LOESS to calculate
+a curve using the formula (variance)^(1/4) ~ mean. The LOESS fit is then used
+to interpolate the shrunken variance for all features. The larger of the raw
+variance estimate or the shrunken variance from this procedure is then used.
+
+Advanced options for the shrinkage procedure (these options are passed to
+\code{\link{sliding_window_grouping}}):
+
+\itemize{
+  \item \code{n_bins}: the number of bins that the data should be split for the sliding window shrinkage
+    using the mean-variance curve. The default is 100.
+  \item \code{lwr}: the lower range of variances within each bin that should be included for the shrinkage
+    procedure. The default is 0.25 (meaning the 25th percentile).
+  \item \code{upr}: the upper range of variances within each bin that should be included for the shrinkage
+    procedure. The default is 0.75 (meaning the 75th percentile).
+}
+}
diff --git a/man/sleuth_fit.Rd b/man/sleuth_fit.Rd
index 7aa2f26..0f5eac8 100644
--- a/man/sleuth_fit.Rd
+++ b/man/sleuth_fit.Rd
@@ -48,8 +48,19 @@ Advanced options for modeling choice:
     the \code{'scaled_reads_per_base'} summary statistic.
 }
 
-Advanced options for the sliding window shrinkage procedure (these options are passed to
-\code{\link{sliding_window_grouping}}):
+Advanced options for the shrinkage procedure:
+
+\itemize{
+  \item \code{shrink_fun}: this function does the shrinkage procedure. It must take the list produced
+    by \code{\link{me_model}}, with the full model and the data.frame with means (the \code{'mean_obs'}
+    column) and variances (the \code{'sigma_sq_pmax'} column), as input, and must output a modified
+    data.frame from list$mes_df, with the required column \code{'smooth_sigma_sq'} containing the
+    smoothed variances. Additional columns are allowed, but will not be directly processed by
+    \code{\link{sleuth_results}}. The default for this option is \code{\link{basic_shrink_fun}},
+    which implements the procedure described in the original sleuth paper.
+}
+
+Advanced options for the default sleuth shrinkage procedure (see \code{\link{basic_shrink_fun}}):
 
 \itemize{
   \item \code{n_bins}: the number of bins that the data should be split for the sliding window shrinkage
@@ -69,6 +80,7 @@ so <- sleuth_fit(so, ~1, 'reduced')
 \seealso{
 \code{\link{models}} for seeing which models have been fit,
 \code{\link{sleuth_prep}} for creating a sleuth object,
+\code{\link{basic_shrink_fun}} for the sleuth variance shrinkage procedure,
 \code{\link{sleuth_wt}} to test whether a coefficient is zero,
 \code{\link{sleuth_lrt}} to test nested models.
 }

From a90ac317ae059a556e67bbabdc5df1281088c5c3 Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Fri, 29 Jun 2018 14:26:16 -0500
Subject: [PATCH 09/23] add sanity checks for shrink fun

---
 R/measurement_error.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/R/measurement_error.R b/R/measurement_error.R
index 01a3c53..82177d7 100644
--- a/R/measurement_error.R
+++ b/R/measurement_error.R
@@ -97,6 +97,16 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
     which_var <- 'obs_counts'
   }
 
+  if ('shrink_fun' %in% names(extra_opts)) {
+    shrink_fun <- extra_opts$shrink_fun
+  } else {
+    shrink_fun <- basic_shrink_fun
+  }
+
+  if (!is(shrink_fun, 'function')) {
+    stop("The advanced 'shrink_fun' argument must be a function.")
+  }
+
   if (!is.null(obj$fits) && any(sapply(obj$fits, function(x) !x$transform_synced))) {
     stop("Your sleuth has fits which are not based on the current transform function. ",
", "Please rerun sleuth_prep using the current or new function before running sleuth_fit.") From 73c61ff230c1e784032b959445469ebb6fb338e6 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Tue, 17 Jul 2018 18:04:36 -0500 Subject: [PATCH 10/23] modify how the shrinkage function is called to prevent warnings: + remove specified extra options needed in sleuth_fit call + use 'do.call' instead of directly calling shrink_fun + this solution was discussed [here](https://stackoverflow.com/a/7028630) --- R/measurement_error.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/measurement_error.R b/R/measurement_error.R index 82177d7..b0fbf39 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -93,12 +93,14 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) { if ('which_var' %in% names(extra_opts)) { which_var <- extra_opts$which_var which_var <- match.arg(which_var, c('obs_counts', 'obs_tpm')) + extra_opts$which_var <- NULL } else { which_var <- 'obs_counts' } if ('shrink_fun' %in% names(extra_opts)) { shrink_fun <- extra_opts$shrink_fun + extra_opts$shrink_fun <- NULL } else { shrink_fun <- basic_shrink_fun } @@ -160,7 +162,9 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) { mes <- me_model(obj, X, obj$bs_summary, which_var) msg('shrinkage estimation') - l_smooth <- shrink_fun(mes, ...) + shrink_opts <- list(mes) + shrink_opts <- append(shrink_opts, extra_opts) + l_smooth <- do.call(shrink_fun, shrink_opts) l_smooth <- dplyr::mutate(l_smooth, smooth_sigma_sq_pmax = pmax(smooth_sigma_sq, sigma_sq)) From a02423d38dc087e1a0ec63e1416bd26a2d321839 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Thu, 28 Jun 2018 15:06:28 -0500 Subject: [PATCH 11/23] add a variance shrinkage function to use limma's procedure --- NAMESPACE | 1 + R/measurement_error.R | 63 +++++++++++++++++++++++++++++++++++++++++ man/limma_shrink_fun.Rd | 42 +++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 man/limma_shrink_fun.Rd diff --git a/NAMESPACE b/NAMESPACE index 1e04e93..03022b8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(get_bootstraps) export(get_quantile) export(is_kallisto_subset) export(kallisto_table) +export(limma_shrink_fun) export(log_transform) export(melt_bootstrap_sleuth) export(models) diff --git a/R/measurement_error.R b/R/measurement_error.R index b0fbf39..d57c616 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -680,3 +680,66 @@ basic_shrink_fun <- function(me_list, ...) { l_smooth } + +#' Limma-style variance shrinkage estimation +#' +#' This function uses limma's procedure for variance shrinkage estimation +#' See ?limma::squeezeVar for more details for how it works and advanced options +#' available. +#' +#' @param me_list the list produced by \code{link{me_model}} +#' @param ... advanced options that will be passed to the squeezeVar function from limma. +#' the two advanced options available are: +#' \itemize{ +#' \item \code{robust}: boolean for whether the estimation should be made robust to +#' outlier variances. The default is \code{FALSE}. +#' \item \code{winsor.tail.p}: if robust is \code{TRUE}, this is a numeric vector of +#' length 1 or 2 to give the left and right tail proportions of 'x' to Winsorize. +#' } +#' please see ?limma::squeezeVar for more information. 
diff --git a/man/limma_shrink_fun.Rd b/man/limma_shrink_fun.Rd
new file mode 100644
index 0000000..54799e8
--- /dev/null
+++ b/man/limma_shrink_fun.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measurement_error.R
+\name{limma_shrink_fun}
+\alias{limma_shrink_fun}
+\title{Limma-style variance shrinkage estimation}
+\usage{
+limma_shrink_fun(me_list, ...)
+}
+\arguments{
+\item{me_list}{the list produced by \code{\link{me_model}}}
+
+\item{...}{advanced options that will be passed to the squeezeVar function from limma.
+The two advanced options available are:
+\itemize{
+  \item \code{robust}: boolean for whether the estimation should be made robust to
+    outlier variances. The default is \code{FALSE}.
+  \item \code{winsor.tail.p}: if robust is \code{TRUE}, this is a numeric vector of
+    length 1 or 2 to give the left and right tail proportions of 'x' to Winsorize.
+}
+Please see ?limma::squeezeVar for more information.}
+}
+\value{
+a data.frame taken from me_list$mes_df and modified to have the additional
+columns:
+  \itemize{
+    \item \code{'limma_s2_prior'}: contains the location of the prior distribution; this
+      corresponds to the 's2.prior' output by 'limma::eBayes'.
+    \item \code{'limma_df_prior'}: contains the degrees of freedom for the
+      prior distribution. This will be the same if robust is \code{FALSE} and
+      different for each feature if robust is \code{TRUE}.
+    \item \code{'smooth_sigma_sq'}: contains limma's estimated shrunken variances.
+      This corresponds to the 's2.post' output by 'limma::eBayes'.
+  }
+}
+\description{
+This function uses limma's procedure for variance shrinkage estimation.
+See ?limma::squeezeVar for more details on how it works and the advanced options
+available.
+} +\seealso{ +\code{\link{me_model}} for the measurement error model function +} From 55e061dacfd193193f55559fe6ab905e371555c5 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Fri, 29 Jun 2018 13:49:02 -0500 Subject: [PATCH 12/23] add limma to imports list to reflect adding the squeezeVar function --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/measurement_error.R | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2a567bc..baea0c0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,8 @@ Imports: matrixStats, pheatmap, shiny, - aggregation + aggregation, + limma Suggests: MASS, lintr, diff --git a/NAMESPACE b/NAMESPACE index 03022b8..e88b10c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,5 +86,6 @@ importFrom(data.table,fread) importFrom(dplyr,"%>%") importFrom(lazyeval,interp) importFrom(lazyeval,lazy) +importFrom(limma,squeezeVar) importFrom(rhdf5,h5write) importFrom(rhdf5,h5write.default) diff --git a/R/measurement_error.R b/R/measurement_error.R index d57c616..7e3c116 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -710,6 +710,7 @@ basic_shrink_fun <- function(me_list, ...) { #' This corresponds to the 's2.post' output by 'limma::eBayes'. #' } #' @seealso \code{\link{me_model}} for the measurement error model function +#' @importFrom limma squeezeVar #' @export limma_shrink_fun <- function(me_list, ...) { extra_opts <- list(...) From f211f2fabb938edac3904050305fd7f50ae5fa25 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Fri, 29 Jun 2018 14:26:51 -0500 Subject: [PATCH 13/23] fix bug that occurs when calculating degrees of freedom between two models in sleuth_lrt --- R/likelihood.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 63f751e..a305872 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -96,8 +96,13 @@ sleuth_lrt <- function(obj, null_model, alt_model) { test_statistic <- 2 * (a_ll - n_ll) - degrees_free <- obj$fits[[null_model]]$models[[1]]$ols_fit$df.residual - - obj$fits[[alt_model]]$models[[1]]$ols_fit$df.residual + if (names(obj$fits[[alt_model]]$models)[1] == "coefficients") { + degrees_free <- obj$fits[[null_model]]$models$df.residual - + obj$fits[[alt_model]]$models$df.residual + } else { + degrees_free <- obj$fits[[null_model]]$models[[1]]$ols_fit$df.residual - + obj$fits[[alt_model]]$models[[1]]$ols_fit$df.residual + } # P(chisq > test_statistic) p_value <- pchisq(test_statistic, degrees_free, lower.tail = FALSE) From d33e6205c19bf907cba3901f1ab1cf243271a4cb Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Fri, 29 Jun 2018 14:28:02 -0500 Subject: [PATCH 14/23] remove a line referencing a basic_shrink_fun specific column --- R/likelihood.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index a305872..3bd998c 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -110,7 +110,6 @@ sleuth_lrt <- function(obj, null_model, alt_model) { test_stat = test_statistic, pval = p_value) result <- dplyr::mutate(result, qval = p.adjust(pval, method = "BH")) model_info <- data.table::data.table(obj$fits[[null_model]]$summary) - model_info <- dplyr::select(model_info, -c(iqr)) result <- dplyr::left_join( data.table::data.table(result), model_info, From 243716bb1a9ccfd45fb7f015243478d8bf29ca1b Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Tue, 24 Jul 2018 13:34:32 -0500 Subject: [PATCH 15/23] fix memory leak from formulas that have captured external environments + in testing, this was capturing the full sleuth object, essentially 
doubling the size of the final sleuth object --- R/measurement_error.R | 1 + R/sleuth.R | 1 + 2 files changed, 2 insertions(+) diff --git a/R/measurement_error.R b/R/measurement_error.R index 7e3c116..145fba1 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -148,6 +148,7 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) { # TODO: check if model matrix is full rank X <- NULL if ( is(formula, 'formula') ) { + environment(formula) <- new.env() X <- model.matrix(formula, obj$sample_to_covariates) } else { if ( is.null(colnames(formula)) ) { diff --git a/R/sleuth.R b/R/sleuth.R index d0b4e5f..6770151 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -385,6 +385,7 @@ sleuth_prep <- function( design_matrix <- NULL if (is(full_model, 'formula')) { + environment(full_model) <- new.env() design_matrix <- model.matrix(full_model, sample_to_covariates) } else if (is(full_model, 'matrix')) { if (is.null(colnames(full_model))) { From f12f6ad5d934756d5b67885b77ef6108d638c32d Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Mon, 13 Aug 2018 15:02:24 -0500 Subject: [PATCH 16/23] replace outdated aperm with t --- R/bootstrap.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index 6e1d11f..72459f9 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -406,8 +406,8 @@ process_bootstrap <- function(i, samp_name, kal_path, est_count_sf = est_count_sf) if (read_bootstrap_tpm) { - bs_tpm <- aperm(apply(bs_mat, 1, counts_to_tpm, - eff_len)) + bs_tpm <- t(apply(bs_mat, 1, counts_to_tpm, + eff_len)) colnames(bs_tpm) <- colnames(bs_mat) # gene level code is analogous here to below code @@ -439,8 +439,8 @@ process_bootstrap <- function(i, samp_name, kal_path, rm(tidy_tpm) # these tables are very large } bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids]) - bs_quant_tpm <- aperm(apply(bs_tpm, 2, - quantile)) + bs_quant_tpm <- t(apply(bs_tpm, 2, + quantile)) colnames(bs_quant_tpm) <- c("min", "lower", "mid", "upper", "max") bs_quants$tpm <- bs_quant_tpm @@ -495,8 +495,8 @@ process_bootstrap <- function(i, samp_name, kal_path, bs_mat <- transform_fun_counts(bs_mat[, which_ids]) if (extra_bootstrap_summary) { - bs_quant_est_counts <- aperm(apply(bs_mat, 2, - quantile)) + bs_quant_est_counts <- t(apply(bs_mat, 2, + quantile)) colnames(bs_quant_est_counts) <- c("min", "lower", "mid", "upper", "max") bs_quants$est_counts <- bs_quant_est_counts From 17242ba7a57a6986690703a584336dce07280899 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Mon, 13 Aug 2018 15:03:12 -0500 Subject: [PATCH 17/23] add sanity check for adding off diagonal beta covariances + now only happens if there is more than one row, indicating that there is a beta beyond the intercept-only + this prevents an error occurring if the intercept-only model is used --- R/measurement_error.R | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/R/measurement_error.R b/R/measurement_error.R index 145fba1..ffd2770 100644 --- a/R/measurement_error.R +++ b/R/measurement_error.R @@ -174,23 +174,24 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) 
{ beta_covars <- sigma %*% t(diag(A)) # the full beta covariance matrix is symmetric # so we just need one set of off-diagonals - off_diag_covars <- sigma %*% t(A[lower.tri(A)]) - # we set the column names of the off-diagonal covariances - # as (beta #1):(beta #2) - # for example, if the formula is ~condition and - # the experiment has two conditions -- "A" and "B" -- - # there would be one off-diagonal column with the label - # "(Intercept):conditionB" - colnames(off_diag_covars) <- unlist(lapply(1:(nrow(A)-1), - function(i) { - c_names <- colnames(A)[i:ncol(A)] - r_name <- rownames(A)[i] - if (r_name == "(Intercept)") r_name <- "Intercept" - paste0("(", r_name, "):(", c_names, ")") - }) - ) - - beta_covars <- cbind(beta_covars, off_diag_covars) + if (nrow(A) > 1) { + off_diag_covars <- sigma %*% t(A[lower.tri(A)]) + # we set the column names of the off-diagonal covariances + # as (beta #1):(beta #2) + # for example, if the formula is ~condition and + # the experiment has two conditions -- "A" and "B" -- + # there would be one off-diagonal column with the label + # "(Intercept):conditionB" + colnames(off_diag_covars) <- unlist(lapply(1:(nrow(A)-1), + function(i) { + c_names <- colnames(A)[(i+1):ncol(A)] + r_name <- rownames(A)[i] + if (r_name == "(Intercept)") r_name <- "Intercept" + paste0("(", r_name, "):(", c_names, ")") + }) + ) + beta_covars <- cbind(beta_covars, off_diag_covars) + } rownames(beta_covars) <- l_smooth$target_id if ( is.null(obj$fits) ) { From 0d7b5cb9405c7b32475f64f745748c3acb6b6a08 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Mon, 13 Aug 2018 15:42:50 -0500 Subject: [PATCH 18/23] change API for transform functions to explicitly normalize within the function + process bootstrap now normalizes TPMs by TPM size factors instead of indirectly by count size factor + add 'sf' argument to the default log_transform function, taking a single size factor when normalizing bootstraps, or a vector of size factors equal in length to the number of samples when normalizing the observed counts + add 'tpm_transform' as an equivalent default function for TPMs. 
+ this will facilitate an alternative approach for sleuth-ALR that decouples normalization and transformation steps --- R/bootstrap.R | 9 +++---- R/read_write.R | 5 ++-- R/sleuth.R | 66 +++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index 72459f9..11f014e 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -380,7 +380,7 @@ dcast_bootstrap.kallisto <- function(obj, units, nsamples = NULL) { # Function to process bootstraps for parallelization process_bootstrap <- function(i, samp_name, kal_path, - num_transcripts, est_count_sf, + num_transcripts, est_count_sf, est_tpm_sf, read_bootstrap_tpm, gene_mode, extra_bootstrap_summary, target_id, mappings, which_ids, @@ -402,8 +402,7 @@ process_bootstrap <- function(i, samp_name, kal_path, eff_len <- rhdf5::h5read(kal_path$path, "aux/eff_lengths") bs_mat <- read_bootstrap_mat(fname = kal_path$path, num_bootstraps = num_bootstrap, - num_transcripts = num_transcripts, - est_count_sf = est_count_sf) + num_transcripts = num_transcripts) if (read_bootstrap_tpm) { bs_tpm <- t(apply(bs_mat, 1, counts_to_tpm, @@ -438,7 +437,7 @@ process_bootstrap <- function(i, samp_name, kal_path, bs_tpm <- as.matrix(bs_tpm[, -1]) rm(tidy_tpm) # these tables are very large } - bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids]) + bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids], sf = est_tpm_sf) bs_quant_tpm <- t(apply(bs_tpm, 2, quantile)) colnames(bs_quant_tpm) <- c("min", "lower", "mid", @@ -493,7 +492,7 @@ process_bootstrap <- function(i, samp_name, kal_path, rm(tidy_bs, scaled_bs) } - bs_mat <- transform_fun_counts(bs_mat[, which_ids]) + bs_mat <- transform_fun_counts(bs_mat[, which_ids], sf = est_count_sf) if (extra_bootstrap_summary) { bs_quant_est_counts <- t(apply(bs_mat, 2, quantile)) diff --git a/R/read_write.R b/R/read_write.R index 26c23c3..97bc100 100644 --- a/R/read_write.R +++ b/R/read_write.R @@ -171,11 +171,10 @@ h5check <- function(fname, group, name) { # @return a matrix with each row being a bootstrap sample read_bootstrap_mat <- function(fname, num_bootstraps, - num_transcripts, - est_count_sf) { + num_transcripts) { bs_mat <- matrix(ncol = num_bootstraps, nrow = num_transcripts) for (i in 1:ncol(bs_mat)) { - bs_mat[, i] <- rhdf5::h5read(fname, paste0("bootstrap/bs", i - 1)) / est_count_sf + bs_mat[, i] <- rhdf5::h5read(fname, paste0("bootstrap/bs", i - 1)) } bs_mat <- t(bs_mat) target_id <- as.character(rhdf5::h5read(fname, "aux/ids")) diff --git a/R/sleuth.R b/R/sleuth.R index 6770151..988b362 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -35,10 +35,40 @@ basic_filter <- function(row, min_reads = 5, min_prop = 0.47) { #' #' @param x numeric that must be >=0. represents an individual observed count (one transcript in one sample). #' @param offset numeric offset to prevent taking the log of 0. -#' @return log(x + offset) +#' @param sf the size factor to normalize the data. Must either be a single number or a numeric vector +#' equal in length to the number of samples in x. 
diff --git a/R/sleuth.R b/R/sleuth.R
index 6770151..988b362 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -35,10 +35,40 @@ basic_filter <- function(row, min_reads = 5, min_prop = 0.47) {
 #'
 #' @param x numeric that must be >=0. represents an individual observed count (one transcript in one sample).
 #' @param offset numeric offset to prevent taking the log of 0.
-#' @return log(x + offset)
+#' @param sf the size factor to normalize the data. Must either be a single number or a numeric vector
+#' equal in length to the number of samples in x.
+#' @return log(x / sf + offset); with the default \code{sf = 1}, this is log(x + offset)
 #' @export
-log_transform <- function(x, offset=0.5) {
-  log(x + offset)
+log_transform <- function(x, offset = 0.5, sf = 1) {
+  if (length(sf) == 1) {
+    log(x / sf + offset)
+  } else if (length(sf) == ncol(x)) {
+    log(t(t(x) / sf) + offset)
+  } else {
+    stop("please provide a single size factor or a size factor vector equal ",
+         "in length to the number of samples")
+  }
+}
+
+#' Default TPM transformation
+#'
+#' The default transformation for the TPMs: normalize by the size factor(s), with no log taken.
+#'
+#' @param x numeric that must be >=0. represents an individual observed TPM value (one transcript in one sample).
+#' @param sf the size factor to normalize the data. Must either be a
+#' single number or a numeric vector equal in length to the number of
+#' samples in x.
+#' @return x / sf, i.e. the normalized TPMs
+#' @export
+tpm_transform <- function(x, sf = 1) {
+  if (length(sf) == 1) {
+    x / sf
+  } else if (length(sf) == ncol(x)) {
+    t(t(x) / sf)
+  } else {
+    stop("please provide a single size factor or a size factor vector equal ",
+         "in length to the number of samples")
+  }
 }
 
 # currently defunct
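
(A toy illustration of the t(t(x) / sf) idiom used in the vector branch
above -- plain x / sf would recycle sf down the rows and mix up samples:)

    x <- matrix(1:6, nrow = 2) # 2 transcripts (rows) x 3 samples (columns)
    sf <- c(1, 2, 4)           # one size factor per sample
    t(t(x) / sf)               # divides each column by its own size factor
    x / sf                     # wrong here: recycles sf along rows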
@@ -145,10 +175,17 @@ filter_df_by_groups <- function(df, fun, group_df, ...) {
 #' Advanced Options for the Transformation Step:
 #' (NOTE: Be sure you know what you're doing before you use these options)
 #' \itemize{
-#' \item \code{transform_fun_counts}: the transformation that should be applied
-#' to the normalized counts. Default is \code{'log(x+0.5)'} (i.e. natural log with 0.5 offset).
-#' \item \code{transform_fun_tpm}: the transformation that should be applied
-#' to the TPM values. Default is \code{'x'} (i.e. the identity function / no transformation)
+#' \item \code{transform_fun_counts}: the transformation that should be
+#' applied to the normalized counts. Any custom function must take the
+#' matrix of raw counts and either a single size factor when normalizing
+#' bootstrap counts or a vector of size factors when normalizing the
+#' observed counts. Default is \code{'log(x/sf+0.5)'} (i.e. natural log of
+#' the normalized counts with 0.5 offset).
+#' \item \code{transform_fun_tpm}: the transformation that should be
+#' applied to the TPM values. Any custom function must take the matrix of
+#' raw TPMs and either a single size factor when normalizing bootstrap TPMs
+#' or a vector of size factors when normalizing the observed TPMs. Default
+#' is \code{'x/sf'} (i.e. the normalized TPMs with no further transformation)
 #' }
 #'
 #' Advanced Options for Gene Aggregation:
@@ -228,7 +265,7 @@ sleuth_prep <- function(
   if ("transform_fun_tpm" %in% names(extra_opts)) {
     transform_fun_tpm <- extra_opts$transform_fun_tpm
   } else {
-    transform_fun_tpm <- identity
+    transform_fun_tpm <- tpm_transform
   }
   if ("gene_mode" %in% names(extra_opts)) {
     gene_mode <- extra_opts$gene_mode
@@ -620,7 +657,7 @@ sleuth_prep <- function(
       samp_name <- sample_to_covariates$sample[i]
       kal_path <- get_kallisto_path(kal_dirs[i])
       process_bootstrap(i, samp_name, kal_path,
-                        num_transcripts, est_counts_sf[[i]],
+                        num_transcripts, est_counts_sf[[i]], tpm_sf[[i]],
                         read_bootstrap_tpm, ret$gene_mode,
                         extra_bootstrap_summary,
                         target_id, mappings, which_ids, ret$gene_column,
@@ -678,9 +715,9 @@ sleuth_prep <- function(
   }
   sigma_q_sq <- sigma_q_sq[order(names(sigma_q_sq))]
 
-  obs_counts <- ret$transform_fun_counts(obs_counts)
+  obs_counts <- ret$transform_fun_counts(obs_counts, sf = est_counts_sf)
   obs_counts <- obs_counts[order(rownames(obs_counts)),]
-  obs_tpm <- ret$transform_fun_tpm(obs_tpm)
+  obs_tpm <- ret$transform_fun_tpm(obs_tpm, sf = tpm_sf)
   obs_tpm <- obs_tpm[order(rownames(obs_tpm)),]
 
   ret$bs_summary <- list(obs_counts = obs_counts, sigma_q_sq = sigma_q_sq)
@@ -1010,10 +1047,11 @@ kallisto_table <- function(obj,
 #
 # @param obj is a sleuth object
 # @param value_name either "est_counts" or "tpm"
+# @param which_df "obs_raw" or "obs_norm"
 # @return a matrix with the appropriate names
-obs_to_matrix <- function(obj, value_name) {
-
-  obs_counts <- reshape2::dcast(obj$obs_norm, target_id ~ sample,
+obs_to_matrix <- function(obj, value_name, which_df = "obs_raw") {
+  which_df <- match.arg(which_df, c("obs_raw", "obs_norm"))
+  obs_counts <- reshape2::dcast(obj[[which_df]], target_id ~ sample,
     value.var = value_name)
 
   obs_counts <- as.data.frame(obs_counts)

From 79e99cf535be3bf784e2d2fa5a76d5308fc3b393 Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Mon, 20 Aug 2018 10:49:58 -0500
Subject: [PATCH 19/23] fix bug where sleuth_model summary table was in a
 different order than the target IDs in the fit

+ this now guarantees that the fit object, the summary table, and the
  beta_covars table are all in the same order
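
A minimal sketch of the new guarantee (toy IDs; the real code matches
l_smooth against the fit's coefficient column names):

    tids <- c("tx2", "tx1", "tx3") # order used in the fit
    l_smooth <- data.frame(target_id = c("tx1", "tx2", "tx3"),
                           smooth_sigma_sq = c(0.1, 0.2, 0.3),
                           stringsAsFactors = FALSE)
    l_smooth <- l_smooth[match(tids, l_smooth$target_id), ]
    stopifnot(identical(l_smooth$target_id, tids)) # now always TRUE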
", + "The fit summary target IDs don't match the observed data target IDs") + } + msg('computing variance of betas') sigma <- rowSums(l_smooth[, c("smooth_sigma_sq_pmax", "sigma_q_sq")]) beta_covars <- sigma %*% t(diag(A)) From d733061eb943cf73874416f169ebbf17812d0f43 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Tue, 21 Aug 2018 15:11:02 -0500 Subject: [PATCH 20/23] fix eval statements for scatter plot on shiny: + switch to rlang::eval_tidy + add rlang to NAMESPACE and import list + fixes issue #194 --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/shiny.R | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2a567bc..2ba3070 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Imports: tidyr, reshape2, rhdf5, + rlang, parallel, lazyeval, matrixStats, @@ -33,4 +34,4 @@ Suggests: testthat, knitr VignetteBuilder: knitr -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 diff --git a/NAMESPACE b/NAMESPACE index bb5f80d..48c983a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -87,3 +87,4 @@ importFrom(lazyeval,interp) importFrom(lazyeval,lazy) importFrom(rhdf5,h5write) importFrom(rhdf5,h5write.default) +importFrom(rlang,eval_tidy) diff --git a/R/shiny.R b/R/shiny.R index bd37f6e..faae5d7 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -26,6 +26,7 @@ #' @param options additional options which are sent to shiny #' @param ... additional parameters sent to plotting functions #' @return a \code{\link{shinyApp}} result +#' @importFrom rlang eval_tidy #' @export #' @seealso \code{\link{sleuth_fit}}, \code{\link{sleuth_live_settings}}, \code{\link{sleuth_deploy}} sleuth_live <- function(obj, settings = sleuth_live_settings(), @@ -1205,8 +1206,8 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(), use_filtered = input$scatter_filt, offset = as.numeric(input$scatter_offset)) # get the data in the form that it is displayed in the plot - x <- eval(p$mapping$x, envir = p$data) - y <- eval(p$mapping$y, envir = p$data) + x <- rlang::eval_tidy(p$mapping$x, data = p$data) + y <- rlang::eval_tidy(p$mapping$y, data = p$data) rv_scatter$data <- data.frame(target_id = p$data$target_id, x = x, y = y, stringsAsFactors = FALSE) saved_plots_and_tables$scatter_plt <- p From 653908d9d7b0dc608fc12894864ee389c79b8746 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Mon, 5 Nov 2018 13:38:41 -0600 Subject: [PATCH 21/23] update sanity check for gene_mode and pval_aggregate in sleuth_results --- R/model.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/R/model.R b/R/model.R index 3cd4bd4..eabb415 100644 --- a/R/model.R +++ b/R/model.R @@ -394,7 +394,17 @@ sleuth_results <- function(obj, test, test_type = 'wt', # } if (obj$gene_mode && pval_aggregate) { - stop("This shouldn't happen. Please report this issue.") + if (obj$pval_aggregate) { + stop("Both 'gene_mode' and 'pval_aggregate' are TRUE in this sleuth ", + "object. This shouldn't happen. Please report this issue.") + } else { + stop("'pval_aggregate' is set to TRUE, but the sleuth object has ", + "already gone through count aggregation when 'gene_mode' was set ", + "to TRUE in 'sleuth_prep'. If you intend count aggregation, omit ", + "the 'pval_aggregate' argument here. 
---
 DESCRIPTION | 3 ++-
 NAMESPACE   | 1 +
 R/shiny.R   | 5 +++--
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2a567bc..2ba3070 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,6 +21,7 @@ Imports:
     tidyr,
     reshape2,
     rhdf5,
+    rlang,
     parallel,
     lazyeval,
     matrixStats,
@@ -33,4 +34,4 @@ Suggests:
     testthat,
     knitr
 VignetteBuilder: knitr
-RoxygenNote: 6.0.1
+RoxygenNote: 6.1.0
diff --git a/NAMESPACE b/NAMESPACE
index bb5f80d..48c983a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -87,3 +87,4 @@ importFrom(lazyeval,interp)
 importFrom(lazyeval,lazy)
 importFrom(rhdf5,h5write)
 importFrom(rhdf5,h5write.default)
+importFrom(rlang,eval_tidy)
diff --git a/R/shiny.R b/R/shiny.R
index bd37f6e..faae5d7 100644
--- a/R/shiny.R
+++ b/R/shiny.R
@@ -26,6 +26,7 @@
 #' @param options additional options which are sent to shiny
 #' @param ... additional parameters sent to plotting functions
 #' @return a \code{\link{shinyApp}} result
+#' @importFrom rlang eval_tidy
 #' @export
 #' @seealso \code{\link{sleuth_fit}}, \code{\link{sleuth_live_settings}}, \code{\link{sleuth_deploy}}
 sleuth_live <- function(obj, settings = sleuth_live_settings(),
@@ -1205,8 +1206,8 @@ sleuth_live <- function(obj, settings = sleuth_live_settings(),
         use_filtered = input$scatter_filt,
         offset = as.numeric(input$scatter_offset))
       # get the data in the form that it is displayed in the plot
-      x <- eval(p$mapping$x, envir = p$data)
-      y <- eval(p$mapping$y, envir = p$data)
+      x <- rlang::eval_tidy(p$mapping$x, data = p$data)
+      y <- rlang::eval_tidy(p$mapping$y, data = p$data)
       rv_scatter$data <- data.frame(target_id = p$data$target_id,
         x = x, y = y, stringsAsFactors = FALSE)
       saved_plots_and_tables$scatter_plt <- p

From 653908d9d7b0dc608fc12894864ee389c79b8746 Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Mon, 5 Nov 2018 13:38:41 -0600
Subject: [PATCH 21/23] update sanity check for gene_mode and pval_aggregate
 in sleuth_results

---
 R/model.R | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/R/model.R b/R/model.R
index 3cd4bd4..eabb415 100644
--- a/R/model.R
+++ b/R/model.R
@@ -394,7 +394,17 @@ sleuth_results <- function(obj, test, test_type = 'wt',
   # }
 
   if (obj$gene_mode && pval_aggregate) {
-    stop("This shouldn't happen. Please report this issue.")
+    if (obj$pval_aggregate) {
+      stop("Both 'gene_mode' and 'pval_aggregate' are TRUE in this sleuth ",
+           "object. This shouldn't happen. Please report this issue.")
+    } else {
+      stop("'pval_aggregate' is set to TRUE, but the sleuth object has ",
+           "already gone through count aggregation when 'gene_mode' was set ",
+           "to TRUE in 'sleuth_prep'. If you intend count aggregation, omit ",
+           "the 'pval_aggregate' argument here. If you intend p-value ",
+           "aggregation, repeat 'sleuth_prep' without the 'gene_mode' ",
+           "argument.")
+    }
   }
 
   if (pval_aggregate && is.null(obj$gene_column)) {

From 4dc43552626fd2649972bc87b5b412a8e390680e Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Sat, 16 Mar 2019 11:29:21 -0500
Subject: [PATCH 22/23] fix bug where obs_to_matrix was using obs_raw rather
 than obs_norm

---
 R/sleuth.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sleuth.R b/R/sleuth.R
index 0fd1a1e..fe117d1 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -1049,7 +1049,7 @@ kallisto_table <- function(obj,
 # @param value_name either "est_counts" or "tpm"
 # @param which_df "obs_raw" or "obs_norm"
 # @return a matrix with the appropriate names
-obs_to_matrix <- function(obj, value_name, which_df = "obs_raw") {
+obs_to_matrix <- function(obj, value_name, which_df = "obs_norm") {
   which_df <- match.arg(which_df, c("obs_raw", "obs_norm"))
   obs_counts <- reshape2::dcast(obj[[which_df]], target_id ~ sample,
     value.var = value_name)

From b3b1c58c79d78a00b1bcf48566df1e6c762c2417 Mon Sep 17 00:00:00 2001
From: warrenmcg
Date: Mon, 18 Mar 2019 13:59:37 -0500
Subject: [PATCH 23/23] small version bump

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 87004fb..0701590 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sleuth
 Title: Tools for investigating RNA-Seq
-Version: 0.30.0
+Version: 0.30.0-1
 Authors@R: c(
     person("Harold", "Pimentel", , "haroldpimentel@gmail.com", role = c("aut", "cre")),
     person("Warren", "McGee", , "warren-mcgee@fsm.northwestern.edu", role = "aut"))