diff --git a/R/dmpFinder.R b/R/dmpFinder.R index c570e30..cb187ae 100644 --- a/R/dmpFinder.R +++ b/R/dmpFinder.R @@ -1,9 +1,33 @@ # Exported functions ----------------------------------------------------------- -dmpFinder <- function(dat, pheno, type = c("categorical", "continuous"), - qCutoff = 1, shrinkVar = FALSE) { - - # Check inputs +dmpFinder.modified <- function(dat, + pheno, + cov.cols=NULL, + type = c("categorical", "continuous"), + qCutoff = 1, + shrinkVar = FALSE) { + if (is.list(pheno)){ + pheno <- unlist(pheno) + } + pheno.array <- as.data.frame(pheno) + suppressWarnings({ + require(SummarizedExperiment) + require(limma) + require(MatrixGenerics) + require(siggenes) + }) + .isMatrixBacked <- function(object) { + stopifnot(is(object, "SummarizedExperiment")) + all(vapply(assays(object), is.matrix, logical(1L))) + } + .isMatrixBackedOrStop <- function(object, FUN) { + if (!.isMatrixBacked(object)) { + stop("'", + FUN, + "()' only supports matrix-backed minfi objects.", + call. = FALSE) + } + } type <- match.arg(type) if (is(dat, "MethylSet")) { .isMatrixBackedOrStop(dat, "dmpFinder") @@ -13,44 +37,76 @@ dmpFinder <- function(dat, pheno, type = c("categorical", "continuous"), } else { stopifnot(is.numeric(dat)) M <- dat - if (is.vector(M)) M <- matrix(M, nrow = 1) + if (is.vector(M)) { + M <- matrix(M, nrow = 1) + } } + pheno <- pheno.array[, 1] + if (!is.null(cov.cols)) { + covs <- pheno.array[, cov.cols] + is.cat <- sapply(covs, is.factor) + if (sum(is.cat) > 1) { + cov.cat <- factor(apply(covs[, is.cat], 1, paste, collapse = "-")) + } else { + cov.cat <- factor(covs[, is.cat]) + } + covs <- cbind(covs[!is.cat], cov.cat) + } else { + covs <- NULL + } + pheno <- factor(as.character(pheno)) + if (!is.null(covs)) { + pheno.and.covs <- cbind(pheno, covs) + } else { + pheno.and.covs <- pheno + } + n <- length(pheno) - if (n != ncol(M)) stop("length of pheno does not equal number of samples") - - if (type == "categorical") { - pheno <- factor(as.character(pheno)) + if (n != ncol(M)) { + stop("length of pheno does not equal number of samples") + } + if (!is.null(covs)){ + design <- model.matrix( ~ ., data = pheno.and.covs) + } else { design <- model.matrix(~pheno) - fit <- lmFit(M, design) + } + fit <- lmFit(M, design) + if (type == "categorical") { if (shrinkVar) { - fit <- contrasts.fit(fit, contrasts(pheno)) fit <- eBayes(fit) tab <- data.frame( intercept = fit$coefficients[, 1], f = fit[["F"]], - pval = fit[["F.p.value"]]) + pval = fit[["F.p.value"]] + ) } else { - fit1 <- lmFit(M) - RSS1 <- rowSums2((M - fitted(fit1))^2) - RSS <- rowSums2((M - fitted(fit))^2) + if (is.null(covs)) { + fit1 <- lmFit(M) + } else { + fit1 <- lmFit(M, model.matrix( ~ ., data = covs)) + } + RSS1 <- rowSums2((M - fitted(fit1)) ^ 2) + RSS <- rowSums2((M - fitted(fit)) ^ 2) df1 <- length(levels(pheno)) - 1 - df2 <- n - length(levels(pheno)) - Fstat <- ((RSS1 - RSS)/df1)/(RSS/df2) + if (is.null(covs)) { + df2 <- n - length(levels(pheno)) + } else { + cat.unique <- length(unique(interaction(covs[, is.cat]))) + df2 <- n - length(levels(pheno)) - cat.unique - sum(!is.cat) + } + Fstat <- ((RSS1 - RSS) / df1) / (RSS / df2) if (df2 > 1e+06) { F.p.value <- pchisq(df1 * Fstat, df1, lower.tail = FALSE) - } - else { + } else { F.p.value <- pf(Fstat, df1, df2, lower.tail = FALSE) } tab <- data.frame( intercept = fit$coefficients[, 1], f = Fstat, - pval = F.p.value) + pval = F.p.value + ) } - } - else if (type == "continuous") { - design <- model.matrix(~pheno) - fit <- lmFit(M, design) + } else if (type == "continuous") { if (shrinkVar) { fit <- eBayes(fit) sigma <- sqrt(fit$s2.post) @@ -60,23 +116,29 @@ dmpFinder <- function(dat, pheno, type = c("categorical", "continuous"), df <- fit$df.residual } coef <- fit$coefficients - if (is.vector(coef)) coef <- matrix(coef, ncol = 2) + if (is.vector(coef)) { + coef <- matrix(coef, ncol = 2) + } stdev <- fit$stdev.unscaled - if (is.vector(stdev)) stdev <- matrix(stdev, ncol = 2) + if (is.vector(stdev)) { + stdev <- matrix(stdev, ncol = 2) + } t <- coef[, 2] / (stdev[, 2] * sigma) pval <- 2 * pt(abs(t), df = df, lower.tail = FALSE) tab <- data.frame( intercept = coef[, 1], beta = coef[, 2], t = t, - pval = pval) + pval = pval + ) } p0 <- pi0.est(tab$pval[!is.na(tab$pval)])$p0 tab$qval <- qvalue.cal(tab$pval, p0) - if (qCutoff < 1) tab <- tab[tab$qval <= qCutoff,] + if (qCutoff < 1) { + tab <- tab[tab$qval <= qCutoff, ] + } o <- order(tab$pval) tab <- tab[o, ] - ## tab <- cbind(tab, annotate(rownames(tab))) if (nrow(tab) == 0) { message(sprintf("No significant DMPs at FDR cutoff of %s.", qCutoff)) }