From 358eaed39cfbe2b95873ddb74ac93e9b9038749d Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 15:56:50 +0000 Subject: [PATCH 01/34] Bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c1ef4673..72a5e0dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: TwoSampleMR Title: Two Sample MR Functions and Interface to MRC Integrative Epidemiology Unit OpenGWAS Database -Version: 0.6.29 +Version: 0.6.30 Authors@R: c( person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0920-1055")), From 065b4cab6c8edad90c7c1a0d1c258ad065c75ce3 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 15:58:15 +0000 Subject: [PATCH 02/34] Delete duplicated weighted_median function --- R/mr.R | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/R/mr.R b/R/mr.R index 59fc578d..af324afe 100644 --- a/R/mr.R +++ b/R/mr.R @@ -764,19 +764,6 @@ weighted_median <- function(b_iv, weights) { return(b) } -weighted_median <- function(b_iv, weights) { - betaIV.order <- b_iv[order(b_iv)] - weights.order <- weights[order(b_iv)] - weights.sum <- cumsum(weights.order) - 0.5 * weights.order - weights.sum <- weights.sum / sum(weights.order) - below <- max(which(weights.sum < 0.5)) - b <- betaIV.order[below] + - (betaIV.order[below + 1] - betaIV.order[below]) * - (0.5 - weights.sum[below]) / - (weights.sum[below + 1] - weights.sum[below]) - return(b) -} - #' Calculate standard errors for weighted median method using bootstrap #' From f5aa0ba2c7ef37100a7b1d81077462f188317783 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 15:58:47 +0000 Subject: [PATCH 03/34] Vectorise mr_egger_regression_bootstrap() --- R/mr.R | 54 +++++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/R/mr.R b/R/mr.R index af324afe..2ff9f117 100644 --- a/R/mr.R +++ b/R/mr.R @@ -636,36 +636,40 @@ mr_egger_regression_bootstrap <- function(b_exp, b_out, se_exp, se_out, paramete )) } nboot <- parameters$nboot - # Do bootstraps - res <- array(0, c(nboot + 1, 2)) - # pb <- txtProgressBar(min = 0, max = nboot, initial = 0, style=3) - for (i in 1:nboot) { - # setTxtProgressBar(pb, i) - #sample from distributions of SNP betas - xs <- stats::rnorm(length(b_exp), b_exp, se_exp) - ys <- stats::rnorm(length(b_out), b_out, se_out) - - # Use absolute values for Egger reg - ys <- ys * sign(xs) - xs <- abs(xs) - - #weighted regression with given formula - # r <- summary(lm(ys ~ xs, weights=1/se_out^2)) - r <- linreg(xs, ys, 1 / se_out^2) - - #collect coefficient from given line. - res[i, 1] <- r$ahat - res[i, 2] <- r$bhat - # res[i, 1] <- r$coefficients[1,1] - # res[i, 2] <- r$coefficients[2,1] - } - cat("\n") + nsnp <- length(b_exp) + weights <- 1 / se_out^2 + + # Vectorized bootstrap: generate all random values at once + # Matrix dimensions: nboot rows x nsnp columns + xs_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_exp, each = nboot), + sd = rep(se_exp, each = nboot)), + nrow = nboot, ncol = nsnp) + ys_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_out, each = nboot), + sd = rep(se_out, each = nboot)), + nrow = nboot, ncol = nsnp) + + # Apply sign correction for Egger regression (vectorized) + ys_mat <- ys_mat * sign(xs_mat) + xs_mat <- abs(xs_mat) + + # Vectorized weighted linear regression for all bootstrap iterations + # For each bootstrap iteration, compute weighted regression coefficients + # Using the formula: bhat = cov(x*w, y*w) / var(x*w), ahat = mean(y) - mean(x)*bhat + res <- t(vapply(seq_len(nboot), function(i) { + xs <- xs_mat[i, ] + ys <- ys_mat[i, ] + xw <- xs * weights + yw <- ys * weights + bhat <- stats::cov(xw, yw, use = "pair") / stats::var(xw, na.rm = TRUE) + ahat <- mean(ys, na.rm = TRUE) - mean(xs, na.rm = TRUE) * bhat + c(ahat, bhat) + }, numeric(2))) return(list( b = mean(res[, 2], na.rm = TRUE), se = stats::sd(res[, 2], na.rm = TRUE), pval = sum(sign(mean(res[, 2], na.rm = TRUE)) * res[, 2] < 0) / nboot, - nsnp = length(b_exp), + nsnp = nsnp, b_i = mean(res[, 1], na.rm = TRUE), se_i = stats::sd(res[, 1], na.rm = TRUE), pval_i = sum(sign(mean(res[, 1], na.rm = TRUE)) * res[, 1] < 0) / nboot From 00ad07ce2e27be4034965a3b000157152ffdebcc Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 15:59:02 +0000 Subject: [PATCH 04/34] Vectorise weighted_median_bootstrap() --- R/mr.R | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/R/mr.R b/R/mr.R index 2ff9f117..805b8dbc 100644 --- a/R/mr.R +++ b/R/mr.R @@ -783,13 +783,25 @@ weighted_median <- function(b_iv, weights) { #' @export #' @return Empirical standard error weighted_median_bootstrap <- function(b_exp, b_out, se_exp, se_out, weights, nboot) { - med <- rep(0, nboot) - for (i in seq_len(nboot)) { - b_exp.boot <- stats::rnorm(length(b_exp), mean = b_exp, sd = se_exp) - b_out.boot <- stats::rnorm(length(b_out), mean = b_out, sd = se_out) - betaIV.boot <- b_out.boot / b_exp.boot - med[i] <- weighted_median(betaIV.boot, weights) - } + nsnp <- length(b_exp) + + # Vectorized bootstrap: generate all random values at once + # Matrix dimensions: nboot rows x nsnp columns + b_exp_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_exp, each = nboot), + sd = rep(se_exp, each = nboot)), + nrow = nboot, ncol = nsnp) + b_out_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_out, each = nboot), + sd = rep(se_out, each = nboot)), + nrow = nboot, ncol = nsnp) + + # Compute Wald ratios for all bootstrap samples (element-wise division) + betaIV_mat <- b_out_mat / b_exp_mat + + # Apply weighted_median to each bootstrap iteration + med <- vapply(seq_len(nboot), function(i) { + weighted_median(betaIV_mat[i, ], weights) + }, numeric(1)) + return(stats::sd(med)) } From 976c42812fecb72b263f8410162d8ae5eb6e2efd Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 15:59:07 +0000 Subject: [PATCH 05/34] Update NEWS.md --- NEWS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS.md b/NEWS.md index 46b3c337..fa9e772f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# TwoSampleMR v0.6.30 + +(Release date 2026-02-00) + +* Vectorised `mr_egger_regression_bootstrap()` +* Vectorised `weighted_median_bootstrap()` +* Deleted duplicated `weighted_median()` function + # TwoSampleMR v0.6.29 (Release date 2025-12-16) From 73b0f94e9d0d669e4dc23e6d5761a25085c3e820 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 16:23:50 +0000 Subject: [PATCH 06/34] Replace plyr calls with data.table calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)` - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()` - Added `data.table::setDF()` calls to convert back to data.frame for compatibility --- R/harmonise.R | 20 ++++++++----- R/leaveoneout.R | 36 ++++++++++++++++++----- R/mr.R | 36 +++++++++++++++++------ R/query.R | 32 ++++++++++++--------- R/read_data.R | 38 ++++++++++++++++--------- R/rucker.R | 6 ++-- R/singlesnp.R | 76 +++++++++++++++++++++++++++++++++++-------------- 7 files changed, 171 insertions(+), 73 deletions(-) diff --git a/R/harmonise.R b/R/harmonise.R index 982dccba..29cc76ac 100644 --- a/R/harmonise.R +++ b/R/harmonise.R @@ -114,8 +114,10 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) { # return(x) # }) - jlog <- plyr::rbind.fill(lapply(fix.tab, function(x) attr(x, "log"))) - fix.tab <- plyr::rbind.fill(fix.tab) + jlog <- data.table::rbindlist(lapply(fix.tab, function(x) attr(x, "log")), fill = TRUE, use.names = TRUE) + data.table::setDF(jlog) + fix.tab <- data.table::rbindlist(fix.tab, fill = TRUE, use.names = TRUE) + data.table::setDF(fix.tab) attr(fix.tab, "log") <- jlog # fix.tab <- harmonise_make_snp_effects_positive(fix.tab) @@ -680,12 +682,16 @@ harmonise <- function(dat, tolerance, action) { action ) - jlog <- plyr::rbind.fill( - as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE), - as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE), - as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE), - as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE) + jlog <- data.table::rbindlist( + list( + as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE), + as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE), + as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE), + as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE) + ), + fill = TRUE, use.names = TRUE ) + data.table::setDF(jlog) jlog <- cbind( data.frame( id.exposure = dat$id.exposure[1], diff --git a/R/leaveoneout.R b/R/leaveoneout.R index 7cb3ac48..9c72f674 100644 --- a/R/leaveoneout.R +++ b/R/leaveoneout.R @@ -18,19 +18,28 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i stopifnot("se.exposure" %in% names(dat)) stopifnot("se.outcome" %in% names(dat)) - res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(X) { - x <- subset(X, mr_keep) + dat_dt <- data.table::as.data.table(dat) + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + results <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + X <- dat_dt[id.exposure == exp_id & id.outcome == out_id] + x <- X[mr_keep == TRUE] nsnp <- nrow(x) if (nsnp == 0) { x <- X[1, ] d <- data.frame( + id.exposure = exp_id, + id.outcome = out_id, SNP = "All", b = NA, se = NA, p = NA, samplesize = NA, outcome = x$outcome[1], - exposure = x$exposure[1] + exposure = x$exposure[1], + stringsAsFactors = FALSE ) return(d) } @@ -46,28 +55,36 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i method(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters) ) d <- data.frame( + id.exposure = exp_id, + id.outcome = out_id, SNP = c(as.character(x$SNP), "All"), b = sapply(l, function(y) y$b), se = sapply(l, function(y) y$se), p = sapply(l, function(y) y$pval), - samplesize = x$samplesize.outcome[1] + samplesize = x$samplesize.outcome[1], + stringsAsFactors = FALSE ) d$outcome <- x$outcome[1] d$exposure <- x$exposure[1] } else { a <- with(x, method(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters)) d <- data.frame( + id.exposure = exp_id, + id.outcome = out_id, SNP = "All", b = a$b, se = a$se, p = a$pval, - samplesize = x$samplesize.outcome[1] + samplesize = x$samplesize.outcome[1], + stringsAsFactors = FALSE ) d$outcome <- x$outcome[1] d$exposure <- x$exposure[1] } return(d) }) + res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(res) res <- subset( res, select = c(exposure, outcome, id.exposure, id.outcome, samplesize, SNP, b, se, p) @@ -85,8 +102,13 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i #' @export #' @return List of plots mr_leaveoneout_plot <- function(leaveoneout_results) { - res <- plyr::dlply(leaveoneout_results, c("id.exposure", "id.outcome"), function(d) { - d <- plyr::mutate(d) + dat_dt <- data.table::as.data.table(leaveoneout_results) + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + res <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id]) # Need to have at least 3 SNPs because IVW etc methods can't be performed with fewer than 2 SNPs if (sum(!grepl("All", d$SNP)) < 3) { return( diff --git a/R/mr.R b/R/mr.R index 805b8dbc..68fd67a8 100644 --- a/R/mr.R +++ b/R/mr.R @@ -23,37 +23,54 @@ mr <- function( } } - mr_tab <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) { - # message("Performing MR analysis of '", x1$id.exposure[1], "' on '", x18WII58$id.outcome[1], "'") - x <- subset(x1, mr_keep) + # Convert to data.table for efficient grouped operations + + dat_dt <- data.table::as.data.table(dat) + + # Get unique combinations of id.exposure and id.outcome + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + # Process each combination + results <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + x1 <- dat_dt[id.exposure == exp_id & id.outcome == out_id] + x <- x1[mr_keep == TRUE] + if (nrow(x) == 0) { message( "No SNPs available for MR analysis of '", - x1$id.exposure[1], + exp_id, "' on '", - x1$id.outcome[1], + out_id, "'" ) return(NULL) } else { - message("Analysing '", x1$id.exposure[1], "' on '", x1$id.outcome[1], "'") + message("Analysing '", exp_id, "' on '", out_id, "'") } res <- lapply(method_list, function(meth) { get(meth)(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome, parameters) }) methl <- mr_method_list() mr_tab <- data.frame( + id.exposure = exp_id, + id.outcome = out_id, outcome = x$outcome[1], exposure = x$exposure[1], method = methl$name[match(method_list, methl$obj)], nsnp = sapply(res, function(x) x$nsnp), b = sapply(res, function(x) x$b), se = sapply(res, function(x) x$se), - pval = sapply(res, function(x) x$pval) + pval = sapply(res, function(x) x$pval), + stringsAsFactors = FALSE ) - mr_tab <- subset(mr_tab, !(is.na(b) & is.na(se) & is.na(pval))) + mr_tab <- mr_tab[!(is.na(mr_tab$b) & is.na(mr_tab$se) & is.na(mr_tab$pval)), ] return(mr_tab) }) + + mr_tab <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(mr_tab) return(mr_tab) } @@ -243,7 +260,8 @@ mr_method_list <- function() { ) ) a <- lapply(a, as.data.frame) - a <- plyr::rbind.fill(a) + a <- data.table::rbindlist(a, fill = TRUE, use.names = TRUE) + data.table::setDF(a) a <- as.data.frame(lapply(a, as.character), stringsAsFactors = FALSE) a$heterogeneity_test <- as.logical(a$heterogeneity_test) a$use_by_default <- as.logical(a$use_by_default) diff --git a/R/query.R b/R/query.R index 8f67172d..2260fa43 100644 --- a/R/query.R +++ b/R/query.R @@ -83,7 +83,8 @@ extract_outcome_data <- function( splitsize = proxy_splitsize ) if (!is.null(temp)) { - firstpass <- plyr::rbind.fill(firstpass, temp) + firstpass <- data.table::rbindlist(list(firstpass, temp), fill = TRUE, use.names = TRUE) + data.table::setDF(firstpass) } } } @@ -149,9 +150,10 @@ extract_outcome_data_internal <- function( for (i in seq_along(outcomes)) { message(i, " of ", length(outcomes), " outcomes") - d[[i]] <- plyr::ddply(splits, c("chunk_id"), function(x) { - x <- plyr::mutate(x) - message(" [>] ", x$chunk_id[1], " of ", max(splits$chunk_id), " chunks") + chunk_ids <- unique(splits$chunk_id) + d[[i]] <- lapply(chunk_ids, function(cid) { + x <- splits[splits$chunk_id == cid, , drop = FALSE] + message(" [>] ", cid, " of ", max(splits$chunk_id), " chunks") out <- ieugwasr::associations( variants = x$snps, id = outcomes[i], @@ -168,9 +170,11 @@ extract_outcome_data_internal <- function( } return(out) }) + d[[i]] <- data.table::rbindlist(d[[i]], fill = TRUE, use.names = TRUE) } - d <- plyr::rbind.fill(d) + d <- data.table::rbindlist(d, fill = TRUE, use.names = TRUE) + data.table::setDF(d) } else { # Split outcomes n <- length(outcomes) @@ -182,9 +186,10 @@ extract_outcome_data_internal <- function( for (i in seq_along(snps)) { message(i, " of ", length(snps), " snps") - d[[i]] <- plyr::ddply(splits, c("chunk_id"), function(x) { - x <- plyr::mutate(x) - message(" [>] ", x$chunk_id[1], " of ", max(splits$chunk_id), " chunks") + chunk_ids <- unique(splits$chunk_id) + d[[i]] <- lapply(chunk_ids, function(cid) { + x <- splits[splits$chunk_id == cid, , drop = FALSE] + message(" [>] ", cid, " of ", max(splits$chunk_id), " chunks") out <- ieugwasr::associations( variants = snps[i], @@ -203,9 +208,11 @@ extract_outcome_data_internal <- function( } return(out) }) + d[[i]] <- data.table::rbindlist(d[[i]], fill = TRUE, use.names = TRUE) } - d <- plyr::rbind.fill(d) + d <- data.table::rbindlist(d, fill = TRUE, use.names = TRUE) + data.table::setDF(d) } if (is.null(nrow(d)) || nrow(d) == 0) { # message("None of the requested SNPs were available in the specified GWASs.") @@ -281,10 +288,9 @@ format_d <- function(d) { d <- cbind(d1, p) # If two SNPs have the same proxy SNP then one has to be removed - d <- plyr::ddply(d, c("outcome"), function(x) { - x <- plyr::mutate(x) - subset(x, !duplicated(proxy_snp.outcome)) - }) + d <- data.table::as.data.table(d) + d <- d[!duplicated(proxy_snp.outcome), , by = outcome] + data.table::setDF(d) } else { d <- d1 } diff --git a/R/read_data.R b/R/read_data.R index c383c2d2..2a019e04 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -300,13 +300,15 @@ format_data <- function( } # Remove duplicated SNPs - dat <- plyr::ddply(dat, type, function(x) { - x <- plyr::mutate(x) + dat_dt <- data.table::as.data.table(dat) + types <- unique(dat_dt[[type]]) + dat_list <- lapply(types, function(t) { + x <- dat_dt[dat_dt[[type]] == t] dup <- duplicated(x$SNP) if (any(dup)) { warning( "Duplicated SNPs present in exposure data for phenotype '", - x[[type]][1], + t, ". Just keeping the first instance:\n", paste(x$SNP[dup], collapse = "\n") ) @@ -314,6 +316,8 @@ format_data <- function( } return(x) }) + dat <- data.table::rbindlist(dat_list, fill = TRUE, use.names = TRUE) + data.table::setDF(dat) # Check if columns required for MR are present mr_cols_required <- c(snp_col, beta_col, se_col, effect_allele_col) @@ -580,15 +584,19 @@ format_data <- function( } check_units <- function(x, id, col) { - temp <- plyr::ddply(x, id, function(x1) { + x_dt <- data.table::as.data.table(x) + ids <- unique(x_dt[[id]]) + temp_list <- lapply(ids, function(id_val) { + x1 <- x_dt[x_dt[[id]] == id_val] ph <- FALSE if (length(unique(x1[[col]])) > 1) { - warning("More than one type of unit specified for ", x1[[id]][1]) - x1 <- plyr::mutate(x1) + warning("More than one type of unit specified for ", id_val) ph <- TRUE } return(data.frame(ph = ph[1], stringsAsFactors = FALSE)) }) + temp <- data.table::rbindlist(temp_list, fill = TRUE, use.names = TRUE) + data.table::setDF(temp) return(temp) } @@ -774,14 +782,18 @@ combine_data <- function(x) { id_col <- paste0("id.", type) mr_keep_col <- paste0("mr_keep.", type) - x <- plyr::rbind.fill(x) - - x <- plyr::ddply(x, id_col, function(x) { - x <- plyr::mutate(x) - x <- x[order(x[[mr_keep_col]], decreasing = TRUE), ] - x <- subset(x, !duplicated(SNP)) - return(x) + x <- data.table::rbindlist(x, fill = TRUE, use.names = TRUE) + data.table::setDF(x) + + ids <- unique(x[[id_col]]) + x_list <- lapply(ids, function(id_val) { + x1 <- x[x[[id_col]] == id_val, , drop = FALSE] + x1 <- x1[order(x1[[mr_keep_col]], decreasing = TRUE), ] + x1 <- x1[!duplicated(x1$SNP), ] + return(x1) }) + x <- data.table::rbindlist(x_list, fill = TRUE, use.names = TRUE) + data.table::setDF(x) rownames(x) <- NULL return(x) diff --git a/R/rucker.R b/R/rucker.R index 7b2e95f1..97410128 100644 --- a/R/rucker.R +++ b/R/rucker.R @@ -294,7 +294,8 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) { l[[i]] <- mr_rucker(dat2, parameters) } - modsel <- plyr::rbind.fill(lapply(l, function(x) x$selected)) + modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE) + data.table::setDF(modsel) modsel$model <- sapply(l, function(x) x$res) bootstrap <- data.frame( @@ -444,7 +445,8 @@ mr_rucker_jackknife_internal <- function(dat, parameters = default_parameters()) l[[i]] <- mr_rucker_internal(dat2, parameters) } - modsel <- plyr::rbind.fill(lapply(l, function(x) x$selected)) + modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE) + data.table::setDF(modsel) modsel$model <- sapply(l, function(x) x$res) bootstrap <- data.frame( diff --git a/R/singlesnp.R b/R/singlesnp.R index 690e2570..45becf6d 100644 --- a/R/singlesnp.R +++ b/R/singlesnp.R @@ -24,55 +24,72 @@ mr_singlesnp <- function( stopifnot("se.exposure" %in% names(dat)) stopifnot("se.outcome" %in% names(dat)) - res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(X) { - x <- subset(X, mr_keep) + dat_dt <- data.table::as.data.table(dat) + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + # Pre-compute method names outside the loop + method_list <- mr_method_list() + method_names <- vapply(all_method, function(m) { + paste0("All - ", method_list$name[method_list$obj == m]) + }, character(1)) + + results <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + X <- dat_dt[id.exposure == exp_id & id.outcome == out_id] + x <- X[mr_keep == TRUE] nsnp <- nrow(x) if (nsnp == 0) { x <- X[1, ] d <- data.frame( + id.exposure = exp_id, + id.outcome = out_id, SNP = "No available data", b = NA, se = NA, p = NA, samplesize = NA, outcome = x$outcome[1], - exposure = x$exposure[1] + exposure = x$exposure[1], + stringsAsFactors = FALSE ) return(d) } - l <- lapply(1:nsnp, function(i) { + l <- lapply(1:nsnp, function(j) { with( x, get(single_method)( - beta.exposure[i], - beta.outcome[i], - se.exposure[i], - se.outcome[i], + beta.exposure[j], + beta.outcome[j], + se.exposure[j], + se.outcome[j], parameters ) ) }) - nom <- c() - for (i in seq_along(all_method)) { - l[[nsnp + i]] <- with( + for (j in seq_along(all_method)) { + l[[nsnp + j]] <- with( x, - get(all_method[i])(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters) + get(all_method[j])(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters) ) - - nom <- c(nom, paste0("All - ", subset(mr_method_list(), obj == all_method[i])$name)) } d <- data.frame( - SNP = c(as.character(x$SNP), nom), + id.exposure = exp_id, + id.outcome = out_id, + SNP = c(as.character(x$SNP), method_names), b = sapply(l, function(y) y$b), se = sapply(l, function(y) y$se), p = sapply(l, function(y) y$pval), - samplesize = x$samplesize.outcome[1] + samplesize = x$samplesize.outcome[1], + stringsAsFactors = FALSE ) d$outcome <- x$outcome[1] d$exposure <- x$exposure[1] return(d) }) + res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(res) res <- subset( res, select = c(exposure, outcome, id.exposure, id.outcome, samplesize, SNP, b, se, p) @@ -89,8 +106,13 @@ mr_singlesnp <- function( #' @export #' @return List of plots mr_forest_plot <- function(singlesnp_results, exponentiate = FALSE) { - res <- plyr::dlply(singlesnp_results, c("id.exposure", "id.outcome"), function(d) { - d <- plyr::mutate(d) + dat_dt <- data.table::as.data.table(singlesnp_results) + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + res <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id]) if (sum(!grepl("All", d$SNP)) < 2) { return( blank_plot("Insufficient number of SNPs") @@ -195,8 +217,13 @@ mr_density_plot <- function( exponentiate = FALSE, bandwidth = "nrd0" ) { - res <- plyr::dlply(singlesnp_results, c("id.exposure", "id.outcome"), function(d) { - d <- plyr::mutate(d) + dat_dt <- data.table::as.data.table(singlesnp_results) + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + res <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id]) if (sum(!grepl("All", d$SNP)) < 2) { return( blank_plot("Insufficient number of SNPs") @@ -235,8 +262,13 @@ mr_density_plot <- function( #' @export #' @return List of plots mr_funnel_plot <- function(singlesnp_results) { - res <- plyr::dlply(singlesnp_results, c("id.exposure", "id.outcome"), function(d) { - d <- plyr::mutate(d) + dat_dt <- data.table::as.data.table(singlesnp_results) + combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + + res <- lapply(seq_len(nrow(combos)), function(i) { + exp_id <- combos$id.exposure[i] + out_id <- combos$id.outcome[i] + d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id]) if (sum(!grepl("All", d$SNP)) < 2) { return( blank_plot("Insufficient number of SNPs") From 869fc0cd0da06471aa9f0a239af31fedddf74827 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 16:23:55 +0000 Subject: [PATCH 07/34] Update NEWS.md --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index fa9e772f..8c40411a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,10 @@ * Vectorised `mr_egger_regression_bootstrap()` * Vectorised `weighted_median_bootstrap()` * Deleted duplicated `weighted_median()` function +* Replace **plyr** function calls with **data.table** function calls + - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)` + - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()` + - Added `data.table::setDF()` calls to convert back to data.frame for compatibility # TwoSampleMR v0.6.29 From 9137867bd730f1466d1e203fd9758997387479ab Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 16:25:45 +0000 Subject: [PATCH 08/34] Use chartr() instead of 4 gsub() calls --- R/harmonise.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/harmonise.R b/R/harmonise.R index 29cc76ac..7aa804ef 100644 --- a/R/harmonise.R +++ b/R/harmonise.R @@ -173,12 +173,9 @@ check_palindromic <- function(A1, A2) { flip_alleles <- function(x) { - x <- toupper(x) - x <- gsub("C", "g", x) - x <- gsub("G", "c", x) - x <- gsub("A", "t", x) - x <- gsub("T", "a", x) - return(toupper(x)) + # Use chartr for efficient single-pass character substitution + # This is much faster than 4 sequential gsub calls + chartr("ACGTacgt", "TGCAtgca", x) } From 9b08ff83ece5becf2b0b331a0f59f77ba17ce2fd Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 16:26:37 +0000 Subject: [PATCH 09/34] Use single call to sample() instead of n calls --- R/read_data.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/read_data.R b/R/read_data.R index 2a019e04..b7d80dd0 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -736,11 +736,13 @@ format_aries_mqtl <- function(aries_mqtl_subset, type = "exposure") { random_string <- function(n = 1, len = 6) { - randomString <- c(1:n) - for (i in 1:n) { - randomString[i] <- paste(sample(c(0:9, letters, LETTERS), len, replace = TRUE), collapse = "") - } - return(randomString) + # Vectorized random string generation + # Generate all random characters at once and reshape into matrix + chars <- c(0:9, letters, LETTERS) + random_chars <- sample(chars, n * len, replace = TRUE) + # Reshape to matrix with n rows and len columns, then collapse each row + random_matrix <- matrix(random_chars, nrow = n, ncol = len) + apply(random_matrix, 1, paste, collapse = "") } create_ids <- function(x) { From 86251c8262c558de7304b3ea8d4126b29f50b051 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 5 Feb 2026 16:28:04 +0000 Subject: [PATCH 10/34] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 8c40411a..e1995b8d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,8 @@ - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)` - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()` - Added `data.table::setDF()` calls to convert back to data.frame for compatibility +* In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls +* In `random_string()` use single call to `sample()` instead of n calls # TwoSampleMR v0.6.29 From f6d22964ec8efe6f6885b882f48cf8251df15212 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 11:15:02 +0000 Subject: [PATCH 11/34] Optimize mr_mode() **1. `beta()` function:** - Replaced growing vector (`beta <- NULL; beta[length(beta) + 1] <- ...`) with pre-allocated `numeric(length(phi))` - Used `which.max(densityIV$y)` instead of `densityIV$y == max(densityIV$y)` (safer, avoids floating-point equality issues) **2. `boot()` function:** - Pre-generate all random values as two matrices (2 `rnorm` calls instead of 2000) - Pre-compute `ones <- rep(1, n)` and `weights` outside the loop instead of recomputing each iteration - Cache `nphi <- length(phi)` to avoid repeated `length()` calls The loop itself still iterates 1000 times calling `beta()` (which calls `stats::density()`), but the per-iteration overhead is reduced by eliminating random number generation and redundant computations from inside the loop --- R/mr_mode.R | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/R/mr_mode.R b/R/mr_mode.R index 05c42404..8d8c587e 100644 --- a/R/mr_mode.R +++ b/R/mr_mode.R @@ -35,17 +35,17 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all") #Standardised weights weights <- seBetaIV.in^-2 / sum(seBetaIV.in^-2) - beta <- NULL + result <- numeric(length(phi)) - for (cur_phi in phi) { + for (j in seq_along(phi)) { #Define the actual bandwidth - h <- max(0.00000001, s * cur_phi) + h <- max(0.00000001, s * phi[j]) #Compute the smoothed empirical density function densityIV <- stats::density(BetaIV.in, weights = weights, bw = h) #Extract the point with the highest density as the point estimate - beta[length(beta) + 1] <- densityIV$x[densityIV$y == max(densityIV$y)] + result[j] <- densityIV$x[which.max(densityIV$y)] } - return(beta) + return(result) } #------------------------------------------# @@ -55,50 +55,65 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all") #seBetaIV.in: standard errors of ratio estimates #beta_Mode.in: point causal effect estimates boot <- function(BetaIV.in, seBetaIV.in, beta_Mode.in, nboot) { + n <- length(BetaIV.in) + nphi <- length(phi) + + #Pre-generate all random values as matrices (nboot x n) + #mean and sd recycle in lockstep (both length n), filling column-by-column + BetaIV.boot_mat <- matrix( + stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 1]), + nrow = nboot, ncol = n + ) + BetaIV.boot_NOME_mat <- matrix( + stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 2]), + nrow = nboot, ncol = n + ) + + #Pre-compute constants + ones <- rep(1, n) + weights <- 1 / seBetaIV.in[, 1]^2 + #Set up a matrix to store the results from each bootstrap iteration beta.boot <- matrix(nrow = nboot, ncol = length(beta_Mode.in)) for (i in 1:nboot) { - #Re-sample each ratio estimate using SEs derived not assuming NOME - BetaIV.boot <- stats::rnorm(length(BetaIV.in), mean = BetaIV.in, sd = seBetaIV.in[, 1]) - #Re-sample each ratio estimate using SEs derived under NOME - BetaIV.boot_NOME <- stats::rnorm(length(BetaIV.in), mean = BetaIV.in, sd = seBetaIV.in[, 2]) + BetaIV.boot <- BetaIV.boot_mat[i, ] + BetaIV.boot_NOME <- BetaIV.boot_NOME_mat[i, ] #Simple mode, not assuming NOME - beta.boot[i, seq_along(phi)] <- beta( + beta.boot[i, seq_len(nphi)] <- beta( BetaIV.in = BetaIV.boot, - seBetaIV.in = rep(1, length(BetaIV)), + seBetaIV.in = ones, phi = phi ) #Weighted mode, not assuming NOME - beta.boot[i, (length(phi) + 1):(2 * length(phi))] <- beta( + beta.boot[i, (nphi + 1):(2 * nphi)] <- beta( BetaIV.in = BetaIV.boot, seBetaIV.in = seBetaIV.in[, 1], phi = phi ) #Penalised mode, not assuming NOME - weights <- 1 / seBetaIV.in[, 1]^2 penalty <- stats::pchisq( - weights * (BetaIV.boot - beta.boot[i, (length(phi) + 1):(2 * length(phi))])^2, + weights * (BetaIV.boot - beta.boot[i, (nphi + 1):(2 * nphi)])^2, df = 1, lower.tail = FALSE ) pen.weights <- weights * pmin(1, penalty * parameters$penk) - beta.boot[i, (2 * length(phi) + 1):(3 * length(phi))] <- beta( + beta.boot[i, (2 * nphi + 1):(3 * nphi)] <- beta( BetaIV.in = BetaIV.boot, seBetaIV.in = sqrt(1 / pen.weights), phi = phi ) #Simple mode, assuming NOME - beta.boot[i, (3 * length(phi) + 1):(4 * length(phi))] <- beta( + beta.boot[i, (3 * nphi + 1):(4 * nphi)] <- beta( BetaIV.in = BetaIV.boot_NOME, - seBetaIV.in = rep(1, length(BetaIV)), + seBetaIV.in = ones, phi = phi ) #Weighted mode, assuming NOME - beta.boot[i, (4 * length(phi) + 1):(5 * length(phi))] <- beta( + beta.boot[i, (4 * nphi + 1):(5 * nphi)] <- beta( BetaIV.in = BetaIV.boot_NOME, seBetaIV.in = seBetaIV.in[, 2], phi = phi From f26817b22af170572251c19407dddfff6c84ef6a Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 11:15:54 +0000 Subject: [PATCH 12/34] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index e1995b8d..27e8d7a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,7 @@ - Added `data.table::setDF()` calls to convert back to data.frame for compatibility * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls * In `random_string()` use single call to `sample()` instead of n calls +* Optimized `mr_mode()` # TwoSampleMR v0.6.29 From c8736992a79e6341f84b11f68095036465f4925f Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:01:33 +0000 Subject: [PATCH 13/34] Remove use of plyr from package --- DESCRIPTION | 1 - R/add_rsq.r | 14 ++++++++++---- R/enrichment.R | 20 +++++++++++++------- R/eve.R | 9 ++++++--- R/forest_plot2.R | 11 +++++++---- R/format_mr_results2.R | 7 ++++--- R/heterogeneity.R | 32 ++++++++++++++++++++++---------- R/knit.R | 11 +++++------ R/mr_mode.R | 6 +++++- R/other_formats.R | 18 +++++++++++++----- R/scatterplot.R | 6 ++++-- R/steiger.R | 14 ++++++++++---- R/steiger_filtering.R | 9 ++++++++- R/transform.R | 6 +++++- 14 files changed, 112 insertions(+), 52 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 72a5e0dc..64509010 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,7 +43,6 @@ Imports: MRMix, MRPRESSO, pbapply, - plyr, psych, RadialMR, reshape2, diff --git a/R/add_rsq.r b/R/add_rsq.r index 6bc3ff90..4394bac1 100644 --- a/R/add_rsq.r +++ b/R/add_rsq.r @@ -11,14 +11,20 @@ #' @return data frame add_rsq <- function(dat) { if ("id.exposure" %in% names(dat)) { - dat <- plyr::ddply(dat, c("id.exposure"), function(x) { - add_rsq_one(x, "exposure") + ids <- unique(dat$id.exposure) + results <- lapply(ids, function(id) { + add_rsq_one(dat[dat$id.exposure == id, ], "exposure") }) + dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(dat) } if ("id.outcome" %in% names(dat)) { - dat <- plyr::ddply(dat, c("id.outcome"), function(x) { - add_rsq_one(x, "outcome") + ids <- unique(dat$id.outcome) + results <- lapply(ids, function(id) { + add_rsq_one(dat[dat$id.outcome == id, ], "outcome") }) + dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(dat) } return(dat) } diff --git a/R/enrichment.R b/R/enrichment.R index 87e24a62..8f751722 100644 --- a/R/enrichment.R +++ b/R/enrichment.R @@ -34,7 +34,8 @@ enrichment_method_list <- function() { ) ) a <- lapply(a, as.data.frame) - a <- plyr::rbind.fill(a) + a <- data.table::rbindlist(a, fill = TRUE, use.names = TRUE) + data.table::setDF(a) a <- as.data.frame(lapply(a, as.character), stringsAsFactors = FALSE) return(a) } @@ -50,9 +51,10 @@ enrichment_method_list <- function() { #' @export #' @return data frame enrichment <- function(dat, method_list = enrichment_method_list()$obj) { - res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) { - # message("Performing enrichment analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'") - + methl <- enrichment_method_list() + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] x <- subset(x1, !is.na(pval.outcome)) if (nrow(x) == 0) { message( @@ -67,16 +69,20 @@ enrichment <- function(dat, method_list = enrichment_method_list()$obj) { res <- lapply(method_list, function(meth) { get(meth)(x$pval.outcome) }) - methl <- enrichment_method_list() enrichment_tab <- data.frame( + id.exposure = x1$id.exposure[1], + id.outcome = x1$id.outcome[1], outcome = x$outcome[1], exposure = x$exposure[1], method = methl$name[methl$obj %in% method_list], - nsnp = sapply(res, function(x) x$nsnp), - pval = sapply(res, function(x) x$pval) + nsnp = vapply(res, function(x) x$nsnp, numeric(1)), + pval = vapply(res, function(x) x$pval, numeric(1)), + stringsAsFactors = FALSE ) enrichment_tab <- subset(enrichment_tab, !is.na(pval)) return(enrichment_tab) }) + res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(res) return(res) } diff --git a/R/eve.R b/R/eve.R index 9d684bd7..be58892f 100644 --- a/R/eve.R +++ b/R/eve.R @@ -327,10 +327,13 @@ mr_wrapper_single <- function(dat, parameters = default_parameters()) { #' @export #' @return list mr_wrapper <- function(dat, parameters = default_parameters()) { - plyr::dlply(dat, c("id.exposure", "id.outcome"), function(x) { + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + res <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] message("Performing MR analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'") d <- subset(x, mr_keep) - o <- mr_wrapper_single(d, parameters = parameters) - o + mr_wrapper_single(d, parameters = parameters) }) + names(res) <- paste(combos$id.exposure, combos$id.outcome, sep = ".") + res } diff --git a/R/forest_plot2.R b/R/forest_plot2.R index 42f3322c..6174641e 100644 --- a/R/forest_plot2.R +++ b/R/forest_plot2.R @@ -101,9 +101,9 @@ format_mr_results <- function( # Fill in missing values exps <- unique(dat$exposure) - dat <- plyr::ddply(dat, c("outcome"), function(x) { - x <- plyr::mutate(x) - nc <- ncol(x) + outcomes <- unique(dat$outcome) + results <- lapply(outcomes, function(out_val) { + x <- dat[dat$outcome == out_val, ] missed <- exps[!exps %in% x$exposure] if (length(missed) >= 1) { out <- unique(x$outcome) @@ -116,10 +116,13 @@ format_mr_results <- function( sample_size = n, stringsAsFactors = FALSE ) - x <- plyr::rbind.fill(x, md) + x <- data.table::rbindlist(list(x, md), fill = TRUE, use.names = TRUE) + data.table::setDF(x) } return(x) }) + dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(dat) # dat <- dplyr::group_by(dat, outcome) %>% # dplyr::do({ # x <- . diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R index 40e676a3..3687343d 100644 --- a/R/format_mr_results2.R +++ b/R/format_mr_results2.R @@ -185,10 +185,11 @@ combine_all_mrresults <- function( names(sin)[names(sin) == "method"] <- "Method" res <- merge(res, het, by = c("id.outcome", "id.exposure", "Method"), all.x = TRUE) - res <- plyr::rbind.fill( - res, - sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")] + res <- data.table::rbindlist( + list(res, sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]), + fill = TRUE, use.names = TRUE ) + data.table::setDF(res) if (ao_slc) { ao <- available_outcomes() diff --git a/R/heterogeneity.R b/R/heterogeneity.R index dde0555d..b6f92438 100644 --- a/R/heterogeneity.R +++ b/R/heterogeneity.R @@ -13,8 +13,10 @@ mr_heterogeneity <- function( parameters = default_parameters(), method_list = subset(mr_method_list(), heterogeneity_test & use_by_default)$obj ) { - het_tab <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) { - # message("Performing MR analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'") + methl <- mr_method_list() + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] x <- subset(x1, mr_keep) if (nrow(x) < 2) { message( @@ -29,18 +31,22 @@ mr_heterogeneity <- function( res <- lapply(method_list, function(meth) { get(meth)(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome, parameters) }) - methl <- mr_method_list() het_tab <- data.frame( + id.exposure = x1$id.exposure[1], + id.outcome = x1$id.outcome[1], outcome = x$outcome[1], exposure = x$exposure[1], method = methl$name[match(method_list, methl$obj)], - Q = sapply(res, function(x) x$Q), - Q_df = sapply(res, function(x) x$Q_df), - Q_pval = sapply(res, function(x) x$Q_pval) + Q = vapply(res, function(x) x$Q, numeric(1)), + Q_df = vapply(res, function(x) x$Q_df, numeric(1)), + Q_pval = vapply(res, function(x) x$Q_pval, numeric(1)), + stringsAsFactors = FALSE ) het_tab <- subset(het_tab, !(is.na(Q) & is.na(Q_df) & is.na(Q_pval))) return(het_tab) }) + het_tab <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(het_tab) return(het_tab) } @@ -55,7 +61,9 @@ mr_heterogeneity <- function( #' @export #' @return data frame mr_pleiotropy_test <- function(dat) { - ptab <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) { + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] x <- subset(x1, mr_keep) if (nrow(x) < 2) { message( @@ -74,14 +82,18 @@ mr_pleiotropy_test <- function(dat) { x$se.outcome, default_parameters() ) - out <- data.frame( + data.frame( + id.exposure = x1$id.exposure[1], + id.outcome = x1$id.outcome[1], outcome = x$outcome[1], exposure = x$exposure[1], egger_intercept = res$b_i, se = res$se_i, - pval = res$pval_i + pval = res$pval_i, + stringsAsFactors = FALSE ) - return(out) }) + ptab <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(ptab) return(ptab) } diff --git a/R/knit.R b/R/knit.R index 0082b21a..bfebacf1 100644 --- a/R/knit.R +++ b/R/knit.R @@ -122,14 +122,13 @@ mr_report <- function( mr_leaveoneout_plot = mr_leaveoneout_plot(m$mr_leaveoneout) ) - combinations <- plyr::ddply( - dat, - c("id.exposure", "id.outcome"), - plyr::summarise, - n = length(exposure), + dat_dt <- data.table::as.data.table(dat) + combinations <- dat_dt[, .( + n = .N, exposure = exposure[1], outcome = outcome[1] - ) + ), by = c("id.exposure", "id.outcome")] + data.table::setDF(combinations) output_file <- array("", nrow(combinations)) for (i in seq_len(nrow(combinations))) { diff --git a/R/mr_mode.R b/R/mr_mode.R index 8d8c587e..1281af9b 100644 --- a/R/mr_mode.R +++ b/R/mr_mode.R @@ -374,9 +374,13 @@ mr_simple_mode_nome <- function(b_exp, b_out, se_exp, se_out, parameters = defau mr_mode_broken <- function(dat, parameters = default_parameters(), mode_method = "all") { - res <- plyr::ddply(dat, c("id.exposure", "exposure", "id.outcome", "outcome"), function(x) { + combos <- unique(dat[, c("id.exposure", "exposure", "id.outcome", "outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] mr_mode_internal(x, parameters, mode_method = "all") }) + res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(res) if (mode_method != "all") { return(subset(res, method %in% mode_method)) } else { diff --git a/R/other_formats.R b/R/other_formats.R index cd0057ab..cee95686 100644 --- a/R/other_formats.R +++ b/R/other_formats.R @@ -17,8 +17,9 @@ dat_to_MRInput <- function(dat, get_correlations = FALSE, pop = "EUR") { call. = FALSE ) } - out <- plyr::dlply(dat, c("exposure", "outcome"), function(x) { - x <- plyr::mutate(x) + combos <- unique(dat[, c("exposure", "outcome")]) + out <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$exposure == combos$exposure[i] & dat$outcome == combos$outcome[i], ] message("Converting:") message(" - exposure: ", x$exposure[1]) message(" - outcome: ", x$outcome[1]) @@ -60,6 +61,7 @@ dat_to_MRInput <- function(dat, get_correlations = FALSE, pop = "EUR") { ) } }) + names(out) <- paste(combos$exposure, combos$outcome, sep = ".") return(out) } @@ -179,8 +181,9 @@ run_mr_presso <- function(dat, NbDistribution = 1000, SignifThreshold = 0.05) { #' @export #' @return List of RadialMR format datasets dat_to_RadialMR <- function(dat) { - out <- plyr::dlply(dat, c("exposure", "outcome"), function(x) { - x <- plyr::mutate(x) + combos <- unique(dat[, c("exposure", "outcome")]) + out <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$exposure == combos$exposure[i] & dat$outcome == combos$outcome[i], ] message("Converting:") message(" - exposure: ", x$exposure[1]) message(" - outcome: ", x$outcome[1]) @@ -194,6 +197,7 @@ dat_to_RadialMR <- function(dat) { ) return(d) }) + names(out) <- paste(combos$exposure, combos$outcome, sep = ".") return(out) } @@ -251,7 +255,9 @@ mr_ivw_radial <- function(b_exp, b_out, se_exp, se_out, parameters = default_par #' @export #' @return List of results, with one list item for every exposure/outcome pair in dat object run_mrmix <- function(dat) { - plyr::dlply(dat, c("id.exposure", "id.outcome"), function(x) { + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + out <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] message("Analysing ", x$id.exposure[1], " against ", x$id.outcome[1]) if (grepl("log odds", x$units.exposure[1])) { xunit <- "binary" @@ -294,4 +300,6 @@ run_mrmix <- function(dat) { x$se.outcome <- l$sy_std MRMix::MRMix(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome) }) + names(out) <- paste(combos$id.exposure, combos$id.outcome, sep = ".") + out } diff --git a/R/scatterplot.R b/R/scatterplot.R index 28221e09..abc6dc88 100644 --- a/R/scatterplot.R +++ b/R/scatterplot.R @@ -8,8 +8,9 @@ #' @return List of plots mr_scatter_plot <- function(mr_results, dat) { # dat <- subset(dat, paste(id.outcome, id.exposure) %in% paste(mr_results$id.outcome, mr_results$id.exposure)) - mrres <- plyr::dlply(dat, c("id.exposure", "id.outcome"), function(d) { - d <- plyr::mutate(d) + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + mrres <- lapply(seq_len(nrow(combos)), function(i) { + d <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] if (nrow(d) < 2 | sum(d$mr_keep) == 0) { return(blank_plot("Insufficient number of SNPs")) } @@ -161,6 +162,7 @@ mr_scatter_plot <- function(mr_results, dat) { ggplot2::guides(colour = ggplot2::guide_legend(ncol = 2)) } }) + names(mrres) <- paste(combos$id.exposure, combos$id.outcome, sep = ".") mrres } diff --git a/R/steiger.R b/R/steiger.R index 745ddf56..429311f9 100644 --- a/R/steiger.R +++ b/R/steiger.R @@ -183,7 +183,9 @@ directionality_test <- function(dat) { ) } } - dtest <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x) { + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] if (!"r.exposure" %in% names(x)) { x$r.exposure <- NA } @@ -198,16 +200,20 @@ directionality_test <- function(dat) { x$r.exposure, x$r.outcome ) - a <- data.frame( + data.frame( + id.exposure = combos$id.exposure[i], + id.outcome = combos$id.outcome[i], exposure = x$exposure[1], outcome = x$outcome[1], snp_r2.exposure = b$r2_exp, snp_r2.outcome = b$r2_out, correct_causal_direction = b$correct_causal_direction, - steiger_pval = b$steiger_test + steiger_pval = b$steiger_test, + stringsAsFactors = FALSE ) - return(a) }) + dtest <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(dtest) return(dtest) } diff --git a/R/steiger_filtering.R b/R/steiger_filtering.R index 0b79e14f..28466903 100644 --- a/R/steiger_filtering.R +++ b/R/steiger_filtering.R @@ -21,7 +21,14 @@ #' @export #' @return [harmonise_data()] style data frame with additional columns rsq.exposure, rsq.outcome, steiger_dir (which is `TRUE` if the rsq.exposure is larger than rsq.outcome) and steiger_pval which is a test to see if rsq.exposure is significantly larger than rsq.outcome. steiger_filtering <- function(dat) { - plyr::ddply(dat, c("id.exposure", "id.outcome"), steiger_filtering_internal) + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] + steiger_filtering_internal(x) + }) + out <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(out) + return(out) } diff --git a/R/transform.R b/R/transform.R index 3a5a92cf..ab5cfa53 100644 --- a/R/transform.R +++ b/R/transform.R @@ -7,7 +7,9 @@ #' @export #' @return Data frame standardise_units <- function(dat) { - out <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(d) { + combos <- unique(dat[, c("id.exposure", "id.outcome")]) + results <- lapply(seq_len(nrow(combos)), function(i) { + d <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ] if (d$units.exposure[1] != "log odds") { estsd <- mean( estimate_trait_sd(d$beta.exposure, d$se.exposure, d$samplesize.exposure, d$eaf.exposure), @@ -35,6 +37,8 @@ standardise_units <- function(dat) { } return(d) }) + out <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE) + data.table::setDF(out) return(out) } From 30823bc914a1099d3c9b6d5fd14fd1cc86f19f98 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:02:19 +0000 Subject: [PATCH 14/34] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 27e8d7a5..9598e0c2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)` - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()` - Added `data.table::setDF()` calls to convert back to data.frame for compatibility + - And removed **plyr** from Imports list * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls * In `random_string()` use single call to `sample()` instead of n calls * Optimized `mr_mode()` From b108eb8cddb7feddf7c72e786273b7110b5b726f Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:04:22 +0000 Subject: [PATCH 15/34] Replace apply(..., any(is.na())) with complete.case() --- R/harmonise.R | 2 +- R/query.R | 2 +- R/read_data.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/harmonise.R b/R/harmonise.R index 7aa804ef..1508aacd 100644 --- a/R/harmonise.R +++ b/R/harmonise.R @@ -96,7 +96,7 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) { ) - nrow(x) - x$mr_keep[apply(x[, mr_cols], 1, function(y) any(is.na(y)))] <- FALSE + x$mr_keep[!complete.cases(x[, mr_cols])] <- FALSE attr(x, "log")[["total_variants"]] <- nrow(x) attr(x, "log")[["total_variants_for_mr"]] <- sum(x$mr_keep) attr(x, "log")[["proxy_variants"]] <- ifelse( diff --git a/R/query.R b/R/query.R index 2260fa43..7d7e507e 100644 --- a/R/query.R +++ b/R/query.R @@ -317,7 +317,7 @@ format_d <- function(d) { d <- cleanup_outcome_data(d) mrcols <- c("beta.outcome", "se.outcome", "effect_allele.outcome") - d$mr_keep.outcome <- apply(d[, mrcols], 1, function(x) !any(is.na(x))) + d$mr_keep.outcome <- complete.cases(d[, mrcols]) if (any(!d$mr_keep.outcome)) { missinginfosnps <- paste(subset(d, !mr_keep.outcome)$SNP, collapse = " ") warning( diff --git a/R/read_data.R b/R/read_data.R index b7d80dd0..c3c354d4 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -549,7 +549,7 @@ format_data <- function( mrcols <- c("SNP", "beta.outcome", "se.outcome", "effect_allele.outcome") mrcols_present <- mrcols[mrcols %in% names(dat)] dat$mr_keep.outcome <- dat$mr_keep.outcome & - apply(dat[, mrcols_present], 1, function(x) !any(is.na(x))) + complete.cases(dat[, mrcols_present]) if (any(!dat$mr_keep.outcome)) { missinginfosnps <- paste(subset(dat, !mr_keep.outcome)$SNP, collapse = " ") warning( From 2f30adcd1dc7f6216158ed8d12987678382fa39e Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:04:55 +0000 Subject: [PATCH 16/34] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 9598e0c2..8782d4e5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,7 @@ * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls * In `random_string()` use single call to `sample()` instead of n calls * Optimized `mr_mode()` +* Replaced `apply(..., any(is.na()))` with `complete.case()` # TwoSampleMR v0.6.29 From 8415eee7d73766c986f629ee99e6b8a6c6d440a4 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:08:37 +0000 Subject: [PATCH 17/34] Move mr_method_list() before the loop --- R/mr.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mr.R b/R/mr.R index 68fd67a8..2dcdd4f0 100644 --- a/R/mr.R +++ b/R/mr.R @@ -30,6 +30,8 @@ mr <- function( # Get unique combinations of id.exposure and id.outcome combos <- unique(dat_dt[, .(id.exposure, id.outcome)]) + # Pre-compute method names once, outside the loop + methl <- mr_method_list() # Process each combination results <- lapply(seq_len(nrow(combos)), function(i) { exp_id <- combos$id.exposure[i] @@ -52,7 +54,6 @@ mr <- function( res <- lapply(method_list, function(meth) { get(meth)(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome, parameters) }) - methl <- mr_method_list() mr_tab <- data.frame( id.exposure = exp_id, id.outcome = out_id, From 5e8570a3bd2a58379e26639e4dd81dce14855a76 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:09:09 +0000 Subject: [PATCH 18/34] Pre-compute method_names --- R/mr.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mr.R b/R/mr.R index 2dcdd4f0..a871ad9e 100644 --- a/R/mr.R +++ b/R/mr.R @@ -32,6 +32,8 @@ mr <- function( # Pre-compute method names once, outside the loop methl <- mr_method_list() + method_names <- methl$name[match(method_list, methl$obj)] + # Process each combination results <- lapply(seq_len(nrow(combos)), function(i) { exp_id <- combos$id.exposure[i] @@ -59,7 +61,6 @@ mr <- function( id.outcome = out_id, outcome = x$outcome[1], exposure = x$exposure[1], - method = methl$name[match(method_list, methl$obj)], nsnp = sapply(res, function(x) x$nsnp), b = sapply(res, function(x) x$b), se = sapply(res, function(x) x$se), From 5aeb32f7364579bb88a0c798201997a97bf89125 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:09:29 +0000 Subject: [PATCH 19/34] Replace sapply() with vapply() --- R/mr.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/mr.R b/R/mr.R index a871ad9e..32fbee2a 100644 --- a/R/mr.R +++ b/R/mr.R @@ -61,10 +61,11 @@ mr <- function( id.outcome = out_id, outcome = x$outcome[1], exposure = x$exposure[1], - nsnp = sapply(res, function(x) x$nsnp), - b = sapply(res, function(x) x$b), - se = sapply(res, function(x) x$se), - pval = sapply(res, function(x) x$pval), + method = method_names, + nsnp = vapply(res, function(x) x$nsnp, numeric(1)), + b = vapply(res, function(x) x$b, numeric(1)), + se = vapply(res, function(x) x$se, numeric(1)), + pval = vapply(res, function(x) x$pval, numeric(1)), stringsAsFactors = FALSE ) mr_tab <- mr_tab[!(is.na(mr_tab$b) & is.na(mr_tab$se) & is.na(mr_tab$pval)), ] From 0d1d4fe17bbd322a10c829892ab76d4a5d8cc8dd Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:13:18 +0000 Subject: [PATCH 20/34] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 8782d4e5..ce4da264 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,7 @@ * In `random_string()` use single call to `sample()` instead of n calls * Optimized `mr_mode()` * Replaced `apply(..., any(is.na()))` with `complete.case()` +* Optimized the `mr()` function # TwoSampleMR v0.6.29 From ba92c0befc81c7f52c6a8e04810a7e02ff6124e4 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:14:30 +0000 Subject: [PATCH 21/34] Optimize get_r_from_lor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - **Moved `ve` computation outside the loop** — was being recalculated identically on every iteration - **Eliminated the `for` loop entirely** — the `prop` calculation, `vg`, `r`, `correction`, and `sign` operations are all vectorized now - **Single call to `get_population_allele_frequency()`** with full vectors instead of element-by-element --- R/add_rsq.r | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/R/add_rsq.r b/R/add_rsq.r index 4394bac1..81a635a9 100644 --- a/R/add_rsq.r +++ b/R/add_rsq.r @@ -267,29 +267,22 @@ get_r_from_lor <- function( ncontrol <- rep(ncontrol, length(lor)) } - nsnp <- length(lor) - r <- array(NA, nsnp) - for (i in 1:nsnp) { - if (model == "logit") { - ve <- pi^2 / 3 - } else if (model == "probit") { - ve <- 1 - } else { - stop("Model must be probit or logit") - } - popaf <- get_population_allele_frequency( - af[i], - ncase[i] / (ncase[i] + ncontrol[i]), - exp(lor[i]), - prevalence[i] - ) - vg <- (lor[i])^2 * popaf * (1 - popaf) - r[i] <- vg / (vg + ve) - if (correction) { - r[i] <- r[i] / 0.58 - } - r[i] <- sqrt(r[i]) * sign(lor[i]) + if (model == "logit") { + ve <- pi^2 / 3 + } else if (model == "probit") { + ve <- 1 + } else { + stop("Model must be probit or logit") + } + + prop <- ncase / (ncase + ncontrol) + popaf <- get_population_allele_frequency(af, prop, exp(lor), prevalence) + vg <- lor^2 * popaf * (1 - popaf) + r <- vg / (vg + ve) + if (correction) { + r <- r / 0.58 } + r <- sqrt(r) * sign(lor) return(r) } From e8e78bc70a385bcda6d145f45f59f475051fa306 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:14:57 +0000 Subject: [PATCH 22/34] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index ce4da264..b04f8c14 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,7 @@ * Optimized `mr_mode()` * Replaced `apply(..., any(is.na()))` with `complete.case()` * Optimized the `mr()` function +* Optimized the `Optimize get_r_from_lor()` function # TwoSampleMR v0.6.29 From b34677f035614faae0812031379bcbb80fa0f7fe Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:15:29 +0000 Subject: [PATCH 23/34] Update NEWS.md --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index b04f8c14..9121fc2b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,8 +6,8 @@ * Vectorised `weighted_median_bootstrap()` * Deleted duplicated `weighted_median()` function * Replace **plyr** function calls with **data.table** function calls - - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)` - - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()` + - `plyr::rbind.fill(...)` to `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)` + - `plyr::ddply(dat, cols, func)` to `lapply()` over unique combinations + `data.table::rbindlist()` - Added `data.table::setDF()` calls to convert back to data.frame for compatibility - And removed **plyr** from Imports list * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls From 886c35ddbef20735c3cb63fc160574d4677176b7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:20:47 +0000 Subject: [PATCH 24/34] Optimize mr_rucker_bootstrap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Pre-generate all random values as two matrices (2 `rnorm` calls instead of 2000) — `boot_exp` and `boot_out` are `nboot x nsnp` matrices - Loop now indexes rows from the pre-generated matrices instead of calling `rnorm()` each iteration - `sapply` → `vapply` for `model`, `Q`, `Qdash` extraction (4 calls) --- R/rucker.R | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/R/rucker.R b/R/rucker.R index 97410128..5ed7a7ad 100644 --- a/R/rucker.R +++ b/R/rucker.R @@ -286,22 +286,33 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) { # Main result rucker <- mr_rucker(dat, parameters) + + # Pre-generate all random values as matrices (nboot x nsnp) + boot_exp <- matrix( + stats::rnorm(nboot * nsnp, mean = dat$beta.exposure, sd = dat$se.exposure), + nrow = nboot, ncol = nsnp + ) + boot_out <- matrix( + stats::rnorm(nboot * nsnp, mean = dat$beta.outcome, sd = dat$se.outcome), + nrow = nboot, ncol = nsnp + ) + dat2 <- dat l <- list() for (i in 1:nboot) { - dat2$beta.exposure <- stats::rnorm(nsnp, mean = dat$beta.exposure, sd = dat$se.exposure) - dat2$beta.outcome <- stats::rnorm(nsnp, mean = dat$beta.outcome, sd = dat$se.outcome) + dat2$beta.exposure <- boot_exp[i, ] + dat2$beta.outcome <- boot_out[i, ] l[[i]] <- mr_rucker(dat2, parameters) } modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE) data.table::setDF(modsel) - modsel$model <- sapply(l, function(x) x$res) + modsel$model <- vapply(l, function(x) x$res, character(1)) bootstrap <- data.frame( - Q = c(rucker$Q$Q[1], sapply(l, function(x) x$Q$Q[1])), - Qdash = c(rucker$Q$Q[2], sapply(l, function(x) x$Q$Q[2])), - model = c(rucker$res, sapply(l, function(x) x$res)), + Q = c(rucker$Q$Q[1], vapply(l, function(x) x$Q$Q[1], numeric(1))), + Qdash = c(rucker$Q$Q[2], vapply(l, function(x) x$Q$Q[2], numeric(1))), + model = c(rucker$res, vapply(l, function(x) x$res, character(1))), i = c("Full", rep("Bootstrap", nboot)) ) From edb8a110c27243f3f1793335a3498bc24259233c Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:21:12 +0000 Subject: [PATCH 25/34] Optimize mr_rucker_jackknife_internal() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sapply` → `vapply` for `model`, `Q`, `Qdash` extraction (4 calls) --- R/rucker.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/rucker.R b/R/rucker.R index 5ed7a7ad..e5752e06 100644 --- a/R/rucker.R +++ b/R/rucker.R @@ -458,12 +458,12 @@ mr_rucker_jackknife_internal <- function(dat, parameters = default_parameters()) modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE) data.table::setDF(modsel) - modsel$model <- sapply(l, function(x) x$res) + modsel$model <- vapply(l, function(x) x$res, character(1)) bootstrap <- data.frame( - Q = c(rucker$Q$Q[1], sapply(l, function(x) x$Q$Q[1])), - Qdash = c(rucker$Q$Q[2], sapply(l, function(x) x$Q$Q[2])), - model = c(rucker$res, sapply(l, function(x) x$res)), + Q = c(rucker$Q$Q[1], vapply(l, function(x) x$Q$Q[1], numeric(1))), + Qdash = c(rucker$Q$Q[2], vapply(l, function(x) x$Q$Q[2], numeric(1))), + model = c(rucker$res, vapply(l, function(x) x$res, character(1))), i = c("Full", rep("Jackknife", nboot)) ) From 80e8b20fdd754c726eee954f8b7fb7fa7b6fd2f8 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:21:50 +0000 Subject: [PATCH 26/34] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 9121fc2b..581c3fad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,7 @@ * Replaced `apply(..., any(is.na()))` with `complete.case()` * Optimized the `mr()` function * Optimized the `Optimize get_r_from_lor()` function +* Optimized the `mr_rucker_bootstrap()` and `mr_rucker_jackknife_internal()` functions # TwoSampleMR v0.6.29 From 55f2a5a331f9239699da8b5458ea1bab65d9af27 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:24:39 +0000 Subject: [PATCH 27/34] Replace sapply() with vapply() --- R/singlesnp.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/singlesnp.R b/R/singlesnp.R index 45becf6d..82c9810d 100644 --- a/R/singlesnp.R +++ b/R/singlesnp.R @@ -78,9 +78,9 @@ mr_singlesnp <- function( id.exposure = exp_id, id.outcome = out_id, SNP = c(as.character(x$SNP), method_names), - b = sapply(l, function(y) y$b), - se = sapply(l, function(y) y$se), - p = sapply(l, function(y) y$pval), + b = vapply(l, function(y) y$b, numeric(1)), + se = vapply(l, function(y) y$se, numeric(1)), + p = vapply(l, function(y) y$pval, numeric(1)), samplesize = x$samplesize.outcome[1], stringsAsFactors = FALSE ) From a0143e2102be732722758448f3037e58d948e441 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:24:56 +0000 Subject: [PATCH 28/34] Replace sapply() with vapply() --- R/leaveoneout.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/leaveoneout.R b/R/leaveoneout.R index 9c72f674..5f44ce3d 100644 --- a/R/leaveoneout.R +++ b/R/leaveoneout.R @@ -58,9 +58,9 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i id.exposure = exp_id, id.outcome = out_id, SNP = c(as.character(x$SNP), "All"), - b = sapply(l, function(y) y$b), - se = sapply(l, function(y) y$se), - p = sapply(l, function(y) y$pval), + b = vapply(l, function(y) y$b, numeric(1)), + se = vapply(l, function(y) y$se, numeric(1)), + p = vapply(l, function(y) y$pval, numeric(1)), samplesize = x$samplesize.outcome[1], stringsAsFactors = FALSE ) From b7c965a52b1507ecce1a3e94e2be534bef7420af Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:26:11 +0000 Subject: [PATCH 29/34] Optimize factor to character conversion --- R/format_mr_results2.R | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R index 3687343d..e60782c9 100644 --- a/R/format_mr_results2.R +++ b/R/format_mr_results2.R @@ -146,32 +146,9 @@ combine_all_mrresults <- function( het <- het[, c("id.exposure", "id.outcome", "method", "Q", "Q_df", "Q_pval")] # Convert all factors to character - # lapply(names(Res), FUN=function(x) class(Res[,x])) - Class <- unlist(lapply(names(res), FUN = function(x) class(res[, x]))) - if (any(Class == "factor")) { - Pos <- which(unlist(lapply(names(res), FUN = function(x) class(res[, x]))) == "factor") - for (i in seq_along(Pos)) { - res[, Pos[i]] <- as.character(res[, Pos[i]]) - } - } - - # lapply(names(Het), FUN=function(x) class(Het[,x])) - Class <- unlist(lapply(names(het), FUN = function(x) class(het[, x]))) - if (any(Class == "factor")) { - Pos <- which(unlist(lapply(names(het), FUN = function(x) class(het[, x]))) == "factor") - for (i in seq_along(Pos)) { - het[, Pos[i]] <- as.character(het[, Pos[i]]) - } - } - - # lapply(names(Sin), FUN=function(x) class(Sin[,x])) - Class <- unlist(lapply(names(sin), FUN = function(x) class(sin[, x]))) - if (any(Class == "factor")) { - Pos <- which(unlist(lapply(names(sin), FUN = function(x) class(sin[, x]))) == "factor") - for (i in seq_along(Pos)) { - sin[, Pos[i]] <- as.character(sin[, Pos[i]]) - } - } + res[] <- lapply(res, function(x) if (is.factor(x)) as.character(x) else x) + het[] <- lapply(het, function(x) if (is.factor(x)) as.character(x) else x) + sin[] <- lapply(sin, function(x) if (is.factor(x)) as.character(x) else x) sin <- sin[grep("[:0-9:]", sin$SNP), ] sin$method <- "Wald ratio" From d8c8016924fa7c843833d830e3359acb009a9c29 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:26:47 +0000 Subject: [PATCH 30/34] Combine 4 gsub() calls into 1 --- R/backward_compatibility.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/backward_compatibility.R b/R/backward_compatibility.R index ea8d7ae5..580f2358 100644 --- a/R/backward_compatibility.R +++ b/R/backward_compatibility.R @@ -4,10 +4,7 @@ ids_new_to_old <- function(id) { ids_new_to_old2 <- function(id) { id <- gsub("IEU-a-", "", id) - id <- gsub("-a-", "-a:", id) - id <- gsub("-b-", "-b:", id) - id <- gsub("-c-", "-c:", id) - id <- gsub("-d-", "-d:", id) + id <- gsub("-([a-d])-", "-\\1:", id) return(id) } From cb90e1c45a34453c3ec36972d22f28018e96e5f4 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:27:04 +0000 Subject: [PATCH 31/34] Vectorise simple_cap() --- R/forest_plot2.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/forest_plot2.R b/R/forest_plot2.R index 6174641e..9cfd674c 100644 --- a/R/forest_plot2.R +++ b/R/forest_plot2.R @@ -165,11 +165,8 @@ format_mr_results <- function( #' @keywords internal #' @return Character or array of character simple_cap <- function(x) { - sapply(x, function(x) { - x <- tolower(x) - s <- strsplit(x, " ")[[1]] - paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") - }) + x <- tolower(x) + gsub("\\b(\\w)", "\\U\\1", x, perl = TRUE) } #' Trim function to remove leading and trailing blank spaces From 514b1c2a74d77b678ce5419f920ab40efba2dab7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:27:46 +0000 Subject: [PATCH 32/34] Update NEWS.md --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 581c3fad..2f623a23 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,9 @@ * Optimized the `mr()` function * Optimized the `Optimize get_r_from_lor()` function * Optimized the `mr_rucker_bootstrap()` and `mr_rucker_jackknife_internal()` functions +* Replaced `sapply()` with `vapply()` in several cases +* Optimized the `simple_cap()` function +* And a few other minor optimizations # TwoSampleMR v0.6.29 From 72d678d394a16d1a18b2c5686cd13ccef1f897d1 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:28:05 +0000 Subject: [PATCH 33/34] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2f623a23..ef0a8d7e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # TwoSampleMR v0.6.30 -(Release date 2026-02-00) +(Release date 2026-02-06) * Vectorised `mr_egger_regression_bootstrap()` * Vectorised `weighted_median_bootstrap()` From b1f4c70106e897c9b589af1f11fe0b68461dd518 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Fri, 6 Feb 2026 13:34:53 +0000 Subject: [PATCH 34/34] Install gettext on macOS runners --- .github/workflows/check-full.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 69bee770..8fb371b3 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -61,6 +61,14 @@ jobs: shell: bash run: echo 'options(pkg.sysreqs_db_update_timeout = as.difftime(59, units = "secs"))' >> ~/.Rprofile + - name: Install gettext system dependency on macOS for robustbase package build from source + if: runner.os == 'macOS' + run: | + brew install gettext + mkdir -p ~/.R + echo "CPPFLAGS=-I$(brew --prefix gettext)/include" >> ~/.R/Makevars + echo "LDFLAGS=-L$(brew --prefix gettext)/lib" >> ~/.R/Makevars + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: >