From 358eaed39cfbe2b95873ddb74ac93e9b9038749d Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 15:56:50 +0000
Subject: [PATCH 01/34] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c1ef4673..72a5e0dc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: TwoSampleMR
 Title: Two Sample MR Functions and Interface to MRC Integrative
     Epidemiology Unit OpenGWAS Database
-Version: 0.6.29
+Version: 0.6.30
 Authors@R: c(
     person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0920-1055")),

From 065b4cab6c8edad90c7c1a0d1c258ad065c75ce3 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 15:58:15 +0000
Subject: [PATCH 02/34] Delete duplicated weighted_median function

---
 R/mr.R | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/R/mr.R b/R/mr.R
index 59fc578d..af324afe 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -764,19 +764,6 @@ weighted_median <- function(b_iv, weights) {
   return(b)
 }
 
-weighted_median <- function(b_iv, weights) {
-  betaIV.order <- b_iv[order(b_iv)]
-  weights.order <- weights[order(b_iv)]
-  weights.sum <- cumsum(weights.order) - 0.5 * weights.order
-  weights.sum <- weights.sum / sum(weights.order)
-  below <- max(which(weights.sum < 0.5))
-  b <- betaIV.order[below] +
-    (betaIV.order[below + 1] - betaIV.order[below]) *
-      (0.5 - weights.sum[below]) /
-      (weights.sum[below + 1] - weights.sum[below])
-  return(b)
-}
-
 
 #' Calculate standard errors for weighted median method using bootstrap
 #'

From f5aa0ba2c7ef37100a7b1d81077462f188317783 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 15:58:47 +0000
Subject: [PATCH 03/34] Vectorise mr_egger_regression_bootstrap()

---
 R/mr.R | 54 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/R/mr.R b/R/mr.R
index af324afe..2ff9f117 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -636,36 +636,40 @@ mr_egger_regression_bootstrap <- function(b_exp, b_out, se_exp, se_out, paramete
     ))
   }
   nboot <- parameters$nboot
-  # Do bootstraps
-  res <- array(0, c(nboot + 1, 2))
-  # pb <- txtProgressBar(min = 0, max = nboot, initial = 0, style=3)
-  for (i in 1:nboot) {
-    # setTxtProgressBar(pb, i)
-    #sample from distributions of SNP betas
-    xs <- stats::rnorm(length(b_exp), b_exp, se_exp)
-    ys <- stats::rnorm(length(b_out), b_out, se_out)
-
-    # Use absolute values for Egger reg
-    ys <- ys * sign(xs)
-    xs <- abs(xs)
-
-    #weighted regression with given formula
-    # r <- summary(lm(ys ~ xs, weights=1/se_out^2))
-    r <- linreg(xs, ys, 1 / se_out^2)
-
-    #collect coefficient from given line.
-    res[i, 1] <- r$ahat
-    res[i, 2] <- r$bhat
-    # res[i, 1] <- r$coefficients[1,1]
-    # res[i, 2] <- r$coefficients[2,1]
-  }
-  cat("\n")
+  nsnp <- length(b_exp)
+  weights <- 1 / se_out^2
+
+  # Vectorized bootstrap: generate all random values at once
+  # Matrix dimensions: nboot rows x nsnp columns
+  xs_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_exp, each = nboot),
+                                sd = rep(se_exp, each = nboot)),
+                   nrow = nboot, ncol = nsnp)
+  ys_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_out, each = nboot),
+                                sd = rep(se_out, each = nboot)),
+                   nrow = nboot, ncol = nsnp)
+
+  # Apply sign correction for Egger regression (vectorized)
+  ys_mat <- ys_mat * sign(xs_mat)
+  xs_mat <- abs(xs_mat)
+
+  # Vectorized weighted linear regression for all bootstrap iterations
+  # For each bootstrap iteration, compute weighted regression coefficients
+  # Using the formula: bhat = cov(x*w, y*w) / var(x*w), ahat = mean(y) - mean(x)*bhat
+  res <- t(vapply(seq_len(nboot), function(i) {
+    xs <- xs_mat[i, ]
+    ys <- ys_mat[i, ]
+    xw <- xs * weights
+    yw <- ys * weights
+    bhat <- stats::cov(xw, yw, use = "pair") / stats::var(xw, na.rm = TRUE)
+    ahat <- mean(ys, na.rm = TRUE) - mean(xs, na.rm = TRUE) * bhat
+    c(ahat, bhat)
+  }, numeric(2)))
 
   return(list(
     b = mean(res[, 2], na.rm = TRUE),
     se = stats::sd(res[, 2], na.rm = TRUE),
     pval = sum(sign(mean(res[, 2], na.rm = TRUE)) * res[, 2] < 0) / nboot,
-    nsnp = length(b_exp),
+    nsnp = nsnp,
     b_i = mean(res[, 1], na.rm = TRUE),
     se_i = stats::sd(res[, 1], na.rm = TRUE),
     pval_i = sum(sign(mean(res[, 1], na.rm = TRUE)) * res[, 1] < 0) / nboot

From 00ad07ce2e27be4034965a3b000157152ffdebcc Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 15:59:02 +0000
Subject: [PATCH 04/34] Vectorise weighted_median_bootstrap()

---
 R/mr.R | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/R/mr.R b/R/mr.R
index 2ff9f117..805b8dbc 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -783,13 +783,25 @@ weighted_median <- function(b_iv, weights) {
 #' @export
 #' @return Empirical standard error
 weighted_median_bootstrap <- function(b_exp, b_out, se_exp, se_out, weights, nboot) {
-  med <- rep(0, nboot)
-  for (i in seq_len(nboot)) {
-    b_exp.boot <- stats::rnorm(length(b_exp), mean = b_exp, sd = se_exp)
-    b_out.boot <- stats::rnorm(length(b_out), mean = b_out, sd = se_out)
-    betaIV.boot <- b_out.boot / b_exp.boot
-    med[i] <- weighted_median(betaIV.boot, weights)
-  }
+  nsnp <- length(b_exp)
+
+  # Vectorized bootstrap: generate all random values at once
+  # Matrix dimensions: nboot rows x nsnp columns
+  b_exp_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_exp, each = nboot),
+                                   sd = rep(se_exp, each = nboot)),
+                      nrow = nboot, ncol = nsnp)
+  b_out_mat <- matrix(stats::rnorm(nboot * nsnp, mean = rep(b_out, each = nboot),
+                                   sd = rep(se_out, each = nboot)),
+                      nrow = nboot, ncol = nsnp)
+
+  # Compute Wald ratios for all bootstrap samples (element-wise division)
+  betaIV_mat <- b_out_mat / b_exp_mat
+
+  # Apply weighted_median to each bootstrap iteration
+  med <- vapply(seq_len(nboot), function(i) {
+    weighted_median(betaIV_mat[i, ], weights)
+  }, numeric(1))
+
   return(stats::sd(med))
 }
 

From 976c42812fecb72b263f8410162d8ae5eb6e2efd Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 15:59:07 +0000
Subject: [PATCH 05/34] Update NEWS.md

---
 NEWS.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 46b3c337..fa9e772f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# TwoSampleMR v0.6.30
+
+(Release date 2026-02-00)
+
+* Vectorised `mr_egger_regression_bootstrap()`
+* Vectorised `weighted_median_bootstrap()`
+* Deleted duplicated `weighted_median()` function
+
 # TwoSampleMR v0.6.29
 
 (Release date 2025-12-16)

From 73b0f94e9d0d669e4dc23e6d5761a25085c3e820 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 16:23:50 +0000
Subject: [PATCH 06/34] Replace plyr calls with data.table calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
- `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()`
- Added `data.table::setDF()` calls to convert back to data.frame for compatibility
---
 R/harmonise.R   | 20 ++++++++-----
 R/leaveoneout.R | 36 ++++++++++++++++++-----
 R/mr.R          | 36 +++++++++++++++++------
 R/query.R       | 32 ++++++++++++---------
 R/read_data.R   | 38 ++++++++++++++++---------
 R/rucker.R      |  6 ++--
 R/singlesnp.R   | 76 +++++++++++++++++++++++++++++++++++--------------
 7 files changed, 171 insertions(+), 73 deletions(-)

diff --git a/R/harmonise.R b/R/harmonise.R
index 982dccba..29cc76ac 100644
--- a/R/harmonise.R
+++ b/R/harmonise.R
@@ -114,8 +114,10 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) {
   # 	return(x)
   # })
 
-  jlog <- plyr::rbind.fill(lapply(fix.tab, function(x) attr(x, "log")))
-  fix.tab <- plyr::rbind.fill(fix.tab)
+  jlog <- data.table::rbindlist(lapply(fix.tab, function(x) attr(x, "log")), fill = TRUE, use.names = TRUE)
+  data.table::setDF(jlog)
+  fix.tab <- data.table::rbindlist(fix.tab, fill = TRUE, use.names = TRUE)
+  data.table::setDF(fix.tab)
   attr(fix.tab, "log") <- jlog
 
   # fix.tab <- harmonise_make_snp_effects_positive(fix.tab)
@@ -680,12 +682,16 @@ harmonise <- function(dat, tolerance, action) {
     action
   )
 
-  jlog <- plyr::rbind.fill(
-    as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE),
-    as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE),
-    as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE),
-    as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE)
+  jlog <- data.table::rbindlist(
+    list(
+      as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE),
+      as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE),
+      as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE),
+      as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE)
+    ),
+    fill = TRUE, use.names = TRUE
   )
+  data.table::setDF(jlog)
   jlog <- cbind(
     data.frame(
       id.exposure = dat$id.exposure[1],
diff --git a/R/leaveoneout.R b/R/leaveoneout.R
index 7cb3ac48..9c72f674 100644
--- a/R/leaveoneout.R
+++ b/R/leaveoneout.R
@@ -18,19 +18,28 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i
   stopifnot("se.exposure" %in% names(dat))
   stopifnot("se.outcome" %in% names(dat))
 
-  res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(X) {
-    x <- subset(X, mr_keep)
+  dat_dt <- data.table::as.data.table(dat)
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    X <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
+    x <- X[mr_keep == TRUE]
     nsnp <- nrow(x)
     if (nsnp == 0) {
       x <- X[1, ]
       d <- data.frame(
+        id.exposure = exp_id,
+        id.outcome = out_id,
         SNP = "All",
         b = NA,
         se = NA,
         p = NA,
         samplesize = NA,
         outcome = x$outcome[1],
-        exposure = x$exposure[1]
+        exposure = x$exposure[1],
+        stringsAsFactors = FALSE
       )
       return(d)
     }
@@ -46,28 +55,36 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i
         method(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters)
       )
       d <- data.frame(
+        id.exposure = exp_id,
+        id.outcome = out_id,
         SNP = c(as.character(x$SNP), "All"),
         b = sapply(l, function(y) y$b),
         se = sapply(l, function(y) y$se),
         p = sapply(l, function(y) y$pval),
-        samplesize = x$samplesize.outcome[1]
+        samplesize = x$samplesize.outcome[1],
+        stringsAsFactors = FALSE
       )
       d$outcome <- x$outcome[1]
       d$exposure <- x$exposure[1]
     } else {
       a <- with(x, method(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters))
       d <- data.frame(
+        id.exposure = exp_id,
+        id.outcome = out_id,
         SNP = "All",
         b = a$b,
         se = a$se,
         p = a$pval,
-        samplesize = x$samplesize.outcome[1]
+        samplesize = x$samplesize.outcome[1],
+        stringsAsFactors = FALSE
       )
       d$outcome <- x$outcome[1]
       d$exposure <- x$exposure[1]
     }
     return(d)
   })
+  res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(res)
   res <- subset(
     res,
     select = c(exposure, outcome, id.exposure, id.outcome, samplesize, SNP, b, se, p)
@@ -85,8 +102,13 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i
 #' @export
 #' @return List of plots
 mr_leaveoneout_plot <- function(leaveoneout_results) {
-  res <- plyr::dlply(leaveoneout_results, c("id.exposure", "id.outcome"), function(d) {
-    d <- plyr::mutate(d)
+  dat_dt <- data.table::as.data.table(leaveoneout_results)
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  res <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id])
     # Need to have at least 3 SNPs because IVW etc methods can't be performed with fewer than 2 SNPs
     if (sum(!grepl("All", d$SNP)) < 3) {
       return(
diff --git a/R/mr.R b/R/mr.R
index 805b8dbc..68fd67a8 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -23,37 +23,54 @@ mr <- function(
     }
   }
 
-  mr_tab <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) {
-    # message("Performing MR analysis of '", x1$id.exposure[1], "' on '", x18WII58$id.outcome[1], "'")
-    x <- subset(x1, mr_keep)
+  # Convert to data.table for efficient grouped operations
+
+  dat_dt <- data.table::as.data.table(dat)
+  
+  # Get unique combinations of id.exposure and id.outcome
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  # Process each combination
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    x1 <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
+    x <- x1[mr_keep == TRUE]
+    
     if (nrow(x) == 0) {
       message(
         "No SNPs available for MR analysis of '",
-        x1$id.exposure[1],
+        exp_id,
         "' on '",
-        x1$id.outcome[1],
+        out_id,
         "'"
       )
       return(NULL)
     } else {
-      message("Analysing '", x1$id.exposure[1], "' on '", x1$id.outcome[1], "'")
+      message("Analysing '", exp_id, "' on '", out_id, "'")
     }
     res <- lapply(method_list, function(meth) {
       get(meth)(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome, parameters)
     })
     methl <- mr_method_list()
     mr_tab <- data.frame(
+      id.exposure = exp_id,
+      id.outcome = out_id,
       outcome = x$outcome[1],
       exposure = x$exposure[1],
       method = methl$name[match(method_list, methl$obj)],
       nsnp = sapply(res, function(x) x$nsnp),
       b = sapply(res, function(x) x$b),
       se = sapply(res, function(x) x$se),
-      pval = sapply(res, function(x) x$pval)
+      pval = sapply(res, function(x) x$pval),
+      stringsAsFactors = FALSE
     )
-    mr_tab <- subset(mr_tab, !(is.na(b) & is.na(se) & is.na(pval)))
+    mr_tab <- mr_tab[!(is.na(mr_tab$b) & is.na(mr_tab$se) & is.na(mr_tab$pval)), ]
     return(mr_tab)
   })
+  
+  mr_tab <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(mr_tab)
 
   return(mr_tab)
 }
@@ -243,7 +260,8 @@ mr_method_list <- function() {
     )
   )
   a <- lapply(a, as.data.frame)
-  a <- plyr::rbind.fill(a)
+  a <- data.table::rbindlist(a, fill = TRUE, use.names = TRUE)
+  data.table::setDF(a)
   a <- as.data.frame(lapply(a, as.character), stringsAsFactors = FALSE)
   a$heterogeneity_test <- as.logical(a$heterogeneity_test)
   a$use_by_default <- as.logical(a$use_by_default)
diff --git a/R/query.R b/R/query.R
index 8f67172d..2260fa43 100644
--- a/R/query.R
+++ b/R/query.R
@@ -83,7 +83,8 @@ extract_outcome_data <- function(
           splitsize = proxy_splitsize
         )
         if (!is.null(temp)) {
-          firstpass <- plyr::rbind.fill(firstpass, temp)
+          firstpass <- data.table::rbindlist(list(firstpass, temp), fill = TRUE, use.names = TRUE)
+          data.table::setDF(firstpass)
         }
       }
     }
@@ -149,9 +150,10 @@ extract_outcome_data_internal <- function(
     for (i in seq_along(outcomes)) {
       message(i, " of ", length(outcomes), " outcomes")
 
-      d[[i]] <- plyr::ddply(splits, c("chunk_id"), function(x) {
-        x <- plyr::mutate(x)
-        message(" [>] ", x$chunk_id[1], " of ", max(splits$chunk_id), " chunks")
+      chunk_ids <- unique(splits$chunk_id)
+      d[[i]] <- lapply(chunk_ids, function(cid) {
+        x <- splits[splits$chunk_id == cid, , drop = FALSE]
+        message(" [>] ", cid, " of ", max(splits$chunk_id), " chunks")
         out <- ieugwasr::associations(
           variants = x$snps,
           id = outcomes[i],
@@ -168,9 +170,11 @@ extract_outcome_data_internal <- function(
         }
         return(out)
       })
+      d[[i]] <- data.table::rbindlist(d[[i]], fill = TRUE, use.names = TRUE)
     }
 
-    d <- plyr::rbind.fill(d)
+    d <- data.table::rbindlist(d, fill = TRUE, use.names = TRUE)
+    data.table::setDF(d)
   } else {
     # Split outcomes
     n <- length(outcomes)
@@ -182,9 +186,10 @@ extract_outcome_data_internal <- function(
     for (i in seq_along(snps)) {
       message(i, " of ", length(snps), " snps")
 
-      d[[i]] <- plyr::ddply(splits, c("chunk_id"), function(x) {
-        x <- plyr::mutate(x)
-        message(" [>] ", x$chunk_id[1], " of ", max(splits$chunk_id), " chunks")
+      chunk_ids <- unique(splits$chunk_id)
+      d[[i]] <- lapply(chunk_ids, function(cid) {
+        x <- splits[splits$chunk_id == cid, , drop = FALSE]
+        message(" [>] ", cid, " of ", max(splits$chunk_id), " chunks")
 
         out <- ieugwasr::associations(
           variants = snps[i],
@@ -203,9 +208,11 @@ extract_outcome_data_internal <- function(
         }
         return(out)
       })
+      d[[i]] <- data.table::rbindlist(d[[i]], fill = TRUE, use.names = TRUE)
     }
 
-    d <- plyr::rbind.fill(d)
+    d <- data.table::rbindlist(d, fill = TRUE, use.names = TRUE)
+    data.table::setDF(d)
   }
   if (is.null(nrow(d)) || nrow(d) == 0) {
     # message("None of the requested SNPs were available in the specified GWASs.")
@@ -281,10 +288,9 @@ format_d <- function(d) {
     d <- cbind(d1, p)
 
     # If two SNPs have the same proxy SNP then one has to be removed
-    d <- plyr::ddply(d, c("outcome"), function(x) {
-      x <- plyr::mutate(x)
-      subset(x, !duplicated(proxy_snp.outcome))
-    })
+    d <- data.table::as.data.table(d)
+    d <- d[!duplicated(proxy_snp.outcome), , by = outcome]
+    data.table::setDF(d)
   } else {
     d <- d1
   }
diff --git a/R/read_data.R b/R/read_data.R
index c383c2d2..2a019e04 100644
--- a/R/read_data.R
+++ b/R/read_data.R
@@ -300,13 +300,15 @@ format_data <- function(
   }
 
   # Remove duplicated SNPs
-  dat <- plyr::ddply(dat, type, function(x) {
-    x <- plyr::mutate(x)
+  dat_dt <- data.table::as.data.table(dat)
+  types <- unique(dat_dt[[type]])
+  dat_list <- lapply(types, function(t) {
+    x <- dat_dt[dat_dt[[type]] == t]
     dup <- duplicated(x$SNP)
     if (any(dup)) {
       warning(
         "Duplicated SNPs present in exposure data for phenotype '",
-        x[[type]][1],
+        t,
         ". Just keeping the first instance:\n",
         paste(x$SNP[dup], collapse = "\n")
       )
@@ -314,6 +316,8 @@ format_data <- function(
     }
     return(x)
   })
+  dat <- data.table::rbindlist(dat_list, fill = TRUE, use.names = TRUE)
+  data.table::setDF(dat)
 
   # Check if columns required for MR are present
   mr_cols_required <- c(snp_col, beta_col, se_col, effect_allele_col)
@@ -580,15 +584,19 @@ format_data <- function(
 }
 
 check_units <- function(x, id, col) {
-  temp <- plyr::ddply(x, id, function(x1) {
+  x_dt <- data.table::as.data.table(x)
+  ids <- unique(x_dt[[id]])
+  temp_list <- lapply(ids, function(id_val) {
+    x1 <- x_dt[x_dt[[id]] == id_val]
     ph <- FALSE
     if (length(unique(x1[[col]])) > 1) {
-      warning("More than one type of unit specified for ", x1[[id]][1])
-      x1 <- plyr::mutate(x1)
+      warning("More than one type of unit specified for ", id_val)
       ph <- TRUE
     }
     return(data.frame(ph = ph[1], stringsAsFactors = FALSE))
   })
+  temp <- data.table::rbindlist(temp_list, fill = TRUE, use.names = TRUE)
+  data.table::setDF(temp)
   return(temp)
 }
 
@@ -774,14 +782,18 @@ combine_data <- function(x) {
 
   id_col <- paste0("id.", type)
   mr_keep_col <- paste0("mr_keep.", type)
-  x <- plyr::rbind.fill(x)
-
-  x <- plyr::ddply(x, id_col, function(x) {
-    x <- plyr::mutate(x)
-    x <- x[order(x[[mr_keep_col]], decreasing = TRUE), ]
-    x <- subset(x, !duplicated(SNP))
-    return(x)
+  x <- data.table::rbindlist(x, fill = TRUE, use.names = TRUE)
+  data.table::setDF(x)
+
+  ids <- unique(x[[id_col]])
+  x_list <- lapply(ids, function(id_val) {
+    x1 <- x[x[[id_col]] == id_val, , drop = FALSE]
+    x1 <- x1[order(x1[[mr_keep_col]], decreasing = TRUE), ]
+    x1 <- x1[!duplicated(x1$SNP), ]
+    return(x1)
   })
+  x <- data.table::rbindlist(x_list, fill = TRUE, use.names = TRUE)
+  data.table::setDF(x)
 
   rownames(x) <- NULL
   return(x)
diff --git a/R/rucker.R b/R/rucker.R
index 7b2e95f1..97410128 100644
--- a/R/rucker.R
+++ b/R/rucker.R
@@ -294,7 +294,8 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) {
     l[[i]] <- mr_rucker(dat2, parameters)
   }
 
-  modsel <- plyr::rbind.fill(lapply(l, function(x) x$selected))
+  modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE)
+  data.table::setDF(modsel)
   modsel$model <- sapply(l, function(x) x$res)
 
   bootstrap <- data.frame(
@@ -444,7 +445,8 @@ mr_rucker_jackknife_internal <- function(dat, parameters = default_parameters())
       l[[i]] <- mr_rucker_internal(dat2, parameters)
     }
 
-    modsel <- plyr::rbind.fill(lapply(l, function(x) x$selected))
+    modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE)
+    data.table::setDF(modsel)
     modsel$model <- sapply(l, function(x) x$res)
 
     bootstrap <- data.frame(
diff --git a/R/singlesnp.R b/R/singlesnp.R
index 690e2570..45becf6d 100644
--- a/R/singlesnp.R
+++ b/R/singlesnp.R
@@ -24,55 +24,72 @@ mr_singlesnp <- function(
   stopifnot("se.exposure" %in% names(dat))
   stopifnot("se.outcome" %in% names(dat))
 
-  res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(X) {
-    x <- subset(X, mr_keep)
+  dat_dt <- data.table::as.data.table(dat)
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  # Pre-compute method names outside the loop
+  method_list <- mr_method_list()
+  method_names <- vapply(all_method, function(m) {
+    paste0("All - ", method_list$name[method_list$obj == m])
+  }, character(1))
+  
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    X <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
+    x <- X[mr_keep == TRUE]
     nsnp <- nrow(x)
     if (nsnp == 0) {
       x <- X[1, ]
       d <- data.frame(
+        id.exposure = exp_id,
+        id.outcome = out_id,
         SNP = "No available data",
         b = NA,
         se = NA,
         p = NA,
         samplesize = NA,
         outcome = x$outcome[1],
-        exposure = x$exposure[1]
+        exposure = x$exposure[1],
+        stringsAsFactors = FALSE
       )
       return(d)
     }
-    l <- lapply(1:nsnp, function(i) {
+    l <- lapply(1:nsnp, function(j) {
       with(
         x,
         get(single_method)(
-          beta.exposure[i],
-          beta.outcome[i],
-          se.exposure[i],
-          se.outcome[i],
+          beta.exposure[j],
+          beta.outcome[j],
+          se.exposure[j],
+          se.outcome[j],
           parameters
         )
       )
     })
-    nom <- c()
-    for (i in seq_along(all_method)) {
-      l[[nsnp + i]] <- with(
+    for (j in seq_along(all_method)) {
+      l[[nsnp + j]] <- with(
         x,
-        get(all_method[i])(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters)
+        get(all_method[j])(beta.exposure, beta.outcome, se.exposure, se.outcome, parameters)
       )
-
-      nom <- c(nom, paste0("All - ", subset(mr_method_list(), obj == all_method[i])$name))
     }
 
     d <- data.frame(
-      SNP = c(as.character(x$SNP), nom),
+      id.exposure = exp_id,
+      id.outcome = out_id,
+      SNP = c(as.character(x$SNP), method_names),
       b = sapply(l, function(y) y$b),
       se = sapply(l, function(y) y$se),
       p = sapply(l, function(y) y$pval),
-      samplesize = x$samplesize.outcome[1]
+      samplesize = x$samplesize.outcome[1],
+      stringsAsFactors = FALSE
     )
     d$outcome <- x$outcome[1]
     d$exposure <- x$exposure[1]
     return(d)
   })
+  res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(res)
   res <- subset(
     res,
     select = c(exposure, outcome, id.exposure, id.outcome, samplesize, SNP, b, se, p)
@@ -89,8 +106,13 @@ mr_singlesnp <- function(
 #' @export
 #' @return List of plots
 mr_forest_plot <- function(singlesnp_results, exponentiate = FALSE) {
-  res <- plyr::dlply(singlesnp_results, c("id.exposure", "id.outcome"), function(d) {
-    d <- plyr::mutate(d)
+  dat_dt <- data.table::as.data.table(singlesnp_results)
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  res <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id])
     if (sum(!grepl("All", d$SNP)) < 2) {
       return(
         blank_plot("Insufficient number of SNPs")
@@ -195,8 +217,13 @@ mr_density_plot <- function(
   exponentiate = FALSE,
   bandwidth = "nrd0"
 ) {
-  res <- plyr::dlply(singlesnp_results, c("id.exposure", "id.outcome"), function(d) {
-    d <- plyr::mutate(d)
+  dat_dt <- data.table::as.data.table(singlesnp_results)
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  res <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id])
     if (sum(!grepl("All", d$SNP)) < 2) {
       return(
         blank_plot("Insufficient number of SNPs")
@@ -235,8 +262,13 @@ mr_density_plot <- function(
 #' @export
 #' @return List of plots
 mr_funnel_plot <- function(singlesnp_results) {
-  res <- plyr::dlply(singlesnp_results, c("id.exposure", "id.outcome"), function(d) {
-    d <- plyr::mutate(d)
+  dat_dt <- data.table::as.data.table(singlesnp_results)
+  combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
+  
+  res <- lapply(seq_len(nrow(combos)), function(i) {
+    exp_id <- combos$id.exposure[i]
+    out_id <- combos$id.outcome[i]
+    d <- as.data.frame(dat_dt[id.exposure == exp_id & id.outcome == out_id])
     if (sum(!grepl("All", d$SNP)) < 2) {
       return(
         blank_plot("Insufficient number of SNPs")

From 869fc0cd0da06471aa9f0a239af31fedddf74827 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 16:23:55 +0000
Subject: [PATCH 07/34] Update NEWS.md

---
 NEWS.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index fa9e772f..8c40411a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,10 @@
 * Vectorised `mr_egger_regression_bootstrap()`
 * Vectorised `weighted_median_bootstrap()`
 * Deleted duplicated `weighted_median()` function
+* Replace **plyr** function calls with **data.table** function calls
+  - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
+  - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()`
+  - Added `data.table::setDF()` calls to convert back to data.frame for compatibility
 
 # TwoSampleMR v0.6.29
 

From 9137867bd730f1466d1e203fd9758997387479ab Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 16:25:45 +0000
Subject: [PATCH 08/34] Use chartr() instead of 4 gsub() calls

---
 R/harmonise.R | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/R/harmonise.R b/R/harmonise.R
index 29cc76ac..7aa804ef 100644
--- a/R/harmonise.R
+++ b/R/harmonise.R
@@ -173,12 +173,9 @@ check_palindromic <- function(A1, A2) {
 
 
 flip_alleles <- function(x) {
-  x <- toupper(x)
-  x <- gsub("C", "g", x)
-  x <- gsub("G", "c", x)
-  x <- gsub("A", "t", x)
-  x <- gsub("T", "a", x)
-  return(toupper(x))
+  # Use chartr for efficient single-pass character substitution
+  # This is much faster than 4 sequential gsub calls
+  chartr("ACGTacgt", "TGCAtgca", x)
 }
 
 

From 9b08ff83ece5becf2b0b331a0f59f77ba17ce2fd Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 16:26:37 +0000
Subject: [PATCH 09/34] Use single call to sample() instead of n calls

---
 R/read_data.R | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/read_data.R b/R/read_data.R
index 2a019e04..b7d80dd0 100644
--- a/R/read_data.R
+++ b/R/read_data.R
@@ -736,11 +736,13 @@ format_aries_mqtl <- function(aries_mqtl_subset, type = "exposure") {
 
 
 random_string <- function(n = 1, len = 6) {
-  randomString <- c(1:n)
-  for (i in 1:n) {
-    randomString[i] <- paste(sample(c(0:9, letters, LETTERS), len, replace = TRUE), collapse = "")
-  }
-  return(randomString)
+  # Vectorized random string generation
+  # Generate all random characters at once and reshape into matrix
+  chars <- c(0:9, letters, LETTERS)
+  random_chars <- sample(chars, n * len, replace = TRUE)
+  # Reshape to matrix with n rows and len columns, then collapse each row
+  random_matrix <- matrix(random_chars, nrow = n, ncol = len)
+  apply(random_matrix, 1, paste, collapse = "")
 }
 
 create_ids <- function(x) {

From 86251c8262c558de7304b3ea8d4126b29f50b051 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 5 Feb 2026 16:28:04 +0000
Subject: [PATCH 10/34] Update NEWS.md

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 8c40411a..e1995b8d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,6 +9,8 @@
   - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
   - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()`
   - Added `data.table::setDF()` calls to convert back to data.frame for compatibility
+* In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls
+* In `random_string()` use single call to `sample()` instead of n calls
 
 # TwoSampleMR v0.6.29
 

From f6d22964ec8efe6f6885b882f48cf8251df15212 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 11:15:02 +0000
Subject: [PATCH 11/34] Optimize mr_mode()

**1. `beta()` function:**

- Replaced growing vector (`beta <- NULL; beta[length(beta) + 1] <- ...`) with pre-allocated `numeric(length(phi))`
- Used `which.max(densityIV$y)` instead of `densityIV$y == max(densityIV$y)` (safer, avoids floating-point equality issues)

**2. `boot()` function:**

- Pre-generate all random values as two matrices (2 `rnorm` calls instead of 2000)
- Pre-compute `ones <- rep(1, n)` and `weights` outside the loop instead of recomputing each iteration
- Cache `nphi <- length(phi)` to avoid repeated `length()` calls

The loop itself still iterates 1000 times calling `beta()` (which calls `stats::density()`), but the per-iteration overhead is reduced by eliminating random number generation and redundant computations from inside the loop
---
 R/mr_mode.R | 51 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/R/mr_mode.R b/R/mr_mode.R
index 05c42404..8d8c587e 100644
--- a/R/mr_mode.R
+++ b/R/mr_mode.R
@@ -35,17 +35,17 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
     #Standardised weights
     weights <- seBetaIV.in^-2 / sum(seBetaIV.in^-2)
 
-    beta <- NULL
+    result <- numeric(length(phi))
 
-    for (cur_phi in phi) {
+    for (j in seq_along(phi)) {
       #Define the actual bandwidth
-      h <- max(0.00000001, s * cur_phi)
+      h <- max(0.00000001, s * phi[j])
       #Compute the smoothed empirical density function
       densityIV <- stats::density(BetaIV.in, weights = weights, bw = h)
       #Extract the point with the highest density as the point estimate
-      beta[length(beta) + 1] <- densityIV$x[densityIV$y == max(densityIV$y)]
+      result[j] <- densityIV$x[which.max(densityIV$y)]
     }
-    return(beta)
+    return(result)
   }
 
   #------------------------------------------#
@@ -55,50 +55,65 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
   #seBetaIV.in: standard errors of ratio estimates
   #beta_Mode.in: point causal effect estimates
   boot <- function(BetaIV.in, seBetaIV.in, beta_Mode.in, nboot) {
+    n <- length(BetaIV.in)
+    nphi <- length(phi)
+
+    #Pre-generate all random values as matrices (nboot x n)
+    #mean and sd recycle in lockstep (both length n), filling column-by-column
+    BetaIV.boot_mat <- matrix(
+      stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 1]),
+      nrow = nboot, ncol = n
+    )
+    BetaIV.boot_NOME_mat <- matrix(
+      stats::rnorm(nboot * n, mean = BetaIV.in, sd = seBetaIV.in[, 2]),
+      nrow = nboot, ncol = n
+    )
+
+    #Pre-compute constants
+    ones <- rep(1, n)
+    weights <- 1 / seBetaIV.in[, 1]^2
+
     #Set up a matrix to store the results from each bootstrap iteration
     beta.boot <- matrix(nrow = nboot, ncol = length(beta_Mode.in))
 
     for (i in 1:nboot) {
-      #Re-sample each ratio estimate using SEs derived not assuming NOME
-      BetaIV.boot <- stats::rnorm(length(BetaIV.in), mean = BetaIV.in, sd = seBetaIV.in[, 1])
-      #Re-sample each ratio estimate using SEs derived under NOME
-      BetaIV.boot_NOME <- stats::rnorm(length(BetaIV.in), mean = BetaIV.in, sd = seBetaIV.in[, 2])
+      BetaIV.boot <- BetaIV.boot_mat[i, ]
+      BetaIV.boot_NOME <- BetaIV.boot_NOME_mat[i, ]
 
       #Simple mode, not assuming NOME
-      beta.boot[i, seq_along(phi)] <- beta(
+      beta.boot[i, seq_len(nphi)] <- beta(
         BetaIV.in = BetaIV.boot,
-        seBetaIV.in = rep(1, length(BetaIV)),
+        seBetaIV.in = ones,
         phi = phi
       )
       #Weighted mode, not assuming NOME
-      beta.boot[i, (length(phi) + 1):(2 * length(phi))] <- beta(
+      beta.boot[i, (nphi + 1):(2 * nphi)] <- beta(
         BetaIV.in = BetaIV.boot,
         seBetaIV.in = seBetaIV.in[, 1],
         phi = phi
       )
       #Penalised mode, not assuming NOME
-      weights <- 1 / seBetaIV.in[, 1]^2
       penalty <- stats::pchisq(
-        weights * (BetaIV.boot - beta.boot[i, (length(phi) + 1):(2 * length(phi))])^2,
+        weights * (BetaIV.boot - beta.boot[i, (nphi + 1):(2 * nphi)])^2,
         df = 1,
         lower.tail = FALSE
       )
       pen.weights <- weights * pmin(1, penalty * parameters$penk)
 
-      beta.boot[i, (2 * length(phi) + 1):(3 * length(phi))] <- beta(
+      beta.boot[i, (2 * nphi + 1):(3 * nphi)] <- beta(
         BetaIV.in = BetaIV.boot,
         seBetaIV.in = sqrt(1 / pen.weights),
         phi = phi
       )
 
       #Simple mode, assuming NOME
-      beta.boot[i, (3 * length(phi) + 1):(4 * length(phi))] <- beta(
+      beta.boot[i, (3 * nphi + 1):(4 * nphi)] <- beta(
         BetaIV.in = BetaIV.boot_NOME,
-        seBetaIV.in = rep(1, length(BetaIV)),
+        seBetaIV.in = ones,
         phi = phi
       )
       #Weighted mode, assuming NOME
-      beta.boot[i, (4 * length(phi) + 1):(5 * length(phi))] <- beta(
+      beta.boot[i, (4 * nphi + 1):(5 * nphi)] <- beta(
         BetaIV.in = BetaIV.boot_NOME,
         seBetaIV.in = seBetaIV.in[, 2],
         phi = phi

From f26817b22af170572251c19407dddfff6c84ef6a Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 11:15:54 +0000
Subject: [PATCH 12/34] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index e1995b8d..27e8d7a5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -11,6 +11,7 @@
   - Added `data.table::setDF()` calls to convert back to data.frame for compatibility
 * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls
 * In `random_string()` use single call to `sample()` instead of n calls
+* Optimized `mr_mode()`
 
 # TwoSampleMR v0.6.29
 

From c8736992a79e6341f84b11f68095036465f4925f Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:01:33 +0000
Subject: [PATCH 13/34] Remove use of plyr from package

---
 DESCRIPTION            |  1 -
 R/add_rsq.r            | 14 ++++++++++----
 R/enrichment.R         | 20 +++++++++++++-------
 R/eve.R                |  9 ++++++---
 R/forest_plot2.R       | 11 +++++++----
 R/format_mr_results2.R |  7 ++++---
 R/heterogeneity.R      | 32 ++++++++++++++++++++++----------
 R/knit.R               | 11 +++++------
 R/mr_mode.R            |  6 +++++-
 R/other_formats.R      | 18 +++++++++++++-----
 R/scatterplot.R        |  6 ++++--
 R/steiger.R            | 14 ++++++++++----
 R/steiger_filtering.R  |  9 ++++++++-
 R/transform.R          |  6 +++++-
 14 files changed, 112 insertions(+), 52 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 72a5e0dc..64509010 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,7 +43,6 @@ Imports:
     MRMix,
     MRPRESSO,
     pbapply,
-    plyr,
     psych,
     RadialMR,
     reshape2,
diff --git a/R/add_rsq.r b/R/add_rsq.r
index 6bc3ff90..4394bac1 100644
--- a/R/add_rsq.r
+++ b/R/add_rsq.r
@@ -11,14 +11,20 @@
 #' @return data frame
 add_rsq <- function(dat) {
   if ("id.exposure" %in% names(dat)) {
-    dat <- plyr::ddply(dat, c("id.exposure"), function(x) {
-      add_rsq_one(x, "exposure")
+    ids <- unique(dat$id.exposure)
+    results <- lapply(ids, function(id) {
+      add_rsq_one(dat[dat$id.exposure == id, ], "exposure")
     })
+    dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+    data.table::setDF(dat)
   }
   if ("id.outcome" %in% names(dat)) {
-    dat <- plyr::ddply(dat, c("id.outcome"), function(x) {
-      add_rsq_one(x, "outcome")
+    ids <- unique(dat$id.outcome)
+    results <- lapply(ids, function(id) {
+      add_rsq_one(dat[dat$id.outcome == id, ], "outcome")
     })
+    dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+    data.table::setDF(dat)
   }
   return(dat)
 }
diff --git a/R/enrichment.R b/R/enrichment.R
index 87e24a62..8f751722 100644
--- a/R/enrichment.R
+++ b/R/enrichment.R
@@ -34,7 +34,8 @@ enrichment_method_list <- function() {
     )
   )
   a <- lapply(a, as.data.frame)
-  a <- plyr::rbind.fill(a)
+  a <- data.table::rbindlist(a, fill = TRUE, use.names = TRUE)
+  data.table::setDF(a)
   a <- as.data.frame(lapply(a, as.character), stringsAsFactors = FALSE)
   return(a)
 }
@@ -50,9 +51,10 @@ enrichment_method_list <- function() {
 #' @export
 #' @return data frame
 enrichment <- function(dat, method_list = enrichment_method_list()$obj) {
-  res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) {
-    # message("Performing enrichment analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")
-
+  methl <- enrichment_method_list()
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     x <- subset(x1, !is.na(pval.outcome))
     if (nrow(x) == 0) {
       message(
@@ -67,16 +69,20 @@ enrichment <- function(dat, method_list = enrichment_method_list()$obj) {
     res <- lapply(method_list, function(meth) {
       get(meth)(x$pval.outcome)
     })
-    methl <- enrichment_method_list()
     enrichment_tab <- data.frame(
+      id.exposure = x1$id.exposure[1],
+      id.outcome = x1$id.outcome[1],
       outcome = x$outcome[1],
       exposure = x$exposure[1],
       method = methl$name[methl$obj %in% method_list],
-      nsnp = sapply(res, function(x) x$nsnp),
-      pval = sapply(res, function(x) x$pval)
+      nsnp = vapply(res, function(x) x$nsnp, numeric(1)),
+      pval = vapply(res, function(x) x$pval, numeric(1)),
+      stringsAsFactors = FALSE
     )
     enrichment_tab <- subset(enrichment_tab, !is.na(pval))
     return(enrichment_tab)
   })
+  res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(res)
   return(res)
 }
diff --git a/R/eve.R b/R/eve.R
index 9d684bd7..be58892f 100644
--- a/R/eve.R
+++ b/R/eve.R
@@ -327,10 +327,13 @@ mr_wrapper_single <- function(dat, parameters = default_parameters()) {
 #' @export
 #' @return list
 mr_wrapper <- function(dat, parameters = default_parameters()) {
-  plyr::dlply(dat, c("id.exposure", "id.outcome"), function(x) {
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  res <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     message("Performing MR analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")
     d <- subset(x, mr_keep)
-    o <- mr_wrapper_single(d, parameters = parameters)
-    o
+    mr_wrapper_single(d, parameters = parameters)
   })
+  names(res) <- paste(combos$id.exposure, combos$id.outcome, sep = ".")
+  res
 }
diff --git a/R/forest_plot2.R b/R/forest_plot2.R
index 42f3322c..6174641e 100644
--- a/R/forest_plot2.R
+++ b/R/forest_plot2.R
@@ -101,9 +101,9 @@ format_mr_results <- function(
 
   # Fill in missing values
   exps <- unique(dat$exposure)
-  dat <- plyr::ddply(dat, c("outcome"), function(x) {
-    x <- plyr::mutate(x)
-    nc <- ncol(x)
+  outcomes <- unique(dat$outcome)
+  results <- lapply(outcomes, function(out_val) {
+    x <- dat[dat$outcome == out_val, ]
     missed <- exps[!exps %in% x$exposure]
     if (length(missed) >= 1) {
       out <- unique(x$outcome)
@@ -116,10 +116,13 @@ format_mr_results <- function(
         sample_size = n,
         stringsAsFactors = FALSE
       )
-      x <- plyr::rbind.fill(x, md)
+      x <- data.table::rbindlist(list(x, md), fill = TRUE, use.names = TRUE)
+      data.table::setDF(x)
     }
     return(x)
   })
+  dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(dat)
   # dat <- dplyr::group_by(dat, outcome) %>%
   # 	dplyr::do({
   # 		x <- .
diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R
index 40e676a3..3687343d 100644
--- a/R/format_mr_results2.R
+++ b/R/format_mr_results2.R
@@ -185,10 +185,11 @@ combine_all_mrresults <- function(
   names(sin)[names(sin) == "method"] <- "Method"
 
   res <- merge(res, het, by = c("id.outcome", "id.exposure", "Method"), all.x = TRUE)
-  res <- plyr::rbind.fill(
-    res,
-    sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]
+  res <- data.table::rbindlist(
+    list(res, sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]),
+    fill = TRUE, use.names = TRUE
   )
+  data.table::setDF(res)
 
   if (ao_slc) {
     ao <- available_outcomes()
diff --git a/R/heterogeneity.R b/R/heterogeneity.R
index dde0555d..b6f92438 100644
--- a/R/heterogeneity.R
+++ b/R/heterogeneity.R
@@ -13,8 +13,10 @@ mr_heterogeneity <- function(
   parameters = default_parameters(),
   method_list = subset(mr_method_list(), heterogeneity_test & use_by_default)$obj
 ) {
-  het_tab <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) {
-    # message("Performing MR analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")
+  methl <- mr_method_list()
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     x <- subset(x1, mr_keep)
     if (nrow(x) < 2) {
       message(
@@ -29,18 +31,22 @@ mr_heterogeneity <- function(
     res <- lapply(method_list, function(meth) {
       get(meth)(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome, parameters)
     })
-    methl <- mr_method_list()
     het_tab <- data.frame(
+      id.exposure = x1$id.exposure[1],
+      id.outcome = x1$id.outcome[1],
       outcome = x$outcome[1],
       exposure = x$exposure[1],
       method = methl$name[match(method_list, methl$obj)],
-      Q = sapply(res, function(x) x$Q),
-      Q_df = sapply(res, function(x) x$Q_df),
-      Q_pval = sapply(res, function(x) x$Q_pval)
+      Q = vapply(res, function(x) x$Q, numeric(1)),
+      Q_df = vapply(res, function(x) x$Q_df, numeric(1)),
+      Q_pval = vapply(res, function(x) x$Q_pval, numeric(1)),
+      stringsAsFactors = FALSE
     )
     het_tab <- subset(het_tab, !(is.na(Q) & is.na(Q_df) & is.na(Q_pval)))
     return(het_tab)
   })
+  het_tab <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(het_tab)
 
   return(het_tab)
 }
@@ -55,7 +61,9 @@ mr_heterogeneity <- function(
 #' @export
 #' @return data frame
 mr_pleiotropy_test <- function(dat) {
-  ptab <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) {
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     x <- subset(x1, mr_keep)
     if (nrow(x) < 2) {
       message(
@@ -74,14 +82,18 @@ mr_pleiotropy_test <- function(dat) {
       x$se.outcome,
       default_parameters()
     )
-    out <- data.frame(
+    data.frame(
+      id.exposure = x1$id.exposure[1],
+      id.outcome = x1$id.outcome[1],
       outcome = x$outcome[1],
       exposure = x$exposure[1],
       egger_intercept = res$b_i,
       se = res$se_i,
-      pval = res$pval_i
+      pval = res$pval_i,
+      stringsAsFactors = FALSE
     )
-    return(out)
   })
+  ptab <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(ptab)
   return(ptab)
 }
diff --git a/R/knit.R b/R/knit.R
index 0082b21a..bfebacf1 100644
--- a/R/knit.R
+++ b/R/knit.R
@@ -122,14 +122,13 @@ mr_report <- function(
     mr_leaveoneout_plot = mr_leaveoneout_plot(m$mr_leaveoneout)
   )
 
-  combinations <- plyr::ddply(
-    dat,
-    c("id.exposure", "id.outcome"),
-    plyr::summarise,
-    n = length(exposure),
+  dat_dt <- data.table::as.data.table(dat)
+  combinations <- dat_dt[, .(
+    n = .N,
     exposure = exposure[1],
     outcome = outcome[1]
-  )
+  ), by = c("id.exposure", "id.outcome")]
+  data.table::setDF(combinations)
 
   output_file <- array("", nrow(combinations))
   for (i in seq_len(nrow(combinations))) {
diff --git a/R/mr_mode.R b/R/mr_mode.R
index 8d8c587e..1281af9b 100644
--- a/R/mr_mode.R
+++ b/R/mr_mode.R
@@ -374,9 +374,13 @@ mr_simple_mode_nome <- function(b_exp, b_out, se_exp, se_out, parameters = defau
 
 
 mr_mode_broken <- function(dat, parameters = default_parameters(), mode_method = "all") {
-  res <- plyr::ddply(dat, c("id.exposure", "exposure", "id.outcome", "outcome"), function(x) {
+  combos <- unique(dat[, c("id.exposure", "exposure", "id.outcome", "outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     mr_mode_internal(x, parameters, mode_method = "all")
   })
+  res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(res)
   if (mode_method != "all") {
     return(subset(res, method %in% mode_method))
   } else {
diff --git a/R/other_formats.R b/R/other_formats.R
index cd0057ab..cee95686 100644
--- a/R/other_formats.R
+++ b/R/other_formats.R
@@ -17,8 +17,9 @@ dat_to_MRInput <- function(dat, get_correlations = FALSE, pop = "EUR") {
       call. = FALSE
     )
   }
-  out <- plyr::dlply(dat, c("exposure", "outcome"), function(x) {
-    x <- plyr::mutate(x)
+  combos <- unique(dat[, c("exposure", "outcome")])
+  out <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$exposure == combos$exposure[i] & dat$outcome == combos$outcome[i], ]
     message("Converting:")
     message(" - exposure: ", x$exposure[1])
     message(" - outcome: ", x$outcome[1])
@@ -60,6 +61,7 @@ dat_to_MRInput <- function(dat, get_correlations = FALSE, pop = "EUR") {
       )
     }
   })
+  names(out) <- paste(combos$exposure, combos$outcome, sep = ".")
   return(out)
 }
 
@@ -179,8 +181,9 @@ run_mr_presso <- function(dat, NbDistribution = 1000, SignifThreshold = 0.05) {
 #' @export
 #' @return List of RadialMR format datasets
 dat_to_RadialMR <- function(dat) {
-  out <- plyr::dlply(dat, c("exposure", "outcome"), function(x) {
-    x <- plyr::mutate(x)
+  combos <- unique(dat[, c("exposure", "outcome")])
+  out <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$exposure == combos$exposure[i] & dat$outcome == combos$outcome[i], ]
     message("Converting:")
     message(" - exposure: ", x$exposure[1])
     message(" - outcome: ", x$outcome[1])
@@ -194,6 +197,7 @@ dat_to_RadialMR <- function(dat) {
     )
     return(d)
   })
+  names(out) <- paste(combos$exposure, combos$outcome, sep = ".")
   return(out)
 }
 
@@ -251,7 +255,9 @@ mr_ivw_radial <- function(b_exp, b_out, se_exp, se_out, parameters = default_par
 #' @export
 #' @return List of results, with one list item for every exposure/outcome pair in dat object
 run_mrmix <- function(dat) {
-  plyr::dlply(dat, c("id.exposure", "id.outcome"), function(x) {
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  out <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     message("Analysing ", x$id.exposure[1], " against ", x$id.outcome[1])
     if (grepl("log odds", x$units.exposure[1])) {
       xunit <- "binary"
@@ -294,4 +300,6 @@ run_mrmix <- function(dat) {
     x$se.outcome <- l$sy_std
     MRMix::MRMix(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome)
   })
+  names(out) <- paste(combos$id.exposure, combos$id.outcome, sep = ".")
+  out
 }
diff --git a/R/scatterplot.R b/R/scatterplot.R
index 28221e09..abc6dc88 100644
--- a/R/scatterplot.R
+++ b/R/scatterplot.R
@@ -8,8 +8,9 @@
 #' @return List of plots
 mr_scatter_plot <- function(mr_results, dat) {
   # dat <- subset(dat, paste(id.outcome, id.exposure) %in% paste(mr_results$id.outcome, mr_results$id.exposure))
-  mrres <- plyr::dlply(dat, c("id.exposure", "id.outcome"), function(d) {
-    d <- plyr::mutate(d)
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  mrres <- lapply(seq_len(nrow(combos)), function(i) {
+    d <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     if (nrow(d) < 2 | sum(d$mr_keep) == 0) {
       return(blank_plot("Insufficient number of SNPs"))
     }
@@ -161,6 +162,7 @@ mr_scatter_plot <- function(mr_results, dat) {
         ggplot2::guides(colour = ggplot2::guide_legend(ncol = 2))
     }
   })
+  names(mrres) <- paste(combos$id.exposure, combos$id.outcome, sep = ".")
   mrres
 }
 
diff --git a/R/steiger.R b/R/steiger.R
index 745ddf56..429311f9 100644
--- a/R/steiger.R
+++ b/R/steiger.R
@@ -183,7 +183,9 @@ directionality_test <- function(dat) {
       )
     }
   }
-  dtest <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x) {
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     if (!"r.exposure" %in% names(x)) {
       x$r.exposure <- NA
     }
@@ -198,16 +200,20 @@ directionality_test <- function(dat) {
       x$r.exposure,
       x$r.outcome
     )
-    a <- data.frame(
+    data.frame(
+      id.exposure = combos$id.exposure[i],
+      id.outcome = combos$id.outcome[i],
       exposure = x$exposure[1],
       outcome = x$outcome[1],
       snp_r2.exposure = b$r2_exp,
       snp_r2.outcome = b$r2_out,
       correct_causal_direction = b$correct_causal_direction,
-      steiger_pval = b$steiger_test
+      steiger_pval = b$steiger_test,
+      stringsAsFactors = FALSE
     )
-    return(a)
   })
+  dtest <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(dtest)
 
   return(dtest)
 }
diff --git a/R/steiger_filtering.R b/R/steiger_filtering.R
index 0b79e14f..28466903 100644
--- a/R/steiger_filtering.R
+++ b/R/steiger_filtering.R
@@ -21,7 +21,14 @@
 #' @export
 #' @return [harmonise_data()] style data frame with additional columns rsq.exposure, rsq.outcome, steiger_dir (which is `TRUE` if the rsq.exposure is larger than rsq.outcome) and steiger_pval which is a test to see if rsq.exposure is significantly larger than rsq.outcome.
 steiger_filtering <- function(dat) {
-  plyr::ddply(dat, c("id.exposure", "id.outcome"), steiger_filtering_internal)
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
+    steiger_filtering_internal(x)
+  })
+  out <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(out)
+  return(out)
 }
 
 
diff --git a/R/transform.R b/R/transform.R
index 3a5a92cf..ab5cfa53 100644
--- a/R/transform.R
+++ b/R/transform.R
@@ -7,7 +7,9 @@
 #' @export
 #' @return Data frame
 standardise_units <- function(dat) {
-  out <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(d) {
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    d <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     if (d$units.exposure[1] != "log odds") {
       estsd <- mean(
         estimate_trait_sd(d$beta.exposure, d$se.exposure, d$samplesize.exposure, d$eaf.exposure),
@@ -35,6 +37,8 @@ standardise_units <- function(dat) {
     }
     return(d)
   })
+  out <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(out)
   return(out)
 }
 

From 30823bc914a1099d3c9b6d5fd14fd1cc86f19f98 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:02:19 +0000
Subject: [PATCH 14/34] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 27e8d7a5..9598e0c2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,6 +9,7 @@
   - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
   - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()`
   - Added `data.table::setDF()` calls to convert back to data.frame for compatibility
+  - And removed **plyr** from Imports list
 * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls
 * In `random_string()` use single call to `sample()` instead of n calls
 * Optimized `mr_mode()`

From b108eb8cddb7feddf7c72e786273b7110b5b726f Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:04:22 +0000
Subject: [PATCH 15/34] Replace apply(..., any(is.na())) with complete.case()

---
 R/harmonise.R | 2 +-
 R/query.R     | 2 +-
 R/read_data.R | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/harmonise.R b/R/harmonise.R
index 7aa804ef..1508aacd 100644
--- a/R/harmonise.R
+++ b/R/harmonise.R
@@ -96,7 +96,7 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) {
     ) -
       nrow(x)
 
-    x$mr_keep[apply(x[, mr_cols], 1, function(y) any(is.na(y)))] <- FALSE
+    x$mr_keep[!complete.cases(x[, mr_cols])] <- FALSE
     attr(x, "log")[["total_variants"]] <- nrow(x)
     attr(x, "log")[["total_variants_for_mr"]] <- sum(x$mr_keep)
     attr(x, "log")[["proxy_variants"]] <- ifelse(
diff --git a/R/query.R b/R/query.R
index 2260fa43..7d7e507e 100644
--- a/R/query.R
+++ b/R/query.R
@@ -317,7 +317,7 @@ format_d <- function(d) {
   d <- cleanup_outcome_data(d)
 
   mrcols <- c("beta.outcome", "se.outcome", "effect_allele.outcome")
-  d$mr_keep.outcome <- apply(d[, mrcols], 1, function(x) !any(is.na(x)))
+  d$mr_keep.outcome <- complete.cases(d[, mrcols])
   if (any(!d$mr_keep.outcome)) {
     missinginfosnps <- paste(subset(d, !mr_keep.outcome)$SNP, collapse = " ")
     warning(
diff --git a/R/read_data.R b/R/read_data.R
index b7d80dd0..c3c354d4 100644
--- a/R/read_data.R
+++ b/R/read_data.R
@@ -549,7 +549,7 @@ format_data <- function(
     mrcols <- c("SNP", "beta.outcome", "se.outcome", "effect_allele.outcome")
     mrcols_present <- mrcols[mrcols %in% names(dat)]
     dat$mr_keep.outcome <- dat$mr_keep.outcome &
-      apply(dat[, mrcols_present], 1, function(x) !any(is.na(x)))
+      complete.cases(dat[, mrcols_present])
     if (any(!dat$mr_keep.outcome)) {
       missinginfosnps <- paste(subset(dat, !mr_keep.outcome)$SNP, collapse = " ")
       warning(

From 2f30adcd1dc7f6216158ed8d12987678382fa39e Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:04:55 +0000
Subject: [PATCH 16/34] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 9598e0c2..8782d4e5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -13,6 +13,7 @@
 * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls
 * In `random_string()` use single call to `sample()` instead of n calls
 * Optimized `mr_mode()`
+* Replaced `apply(..., any(is.na()))` with `complete.case()`
 
 # TwoSampleMR v0.6.29
 

From 8415eee7d73766c986f629ee99e6b8a6c6d440a4 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:08:37 +0000
Subject: [PATCH 17/34] Move mr_method_list() before the loop

---
 R/mr.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/mr.R b/R/mr.R
index 68fd67a8..2dcdd4f0 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -30,6 +30,8 @@ mr <- function(
   # Get unique combinations of id.exposure and id.outcome
   combos <- unique(dat_dt[, .(id.exposure, id.outcome)])
   
+  # Pre-compute method names once, outside the loop
+  methl <- mr_method_list()
   # Process each combination
   results <- lapply(seq_len(nrow(combos)), function(i) {
     exp_id <- combos$id.exposure[i]
@@ -52,7 +54,6 @@ mr <- function(
     res <- lapply(method_list, function(meth) {
       get(meth)(x$beta.exposure, x$beta.outcome, x$se.exposure, x$se.outcome, parameters)
     })
-    methl <- mr_method_list()
     mr_tab <- data.frame(
       id.exposure = exp_id,
       id.outcome = out_id,

From 5e8570a3bd2a58379e26639e4dd81dce14855a76 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:09:09 +0000
Subject: [PATCH 18/34] Pre-compute method_names

---
 R/mr.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/mr.R b/R/mr.R
index 2dcdd4f0..a871ad9e 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -32,6 +32,8 @@ mr <- function(
   
   # Pre-compute method names once, outside the loop
   methl <- mr_method_list()
+  method_names <- methl$name[match(method_list, methl$obj)]
+
   # Process each combination
   results <- lapply(seq_len(nrow(combos)), function(i) {
     exp_id <- combos$id.exposure[i]
@@ -59,7 +61,6 @@ mr <- function(
       id.outcome = out_id,
       outcome = x$outcome[1],
       exposure = x$exposure[1],
-      method = methl$name[match(method_list, methl$obj)],
       nsnp = sapply(res, function(x) x$nsnp),
       b = sapply(res, function(x) x$b),
       se = sapply(res, function(x) x$se),

From 5aeb32f7364579bb88a0c798201997a97bf89125 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:09:29 +0000
Subject: [PATCH 19/34] Replace sapply() with vapply()

---
 R/mr.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/mr.R b/R/mr.R
index a871ad9e..32fbee2a 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -61,10 +61,11 @@ mr <- function(
       id.outcome = out_id,
       outcome = x$outcome[1],
       exposure = x$exposure[1],
-      nsnp = sapply(res, function(x) x$nsnp),
-      b = sapply(res, function(x) x$b),
-      se = sapply(res, function(x) x$se),
-      pval = sapply(res, function(x) x$pval),
+      method = method_names,
+      nsnp = vapply(res, function(x) x$nsnp, numeric(1)),
+      b = vapply(res, function(x) x$b, numeric(1)),
+      se = vapply(res, function(x) x$se, numeric(1)),
+      pval = vapply(res, function(x) x$pval, numeric(1)),
       stringsAsFactors = FALSE
     )
     mr_tab <- mr_tab[!(is.na(mr_tab$b) & is.na(mr_tab$se) & is.na(mr_tab$pval)), ]

From 0d1d4fe17bbd322a10c829892ab76d4a5d8cc8dd Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:13:18 +0000
Subject: [PATCH 20/34] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 8782d4e5..ce4da264 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,6 +14,7 @@
 * In `random_string()` use single call to `sample()` instead of n calls
 * Optimized `mr_mode()`
 * Replaced `apply(..., any(is.na()))` with `complete.case()`
+* Optimized the `mr()` function
 
 # TwoSampleMR v0.6.29
 

From ba92c0befc81c7f52c6a8e04810a7e02ff6124e4 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:14:30 +0000
Subject: [PATCH 21/34] Optimize get_r_from_lor()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- **Moved `ve` computation outside the loop** — was being recalculated identically on every iteration
- **Eliminated the `for` loop entirely** — the `prop` calculation, `vg`, `r`, `correction`, and `sign` operations are all vectorized now
- **Single call to `get_population_allele_frequency()`** with full vectors instead of element-by-element
---
 R/add_rsq.r | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/R/add_rsq.r b/R/add_rsq.r
index 4394bac1..81a635a9 100644
--- a/R/add_rsq.r
+++ b/R/add_rsq.r
@@ -267,29 +267,22 @@ get_r_from_lor <- function(
     ncontrol <- rep(ncontrol, length(lor))
   }
 
-  nsnp <- length(lor)
-  r <- array(NA, nsnp)
-  for (i in 1:nsnp) {
-    if (model == "logit") {
-      ve <- pi^2 / 3
-    } else if (model == "probit") {
-      ve <- 1
-    } else {
-      stop("Model must be probit or logit")
-    }
-    popaf <- get_population_allele_frequency(
-      af[i],
-      ncase[i] / (ncase[i] + ncontrol[i]),
-      exp(lor[i]),
-      prevalence[i]
-    )
-    vg <- (lor[i])^2 * popaf * (1 - popaf)
-    r[i] <- vg / (vg + ve)
-    if (correction) {
-      r[i] <- r[i] / 0.58
-    }
-    r[i] <- sqrt(r[i]) * sign(lor[i])
+  if (model == "logit") {
+    ve <- pi^2 / 3
+  } else if (model == "probit") {
+    ve <- 1
+  } else {
+    stop("Model must be probit or logit")
+  }
+
+  prop <- ncase / (ncase + ncontrol)
+  popaf <- get_population_allele_frequency(af, prop, exp(lor), prevalence)
+  vg <- lor^2 * popaf * (1 - popaf)
+  r <- vg / (vg + ve)
+  if (correction) {
+    r <- r / 0.58
   }
+  r <- sqrt(r) * sign(lor)
   return(r)
 }
 

From e8e78bc70a385bcda6d145f45f59f475051fa306 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:14:57 +0000
Subject: [PATCH 22/34] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index ce4da264..b04f8c14 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -15,6 +15,7 @@
 * Optimized `mr_mode()`
 * Replaced `apply(..., any(is.na()))` with `complete.case()`
 * Optimized the `mr()` function
+* Optimized the `Optimize get_r_from_lor()` function
 
 # TwoSampleMR v0.6.29
 

From b34677f035614faae0812031379bcbb80fa0f7fe Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:15:29 +0000
Subject: [PATCH 23/34] Update NEWS.md

---
 NEWS.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b04f8c14..9121fc2b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,8 +6,8 @@
 * Vectorised `weighted_median_bootstrap()`
 * Deleted duplicated `weighted_median()` function
 * Replace **plyr** function calls with **data.table** function calls
-  - `plyr::rbind.fill(...)` → `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
-  - `plyr::ddply(dat, cols, func)` → `lapply()` over unique combinations + `data.table::rbindlist()`
+  - `plyr::rbind.fill(...)` to `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
+  - `plyr::ddply(dat, cols, func)` to `lapply()` over unique combinations + `data.table::rbindlist()`
   - Added `data.table::setDF()` calls to convert back to data.frame for compatibility
   - And removed **plyr** from Imports list
 * In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls

From 886c35ddbef20735c3cb63fc160574d4677176b7 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:20:47 +0000
Subject: [PATCH 24/34] Optimize mr_rucker_bootstrap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Pre-generate all random values as two matrices (2 `rnorm` calls instead of 2000) — `boot_exp` and `boot_out` are `nboot x nsnp` matrices
- Loop now indexes rows from the pre-generated matrices instead of calling `rnorm()` each iteration
- `sapply` → `vapply` for `model`, `Q`, `Qdash` extraction (4 calls)
---
 R/rucker.R | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/R/rucker.R b/R/rucker.R
index 97410128..5ed7a7ad 100644
--- a/R/rucker.R
+++ b/R/rucker.R
@@ -286,22 +286,33 @@ mr_rucker_bootstrap <- function(dat, parameters = default_parameters()) {
 
   # Main result
   rucker <- mr_rucker(dat, parameters)
+
+  # Pre-generate all random values as matrices (nboot x nsnp)
+  boot_exp <- matrix(
+    stats::rnorm(nboot * nsnp, mean = dat$beta.exposure, sd = dat$se.exposure),
+    nrow = nboot, ncol = nsnp
+  )
+  boot_out <- matrix(
+    stats::rnorm(nboot * nsnp, mean = dat$beta.outcome, sd = dat$se.outcome),
+    nrow = nboot, ncol = nsnp
+  )
+
   dat2 <- dat
   l <- list()
   for (i in 1:nboot) {
-    dat2$beta.exposure <- stats::rnorm(nsnp, mean = dat$beta.exposure, sd = dat$se.exposure)
-    dat2$beta.outcome <- stats::rnorm(nsnp, mean = dat$beta.outcome, sd = dat$se.outcome)
+    dat2$beta.exposure <- boot_exp[i, ]
+    dat2$beta.outcome <- boot_out[i, ]
     l[[i]] <- mr_rucker(dat2, parameters)
   }
 
   modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE)
   data.table::setDF(modsel)
-  modsel$model <- sapply(l, function(x) x$res)
+  modsel$model <- vapply(l, function(x) x$res, character(1))
 
   bootstrap <- data.frame(
-    Q = c(rucker$Q$Q[1], sapply(l, function(x) x$Q$Q[1])),
-    Qdash = c(rucker$Q$Q[2], sapply(l, function(x) x$Q$Q[2])),
-    model = c(rucker$res, sapply(l, function(x) x$res)),
+    Q = c(rucker$Q$Q[1], vapply(l, function(x) x$Q$Q[1], numeric(1))),
+    Qdash = c(rucker$Q$Q[2], vapply(l, function(x) x$Q$Q[2], numeric(1))),
+    model = c(rucker$res, vapply(l, function(x) x$res, character(1))),
     i = c("Full", rep("Bootstrap", nboot))
   )
 

From edb8a110c27243f3f1793335a3498bc24259233c Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:21:12 +0000
Subject: [PATCH 25/34] Optimize mr_rucker_jackknife_internal()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- sapply` → `vapply` for `model`, `Q`, `Qdash` extraction (4 calls)
---
 R/rucker.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/rucker.R b/R/rucker.R
index 5ed7a7ad..e5752e06 100644
--- a/R/rucker.R
+++ b/R/rucker.R
@@ -458,12 +458,12 @@ mr_rucker_jackknife_internal <- function(dat, parameters = default_parameters())
 
     modsel <- data.table::rbindlist(lapply(l, function(x) x$selected), fill = TRUE, use.names = TRUE)
     data.table::setDF(modsel)
-    modsel$model <- sapply(l, function(x) x$res)
+    modsel$model <- vapply(l, function(x) x$res, character(1))
 
     bootstrap <- data.frame(
-      Q = c(rucker$Q$Q[1], sapply(l, function(x) x$Q$Q[1])),
-      Qdash = c(rucker$Q$Q[2], sapply(l, function(x) x$Q$Q[2])),
-      model = c(rucker$res, sapply(l, function(x) x$res)),
+      Q = c(rucker$Q$Q[1], vapply(l, function(x) x$Q$Q[1], numeric(1))),
+      Qdash = c(rucker$Q$Q[2], vapply(l, function(x) x$Q$Q[2], numeric(1))),
+      model = c(rucker$res, vapply(l, function(x) x$res, character(1))),
       i = c("Full", rep("Jackknife", nboot))
     )
 

From 80e8b20fdd754c726eee954f8b7fb7fa7b6fd2f8 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:21:50 +0000
Subject: [PATCH 26/34] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 9121fc2b..581c3fad 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -16,6 +16,7 @@
 * Replaced `apply(..., any(is.na()))` with `complete.case()`
 * Optimized the `mr()` function
 * Optimized the `Optimize get_r_from_lor()` function
+* Optimized the `mr_rucker_bootstrap()` and `mr_rucker_jackknife_internal()` functions
 
 # TwoSampleMR v0.6.29
 

From 55f2a5a331f9239699da8b5458ea1bab65d9af27 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:24:39 +0000
Subject: [PATCH 27/34] Replace sapply() with vapply()

---
 R/singlesnp.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/singlesnp.R b/R/singlesnp.R
index 45becf6d..82c9810d 100644
--- a/R/singlesnp.R
+++ b/R/singlesnp.R
@@ -78,9 +78,9 @@ mr_singlesnp <- function(
       id.exposure = exp_id,
       id.outcome = out_id,
       SNP = c(as.character(x$SNP), method_names),
-      b = sapply(l, function(y) y$b),
-      se = sapply(l, function(y) y$se),
-      p = sapply(l, function(y) y$pval),
+      b = vapply(l, function(y) y$b, numeric(1)),
+      se = vapply(l, function(y) y$se, numeric(1)),
+      p = vapply(l, function(y) y$pval, numeric(1)),
       samplesize = x$samplesize.outcome[1],
       stringsAsFactors = FALSE
     )

From a0143e2102be732722758448f3037e58d948e441 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:24:56 +0000
Subject: [PATCH 28/34] Replace sapply() with vapply()

---
 R/leaveoneout.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/leaveoneout.R b/R/leaveoneout.R
index 9c72f674..5f44ce3d 100644
--- a/R/leaveoneout.R
+++ b/R/leaveoneout.R
@@ -58,9 +58,9 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i
         id.exposure = exp_id,
         id.outcome = out_id,
         SNP = c(as.character(x$SNP), "All"),
-        b = sapply(l, function(y) y$b),
-        se = sapply(l, function(y) y$se),
-        p = sapply(l, function(y) y$pval),
+        b = vapply(l, function(y) y$b, numeric(1)),
+        se = vapply(l, function(y) y$se, numeric(1)),
+        p = vapply(l, function(y) y$pval, numeric(1)),
         samplesize = x$samplesize.outcome[1],
         stringsAsFactors = FALSE
       )

From b7c965a52b1507ecce1a3e94e2be534bef7420af Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:26:11 +0000
Subject: [PATCH 29/34] Optimize factor to character conversion

---
 R/format_mr_results2.R | 29 +++--------------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R
index 3687343d..e60782c9 100644
--- a/R/format_mr_results2.R
+++ b/R/format_mr_results2.R
@@ -146,32 +146,9 @@ combine_all_mrresults <- function(
   het <- het[, c("id.exposure", "id.outcome", "method", "Q", "Q_df", "Q_pval")]
 
   # Convert all factors to character
-  # lapply(names(Res), FUN=function(x) class(Res[,x]))
-  Class <- unlist(lapply(names(res), FUN = function(x) class(res[, x])))
-  if (any(Class == "factor")) {
-    Pos <- which(unlist(lapply(names(res), FUN = function(x) class(res[, x]))) == "factor")
-    for (i in seq_along(Pos)) {
-      res[, Pos[i]] <- as.character(res[, Pos[i]])
-    }
-  }
-
-  # lapply(names(Het), FUN=function(x) class(Het[,x]))
-  Class <- unlist(lapply(names(het), FUN = function(x) class(het[, x])))
-  if (any(Class == "factor")) {
-    Pos <- which(unlist(lapply(names(het), FUN = function(x) class(het[, x]))) == "factor")
-    for (i in seq_along(Pos)) {
-      het[, Pos[i]] <- as.character(het[, Pos[i]])
-    }
-  }
-
-  # lapply(names(Sin), FUN=function(x) class(Sin[,x]))
-  Class <- unlist(lapply(names(sin), FUN = function(x) class(sin[, x])))
-  if (any(Class == "factor")) {
-    Pos <- which(unlist(lapply(names(sin), FUN = function(x) class(sin[, x]))) == "factor")
-    for (i in seq_along(Pos)) {
-      sin[, Pos[i]] <- as.character(sin[, Pos[i]])
-    }
-  }
+  res[] <- lapply(res, function(x) if (is.factor(x)) as.character(x) else x)
+  het[] <- lapply(het, function(x) if (is.factor(x)) as.character(x) else x)
+  sin[] <- lapply(sin, function(x) if (is.factor(x)) as.character(x) else x)
 
   sin <- sin[grep("[:0-9:]", sin$SNP), ]
   sin$method <- "Wald ratio"

From d8c8016924fa7c843833d830e3359acb009a9c29 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:26:47 +0000
Subject: [PATCH 30/34] Combine 4 gsub() calls into 1

---
 R/backward_compatibility.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/R/backward_compatibility.R b/R/backward_compatibility.R
index ea8d7ae5..580f2358 100644
--- a/R/backward_compatibility.R
+++ b/R/backward_compatibility.R
@@ -4,10 +4,7 @@ ids_new_to_old <- function(id) {
 
 ids_new_to_old2 <- function(id) {
   id <- gsub("IEU-a-", "", id)
-  id <- gsub("-a-", "-a:", id)
-  id <- gsub("-b-", "-b:", id)
-  id <- gsub("-c-", "-c:", id)
-  id <- gsub("-d-", "-d:", id)
+  id <- gsub("-([a-d])-", "-\\1:", id)
   return(id)
 }
 

From cb90e1c45a34453c3ec36972d22f28018e96e5f4 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:27:04 +0000
Subject: [PATCH 31/34] Vectorise simple_cap()

---
 R/forest_plot2.R | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/R/forest_plot2.R b/R/forest_plot2.R
index 6174641e..9cfd674c 100644
--- a/R/forest_plot2.R
+++ b/R/forest_plot2.R
@@ -165,11 +165,8 @@ format_mr_results <- function(
 #' @keywords internal
 #' @return Character or array of character
 simple_cap <- function(x) {
-  sapply(x, function(x) {
-    x <- tolower(x)
-    s <- strsplit(x, " ")[[1]]
-    paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ")
-  })
+  x <- tolower(x)
+  gsub("\\b(\\w)", "\\U\\1", x, perl = TRUE)
 }
 
 #' Trim function to remove leading and trailing blank spaces

From 514b1c2a74d77b678ce5419f920ab40efba2dab7 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:27:46 +0000
Subject: [PATCH 32/34] Update NEWS.md

---
 NEWS.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 581c3fad..2f623a23 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -17,6 +17,9 @@
 * Optimized the `mr()` function
 * Optimized the `Optimize get_r_from_lor()` function
 * Optimized the `mr_rucker_bootstrap()` and `mr_rucker_jackknife_internal()` functions
+* Replaced `sapply()` with `vapply()` in several cases
+* Optimized the `simple_cap()` function
+* And a few other minor optimizations
 
 # TwoSampleMR v0.6.29
 

From 72d678d394a16d1a18b2c5686cd13ccef1f897d1 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:28:05 +0000
Subject: [PATCH 33/34] Update NEWS.md

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 2f623a23..ef0a8d7e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # TwoSampleMR v0.6.30
 
-(Release date 2026-02-00)
+(Release date 2026-02-06)
 
 * Vectorised `mr_egger_regression_bootstrap()`
 * Vectorised `weighted_median_bootstrap()`

From b1f4c70106e897c9b589af1f11fe0b68461dd518 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Fri, 6 Feb 2026 13:34:53 +0000
Subject: [PATCH 34/34] Install gettext on macOS runners

---
 .github/workflows/check-full.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml
index 69bee770..8fb371b3 100644
--- a/.github/workflows/check-full.yaml
+++ b/.github/workflows/check-full.yaml
@@ -61,6 +61,14 @@ jobs:
         shell: bash
         run: echo 'options(pkg.sysreqs_db_update_timeout = as.difftime(59, units = "secs"))' >> ~/.Rprofile
 
+      - name: Install gettext system dependency on macOS for robustbase package build from source
+        if: runner.os == 'macOS'
+        run: |
+          brew install gettext
+          mkdir -p ~/.R
+          echo "CPPFLAGS=-I$(brew --prefix gettext)/include" >> ~/.R/Makevars
+          echo "LDFLAGS=-L$(brew --prefix gettext)/lib" >> ~/.R/Makevars
+
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
           extra-packages: >