MRCIEU · remlapmot · Feb 6, 2026 · Feb 5, 2026 · Feb 5, 2026 · Feb 5, 2026
diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml
@@ -61,6 +61,14 @@ jobs:
         shell: bash
         run: echo 'options(pkg.sysreqs_db_update_timeout = as.difftime(59, units = "secs"))' >> ~/.Rprofile
 
+      - name: Install gettext system dependency on macOS for robustbase package build from source
+        if: runner.os == 'macOS'
+        run: |
+          brew install gettext
+          mkdir -p ~/.R
+          echo "CPPFLAGS=-I$(brew --prefix gettext)/include" >> ~/.R/Makevars
+          echo "LDFLAGS=-L$(brew --prefix gettext)/lib" >> ~/.R/Makevars
+
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
           extra-packages: >

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: TwoSampleMR
 Title: Two Sample MR Functions and Interface to MRC Integrative
     Epidemiology Unit OpenGWAS Database
-Version: 0.6.29
+Version: 0.6.30
 Authors@R: c(
     person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0920-1055")),
@@ -43,7 +43,6 @@ Imports:
     MRMix,
     MRPRESSO,
     pbapply,
-    plyr,
     psych,
     RadialMR,
     reshape2,

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,26 @@
+# TwoSampleMR v0.6.30
+
+(Release date 2026-02-06)
+
+* Vectorised `mr_egger_regression_bootstrap()`
+* Vectorised `weighted_median_bootstrap()`
+* Deleted duplicated `weighted_median()` function
+* Replace **plyr** function calls with **data.table** function calls
+  - `plyr::rbind.fill(...)` to `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
+  - `plyr::ddply(dat, cols, func)` to `lapply()` over unique combinations + `data.table::rbindlist()`
+  - Added `data.table::setDF()` calls to convert back to data.frame for compatibility
+  - And removed **plyr** from Imports list
+* In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls
+* In `random_string()` use single call to `sample()` instead of n calls
+* Optimized `mr_mode()`
+* Replaced `apply(..., any(is.na()))` with `complete.case()`
+* Optimized the `mr()` function
+* Optimized the `Optimize get_r_from_lor()` function
+* Optimized the `mr_rucker_bootstrap()` and `mr_rucker_jackknife_internal()` functions
+* Replaced `sapply()` with `vapply()` in several cases
+* Optimized the `simple_cap()` function
+* And a few other minor optimizations
+
 # TwoSampleMR v0.6.29
 
 (Release date 2025-12-16)

diff --git a/R/add_rsq.r b/R/add_rsq.r
@@ -11,14 +11,20 @@
 #' @return data frame
 add_rsq <- function(dat) {
   if ("id.exposure" %in% names(dat)) {
-    dat <- plyr::ddply(dat, c("id.exposure"), function(x) {
-      add_rsq_one(x, "exposure")
+    ids <- unique(dat$id.exposure)
+    results <- lapply(ids, function(id) {
+      add_rsq_one(dat[dat$id.exposure == id, ], "exposure")
     })
+    dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+    data.table::setDF(dat)
   }
   if ("id.outcome" %in% names(dat)) {
-    dat <- plyr::ddply(dat, c("id.outcome"), function(x) {
-      add_rsq_one(x, "outcome")
+    ids <- unique(dat$id.outcome)
+    results <- lapply(ids, function(id) {
+      add_rsq_one(dat[dat$id.outcome == id, ], "outcome")
     })
+    dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+    data.table::setDF(dat)
   }
   return(dat)
 }
@@ -261,29 +267,22 @@ get_r_from_lor <- function(
     ncontrol <- rep(ncontrol, length(lor))
   }
 
-  nsnp <- length(lor)
-  r <- array(NA, nsnp)
-  for (i in 1:nsnp) {
-    if (model == "logit") {
-      ve <- pi^2 / 3
-    } else if (model == "probit") {
-      ve <- 1
-    } else {
-      stop("Model must be probit or logit")
-    }
-    popaf <- get_population_allele_frequency(
-      af[i],
-      ncase[i] / (ncase[i] + ncontrol[i]),
-      exp(lor[i]),
-      prevalence[i]
-    )
-    vg <- (lor[i])^2 * popaf * (1 - popaf)
-    r[i] <- vg / (vg + ve)
-    if (correction) {
-      r[i] <- r[i] / 0.58
-    }
-    r[i] <- sqrt(r[i]) * sign(lor[i])
+  if (model == "logit") {
+    ve <- pi^2 / 3
+  } else if (model == "probit") {
+    ve <- 1
+  } else {
+    stop("Model must be probit or logit")
+  }
+
+  prop <- ncase / (ncase + ncontrol)
+  popaf <- get_population_allele_frequency(af, prop, exp(lor), prevalence)
+  vg <- lor^2 * popaf * (1 - popaf)
+  r <- vg / (vg + ve)
+  if (correction) {
+    r <- r / 0.58
   }
+  r <- sqrt(r) * sign(lor)
   return(r)
 }
 

diff --git a/R/backward_compatibility.R b/R/backward_compatibility.R
@@ -4,10 +4,7 @@ ids_new_to_old <- function(id) {
 
 ids_new_to_old2 <- function(id) {
   id <- gsub("IEU-a-", "", id)
-  id <- gsub("-a-", "-a:", id)
-  id <- gsub("-b-", "-b:", id)
-  id <- gsub("-c-", "-c:", id)
-  id <- gsub("-d-", "-d:", id)
+  id <- gsub("-([a-d])-", "-\\1:", id)
   return(id)
 }
 

diff --git a/R/enrichment.R b/R/enrichment.R
@@ -34,7 +34,8 @@ enrichment_method_list <- function() {
     )
   )
   a <- lapply(a, as.data.frame)
-  a <- plyr::rbind.fill(a)
+  a <- data.table::rbindlist(a, fill = TRUE, use.names = TRUE)
+  data.table::setDF(a)
   a <- as.data.frame(lapply(a, as.character), stringsAsFactors = FALSE)
   return(a)
 }
@@ -50,9 +51,10 @@ enrichment_method_list <- function() {
 #' @export
 #' @return data frame
 enrichment <- function(dat, method_list = enrichment_method_list()$obj) {
-  res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) {
-    # message("Performing enrichment analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")
-
+  methl <- enrichment_method_list()
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  results <- lapply(seq_len(nrow(combos)), function(i) {
+    x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     x <- subset(x1, !is.na(pval.outcome))
     if (nrow(x) == 0) {
       message(
@@ -67,16 +69,20 @@ enrichment <- function(dat, method_list = enrichment_method_list()$obj) {
     res <- lapply(method_list, function(meth) {
       get(meth)(x$pval.outcome)
     })
-    methl <- enrichment_method_list()
     enrichment_tab <- data.frame(
+      id.exposure = x1$id.exposure[1],
+      id.outcome = x1$id.outcome[1],
       outcome = x$outcome[1],
       exposure = x$exposure[1],
       method = methl$name[methl$obj %in% method_list],
-      nsnp = sapply(res, function(x) x$nsnp),
-      pval = sapply(res, function(x) x$pval)
+      nsnp = vapply(res, function(x) x$nsnp, numeric(1)),
+      pval = vapply(res, function(x) x$pval, numeric(1)),
+      stringsAsFactors = FALSE
     )
     enrichment_tab <- subset(enrichment_tab, !is.na(pval))
     return(enrichment_tab)
   })
+  res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(res)
   return(res)
 }
diff --git a/R/eve.R b/R/eve.R
@@ -327,10 +327,13 @@ mr_wrapper_single <- function(dat, parameters = default_parameters()) {
 #' @export
 #' @return list
 mr_wrapper <- function(dat, parameters = default_parameters()) {
-  plyr::dlply(dat, c("id.exposure", "id.outcome"), function(x) {
+  combos <- unique(dat[, c("id.exposure", "id.outcome")])
+  res <- lapply(seq_len(nrow(combos)), function(i) {
+    x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
     message("Performing MR analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")
     d <- subset(x, mr_keep)
-    o <- mr_wrapper_single(d, parameters = parameters)
-    o
+    mr_wrapper_single(d, parameters = parameters)
   })
+  names(res) <- paste(combos$id.exposure, combos$id.outcome, sep = ".")
+  res
 }
diff --git a/R/forest_plot2.R b/R/forest_plot2.R
@@ -101,9 +101,9 @@ format_mr_results <- function(
 
   # Fill in missing values
   exps <- unique(dat$exposure)
-  dat <- plyr::ddply(dat, c("outcome"), function(x) {
-    x <- plyr::mutate(x)
-    nc <- ncol(x)
+  outcomes <- unique(dat$outcome)
+  results <- lapply(outcomes, function(out_val) {
+    x <- dat[dat$outcome == out_val, ]
     missed <- exps[!exps %in% x$exposure]
     if (length(missed) >= 1) {
       out <- unique(x$outcome)
@@ -116,10 +116,13 @@ format_mr_results <- function(
         sample_size = n,
         stringsAsFactors = FALSE
       )
-      x <- plyr::rbind.fill(x, md)
+      x <- data.table::rbindlist(list(x, md), fill = TRUE, use.names = TRUE)
+      data.table::setDF(x)
     }
     return(x)
   })
+  dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
+  data.table::setDF(dat)
   # dat <- dplyr::group_by(dat, outcome) %>%
   # 	dplyr::do({
   # 		x <- .
@@ -162,11 +165,8 @@ format_mr_results <- function(
 #' @keywords internal
 #' @return Character or array of character
 simple_cap <- function(x) {
-  sapply(x, function(x) {
-    x <- tolower(x)
-    s <- strsplit(x, " ")[[1]]
-    paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ")
-  })
+  x <- tolower(x)
+  gsub("\\b(\\w)", "\\U\\1", x, perl = TRUE)
 }
 
 #' Trim function to remove leading and trailing blank spaces

diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R
@@ -146,32 +146,9 @@ combine_all_mrresults <- function(
   het <- het[, c("id.exposure", "id.outcome", "method", "Q", "Q_df", "Q_pval")]
 
   # Convert all factors to character
-  # lapply(names(Res), FUN=function(x) class(Res[,x]))
-  Class <- unlist(lapply(names(res), FUN = function(x) class(res[, x])))
-  if (any(Class == "factor")) {
-    Pos <- which(unlist(lapply(names(res), FUN = function(x) class(res[, x]))) == "factor")
-    for (i in seq_along(Pos)) {
-      res[, Pos[i]] <- as.character(res[, Pos[i]])
-    }
-  }
-
-  # lapply(names(Het), FUN=function(x) class(Het[,x]))
-  Class <- unlist(lapply(names(het), FUN = function(x) class(het[, x])))
-  if (any(Class == "factor")) {
-    Pos <- which(unlist(lapply(names(het), FUN = function(x) class(het[, x]))) == "factor")
-    for (i in seq_along(Pos)) {
-      het[, Pos[i]] <- as.character(het[, Pos[i]])
-    }
-  }
-
-  # lapply(names(Sin), FUN=function(x) class(Sin[,x]))
-  Class <- unlist(lapply(names(sin), FUN = function(x) class(sin[, x])))
-  if (any(Class == "factor")) {
-    Pos <- which(unlist(lapply(names(sin), FUN = function(x) class(sin[, x]))) == "factor")
-    for (i in seq_along(Pos)) {
-      sin[, Pos[i]] <- as.character(sin[, Pos[i]])
-    }
-  }
+  res[] <- lapply(res, function(x) if (is.factor(x)) as.character(x) else x)
+  het[] <- lapply(het, function(x) if (is.factor(x)) as.character(x) else x)
+  sin[] <- lapply(sin, function(x) if (is.factor(x)) as.character(x) else x)
 
   sin <- sin[grep("[:0-9:]", sin$SNP), ]
   sin$method <- "Wald ratio"
@@ -185,10 +162,11 @@ combine_all_mrresults <- function(
   names(sin)[names(sin) == "method"] <- "Method"
 
   res <- merge(res, het, by = c("id.outcome", "id.exposure", "Method"), all.x = TRUE)
-  res <- plyr::rbind.fill(
-    res,
-    sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]
+  res <- data.table::rbindlist(
+    list(res, sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]),
+    fill = TRUE, use.names = TRUE
   )
+  data.table::setDF(res)
 
   if (ao_slc) {
     ao <- available_outcomes()

diff --git a/R/harmonise.R b/R/harmonise.R
@@ -96,7 +96,7 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) {
     ) -
       nrow(x)
 
-    x$mr_keep[apply(x[, mr_cols], 1, function(y) any(is.na(y)))] <- FALSE
+    x$mr_keep[!complete.cases(x[, mr_cols])] <- FALSE
     attr(x, "log")[["total_variants"]] <- nrow(x)
     attr(x, "log")[["total_variants_for_mr"]] <- sum(x$mr_keep)
     attr(x, "log")[["proxy_variants"]] <- ifelse(
@@ -114,8 +114,10 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) {
   # 	return(x)
   # })
 
-  jlog <- plyr::rbind.fill(lapply(fix.tab, function(x) attr(x, "log")))
-  fix.tab <- plyr::rbind.fill(fix.tab)
+  jlog <- data.table::rbindlist(lapply(fix.tab, function(x) attr(x, "log")), fill = TRUE, use.names = TRUE)
+  data.table::setDF(jlog)
+  fix.tab <- data.table::rbindlist(fix.tab, fill = TRUE, use.names = TRUE)
+  data.table::setDF(fix.tab)
   attr(fix.tab, "log") <- jlog
 
   # fix.tab <- harmonise_make_snp_effects_positive(fix.tab)
@@ -171,12 +173,9 @@ check_palindromic <- function(A1, A2) {
 
 
 flip_alleles <- function(x) {
-  x <- toupper(x)
-  x <- gsub("C", "g", x)
-  x <- gsub("G", "c", x)
-  x <- gsub("A", "t", x)
-  x <- gsub("T", "a", x)
-  return(toupper(x))
+  # Use chartr for efficient single-pass character substitution
+  # This is much faster than 4 sequential gsub calls
+  chartr("ACGTacgt", "TGCAtgca", x)
 }
 
 
@@ -680,12 +679,16 @@ harmonise <- function(dat, tolerance, action) {
     action
   )
 
-  jlog <- plyr::rbind.fill(
-    as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE),
-    as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE),
-    as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE),
-    as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE)
+  jlog <- data.table::rbindlist(
+    list(
+      as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE),
+      as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE),
+      as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE),
+      as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE)
+    ),
+    fill = TRUE, use.names = TRUE
   )
+  data.table::setDF(jlog)
   jlog <- cbind(
     data.frame(
       id.exposure = dat$id.exposure[1],