Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
358eaed
Bump version
remlapmot Feb 5, 2026
065b4ca
Delete duplicated weighted_median function
remlapmot Feb 5, 2026
f5aa0ba
Vectorise mr_egger_regression_bootstrap()
remlapmot Feb 5, 2026
00ad07c
Vectorise weighted_median_bootstrap()
remlapmot Feb 5, 2026
976c428
Update NEWS.md
remlapmot Feb 5, 2026
73b0f94
Replace plyr calls with data.table calls
remlapmot Feb 5, 2026
869fc0c
Update NEWS.md
remlapmot Feb 5, 2026
9137867
Use chartr() instead of 4 gsub() calls
remlapmot Feb 5, 2026
9b08ff8
Use single call to sample() instead of n calls
remlapmot Feb 5, 2026
86251c8
Update NEWS.md
remlapmot Feb 5, 2026
f6d2296
Optimize mr_mode()
remlapmot Feb 6, 2026
f26817b
Update NEWS.md
remlapmot Feb 6, 2026
c873699
Remove use of plyr from package
remlapmot Feb 6, 2026
30823bc
Update NEWS.md
remlapmot Feb 6, 2026
b108eb8
Replace apply(..., any(is.na())) with complete.case()
remlapmot Feb 6, 2026
2f30adc
Update NEWS.md
remlapmot Feb 6, 2026
8415eee
Move mr_method_list() before the loop
remlapmot Feb 6, 2026
5e8570a
Pre-compute method_names
remlapmot Feb 6, 2026
5aeb32f
Replace sapply() with vapply()
remlapmot Feb 6, 2026
0d1d4fe
Update NEWS.md
remlapmot Feb 6, 2026
ba92c0b
Optimize get_r_from_lor()
remlapmot Feb 6, 2026
e8e78bc
Update NEWS.md
remlapmot Feb 6, 2026
b34677f
Update NEWS.md
remlapmot Feb 6, 2026
886c35d
Optimize mr_rucker_bootstrap()
remlapmot Feb 6, 2026
edb8a11
Optimize mr_rucker_jackknife_internal()
remlapmot Feb 6, 2026
80e8b20
Update NEWS.md
remlapmot Feb 6, 2026
55f2a5a
Replace sapply() with vapply()
remlapmot Feb 6, 2026
a0143e2
Replace sapply() with vapply()
remlapmot Feb 6, 2026
b7c965a
Optimize factor to character conversion
remlapmot Feb 6, 2026
d8c8016
Combine 4 gsub() calls into 1
remlapmot Feb 6, 2026
cb90e1c
Vectorise simple_cap()
remlapmot Feb 6, 2026
514b1c2
Update NEWS.md
remlapmot Feb 6, 2026
72d678d
Update NEWS.md
remlapmot Feb 6, 2026
b1f4c70
Install gettext on macOS runners
remlapmot Feb 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/check-full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ jobs:
shell: bash
run: echo 'options(pkg.sysreqs_db_update_timeout = as.difftime(59, units = "secs"))' >> ~/.Rprofile

- name: Install gettext system dependency on macOS for robustbase package build from source
if: runner.os == 'macOS'
run: |
brew install gettext
mkdir -p ~/.R
echo "CPPFLAGS=-I$(brew --prefix gettext)/include" >> ~/.R/Makevars
echo "LDFLAGS=-L$(brew --prefix gettext)/lib" >> ~/.R/Makevars

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: >
Expand Down
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: TwoSampleMR
Title: Two Sample MR Functions and Interface to MRC Integrative
Epidemiology Unit OpenGWAS Database
Version: 0.6.29
Version: 0.6.30
Authors@R: c(
person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0920-1055")),
Expand Down Expand Up @@ -43,7 +43,6 @@ Imports:
MRMix,
MRPRESSO,
pbapply,
plyr,
psych,
RadialMR,
reshape2,
Expand Down
23 changes: 23 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
# TwoSampleMR v0.6.30

(Release date 2026-02-06)

* Vectorised `mr_egger_regression_bootstrap()`
* Vectorised `weighted_median_bootstrap()`
* Deleted duplicated `weighted_median()` function
* Replace **plyr** function calls with **data.table** function calls
- `plyr::rbind.fill(...)` to `data.table::rbindlist(..., fill = TRUE, use.names = TRUE)`
- `plyr::ddply(dat, cols, func)` to `lapply()` over unique combinations + `data.table::rbindlist()`
- Added `data.table::setDF()` calls to convert back to data.frame for compatibility
- And removed **plyr** from Imports list
* In `flip_alleles()` use `chartr()` instead of 4 `gsub()` calls
* In `random_string()` use single call to `sample()` instead of n calls
* Optimized `mr_mode()`
* Replaced `apply(..., any(is.na()))` with `complete.case()`
* Optimized the `mr()` function
* Optimized the `Optimize get_r_from_lor()` function
* Optimized the `mr_rucker_bootstrap()` and `mr_rucker_jackknife_internal()` functions
* Replaced `sapply()` with `vapply()` in several cases
* Optimized the `simple_cap()` function
* And a few other minor optimizations

# TwoSampleMR v0.6.29

(Release date 2025-12-16)
Expand Down
51 changes: 25 additions & 26 deletions R/add_rsq.r
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,20 @@
#' @return data frame
add_rsq <- function(dat) {
if ("id.exposure" %in% names(dat)) {
dat <- plyr::ddply(dat, c("id.exposure"), function(x) {
add_rsq_one(x, "exposure")
ids <- unique(dat$id.exposure)
results <- lapply(ids, function(id) {
add_rsq_one(dat[dat$id.exposure == id, ], "exposure")
})
dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
data.table::setDF(dat)
}
if ("id.outcome" %in% names(dat)) {
dat <- plyr::ddply(dat, c("id.outcome"), function(x) {
add_rsq_one(x, "outcome")
ids <- unique(dat$id.outcome)
results <- lapply(ids, function(id) {
add_rsq_one(dat[dat$id.outcome == id, ], "outcome")
})
dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
data.table::setDF(dat)
}
return(dat)
}
Expand Down Expand Up @@ -261,29 +267,22 @@ get_r_from_lor <- function(
ncontrol <- rep(ncontrol, length(lor))
}

nsnp <- length(lor)
r <- array(NA, nsnp)
for (i in 1:nsnp) {
if (model == "logit") {
ve <- pi^2 / 3
} else if (model == "probit") {
ve <- 1
} else {
stop("Model must be probit or logit")
}
popaf <- get_population_allele_frequency(
af[i],
ncase[i] / (ncase[i] + ncontrol[i]),
exp(lor[i]),
prevalence[i]
)
vg <- (lor[i])^2 * popaf * (1 - popaf)
r[i] <- vg / (vg + ve)
if (correction) {
r[i] <- r[i] / 0.58
}
r[i] <- sqrt(r[i]) * sign(lor[i])
if (model == "logit") {
ve <- pi^2 / 3
} else if (model == "probit") {
ve <- 1
} else {
stop("Model must be probit or logit")
}

prop <- ncase / (ncase + ncontrol)
popaf <- get_population_allele_frequency(af, prop, exp(lor), prevalence)
vg <- lor^2 * popaf * (1 - popaf)
r <- vg / (vg + ve)
if (correction) {
r <- r / 0.58
}
r <- sqrt(r) * sign(lor)
return(r)
}

Expand Down
5 changes: 1 addition & 4 deletions R/backward_compatibility.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ ids_new_to_old <- function(id) {

ids_new_to_old2 <- function(id) {
id <- gsub("IEU-a-", "", id)
id <- gsub("-a-", "-a:", id)
id <- gsub("-b-", "-b:", id)
id <- gsub("-c-", "-c:", id)
id <- gsub("-d-", "-d:", id)
id <- gsub("-([a-d])-", "-\\1:", id)
return(id)
}

Expand Down
20 changes: 13 additions & 7 deletions R/enrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ enrichment_method_list <- function() {
)
)
a <- lapply(a, as.data.frame)
a <- plyr::rbind.fill(a)
a <- data.table::rbindlist(a, fill = TRUE, use.names = TRUE)
data.table::setDF(a)
a <- as.data.frame(lapply(a, as.character), stringsAsFactors = FALSE)
return(a)
}
Expand All @@ -50,9 +51,10 @@ enrichment_method_list <- function() {
#' @export
#' @return data frame
enrichment <- function(dat, method_list = enrichment_method_list()$obj) {
res <- plyr::ddply(dat, c("id.exposure", "id.outcome"), function(x1) {
# message("Performing enrichment analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")

methl <- enrichment_method_list()
combos <- unique(dat[, c("id.exposure", "id.outcome")])
results <- lapply(seq_len(nrow(combos)), function(i) {
x1 <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
x <- subset(x1, !is.na(pval.outcome))
if (nrow(x) == 0) {
message(
Expand All @@ -67,16 +69,20 @@ enrichment <- function(dat, method_list = enrichment_method_list()$obj) {
res <- lapply(method_list, function(meth) {
get(meth)(x$pval.outcome)
})
methl <- enrichment_method_list()
enrichment_tab <- data.frame(
id.exposure = x1$id.exposure[1],
id.outcome = x1$id.outcome[1],
outcome = x$outcome[1],
exposure = x$exposure[1],
method = methl$name[methl$obj %in% method_list],
nsnp = sapply(res, function(x) x$nsnp),
pval = sapply(res, function(x) x$pval)
nsnp = vapply(res, function(x) x$nsnp, numeric(1)),
pval = vapply(res, function(x) x$pval, numeric(1)),
stringsAsFactors = FALSE
)
enrichment_tab <- subset(enrichment_tab, !is.na(pval))
return(enrichment_tab)
})
res <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
data.table::setDF(res)
return(res)
}
9 changes: 6 additions & 3 deletions R/eve.R
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,13 @@ mr_wrapper_single <- function(dat, parameters = default_parameters()) {
#' @export
#' @return list
mr_wrapper <- function(dat, parameters = default_parameters()) {
plyr::dlply(dat, c("id.exposure", "id.outcome"), function(x) {
combos <- unique(dat[, c("id.exposure", "id.outcome")])
res <- lapply(seq_len(nrow(combos)), function(i) {
x <- dat[dat$id.exposure == combos$id.exposure[i] & dat$id.outcome == combos$id.outcome[i], ]
message("Performing MR analysis of '", x$id.exposure[1], "' on '", x$id.outcome[1], "'")
d <- subset(x, mr_keep)
o <- mr_wrapper_single(d, parameters = parameters)
o
mr_wrapper_single(d, parameters = parameters)
})
names(res) <- paste(combos$id.exposure, combos$id.outcome, sep = ".")
res
}
18 changes: 9 additions & 9 deletions R/forest_plot2.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ format_mr_results <- function(

# Fill in missing values
exps <- unique(dat$exposure)
dat <- plyr::ddply(dat, c("outcome"), function(x) {
x <- plyr::mutate(x)
nc <- ncol(x)
outcomes <- unique(dat$outcome)
results <- lapply(outcomes, function(out_val) {
x <- dat[dat$outcome == out_val, ]
missed <- exps[!exps %in% x$exposure]
if (length(missed) >= 1) {
out <- unique(x$outcome)
Expand All @@ -116,10 +116,13 @@ format_mr_results <- function(
sample_size = n,
stringsAsFactors = FALSE
)
x <- plyr::rbind.fill(x, md)
x <- data.table::rbindlist(list(x, md), fill = TRUE, use.names = TRUE)
data.table::setDF(x)
}
return(x)
})
dat <- data.table::rbindlist(results, fill = TRUE, use.names = TRUE)
data.table::setDF(dat)
# dat <- dplyr::group_by(dat, outcome) %>%
# dplyr::do({
# x <- .
Expand Down Expand Up @@ -162,11 +165,8 @@ format_mr_results <- function(
#' @keywords internal
#' @return Character or array of character
simple_cap <- function(x) {
sapply(x, function(x) {
x <- tolower(x)
s <- strsplit(x, " ")[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ")
})
x <- tolower(x)
gsub("\\b(\\w)", "\\U\\1", x, perl = TRUE)
}

#' Trim function to remove leading and trailing blank spaces
Expand Down
36 changes: 7 additions & 29 deletions R/format_mr_results2.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,32 +146,9 @@ combine_all_mrresults <- function(
het <- het[, c("id.exposure", "id.outcome", "method", "Q", "Q_df", "Q_pval")]

# Convert all factors to character
# lapply(names(Res), FUN=function(x) class(Res[,x]))
Class <- unlist(lapply(names(res), FUN = function(x) class(res[, x])))
if (any(Class == "factor")) {
Pos <- which(unlist(lapply(names(res), FUN = function(x) class(res[, x]))) == "factor")
for (i in seq_along(Pos)) {
res[, Pos[i]] <- as.character(res[, Pos[i]])
}
}

# lapply(names(Het), FUN=function(x) class(Het[,x]))
Class <- unlist(lapply(names(het), FUN = function(x) class(het[, x])))
if (any(Class == "factor")) {
Pos <- which(unlist(lapply(names(het), FUN = function(x) class(het[, x]))) == "factor")
for (i in seq_along(Pos)) {
het[, Pos[i]] <- as.character(het[, Pos[i]])
}
}

# lapply(names(Sin), FUN=function(x) class(Sin[,x]))
Class <- unlist(lapply(names(sin), FUN = function(x) class(sin[, x])))
if (any(Class == "factor")) {
Pos <- which(unlist(lapply(names(sin), FUN = function(x) class(sin[, x]))) == "factor")
for (i in seq_along(Pos)) {
sin[, Pos[i]] <- as.character(sin[, Pos[i]])
}
}
res[] <- lapply(res, function(x) if (is.factor(x)) as.character(x) else x)
het[] <- lapply(het, function(x) if (is.factor(x)) as.character(x) else x)
sin[] <- lapply(sin, function(x) if (is.factor(x)) as.character(x) else x)

sin <- sin[grep("[:0-9:]", sin$SNP), ]
sin$method <- "Wald ratio"
Expand All @@ -185,10 +162,11 @@ combine_all_mrresults <- function(
names(sin)[names(sin) == "method"] <- "Method"

res <- merge(res, het, by = c("id.outcome", "id.exposure", "Method"), all.x = TRUE)
res <- plyr::rbind.fill(
res,
sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]
res <- data.table::rbindlist(
list(res, sin[, c("exposure", "outcome", "id.exposure", "id.outcome", "SNP", "b", "se", "pval", "Method")]),
fill = TRUE, use.names = TRUE
)
data.table::setDF(res)

if (ao_slc) {
ao <- available_outcomes()
Expand Down
31 changes: 17 additions & 14 deletions R/harmonise.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) {
) -
nrow(x)

x$mr_keep[apply(x[, mr_cols], 1, function(y) any(is.na(y)))] <- FALSE
x$mr_keep[!complete.cases(x[, mr_cols])] <- FALSE
attr(x, "log")[["total_variants"]] <- nrow(x)
attr(x, "log")[["total_variants_for_mr"]] <- sum(x$mr_keep)
attr(x, "log")[["proxy_variants"]] <- ifelse(
Expand All @@ -114,8 +114,10 @@ harmonise_data <- function(exposure_dat, outcome_dat, action = 2) {
# return(x)
# })

jlog <- plyr::rbind.fill(lapply(fix.tab, function(x) attr(x, "log")))
fix.tab <- plyr::rbind.fill(fix.tab)
jlog <- data.table::rbindlist(lapply(fix.tab, function(x) attr(x, "log")), fill = TRUE, use.names = TRUE)
data.table::setDF(jlog)
fix.tab <- data.table::rbindlist(fix.tab, fill = TRUE, use.names = TRUE)
data.table::setDF(fix.tab)
attr(fix.tab, "log") <- jlog

# fix.tab <- harmonise_make_snp_effects_positive(fix.tab)
Expand Down Expand Up @@ -171,12 +173,9 @@ check_palindromic <- function(A1, A2) {


flip_alleles <- function(x) {
x <- toupper(x)
x <- gsub("C", "g", x)
x <- gsub("G", "c", x)
x <- gsub("A", "t", x)
x <- gsub("T", "a", x)
return(toupper(x))
# Use chartr for efficient single-pass character substitution
# This is much faster than 4 sequential gsub calls
chartr("ACGTacgt", "TGCAtgca", x)
}


Expand Down Expand Up @@ -680,12 +679,16 @@ harmonise <- function(dat, tolerance, action) {
action
)

jlog <- plyr::rbind.fill(
as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE),
as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE),
as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE),
as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE)
jlog <- data.table::rbindlist(
list(
as.data.frame(attr(d22, "log"), stringsAsFactors = FALSE),
as.data.frame(attr(d21, "log"), stringsAsFactors = FALSE),
as.data.frame(attr(d12, "log"), stringsAsFactors = FALSE),
as.data.frame(attr(d11, "log"), stringsAsFactors = FALSE)
),
fill = TRUE, use.names = TRUE
)
data.table::setDF(jlog)
jlog <- cbind(
data.frame(
id.exposure = dat$id.exposure[1],
Expand Down
Loading
Loading