markmfredrickson · nullsatz · Jan 22, 2026 · Dec 5, 2025 · Dec 8, 2025 · Dec 8, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,7 +39,8 @@ Suggests:
     pander,
     xtable,
     rrelaxiv,
-    magrittr
+    magrittr,
+    MASS
 Enhances:
     CBPS,
     haven

diff --git a/NAMESPACE b/NAMESPACE
@@ -164,6 +164,7 @@ importFrom(stats,terms)
 importFrom(stats,terms.formula)
 importFrom(stats,update)
 importFrom(stats,update.formula)
+importFrom(stats,var)
 importFrom(tibble,as_tibble)
 importFrom(tibble,enframe)
 importFrom(tibble,tibble)

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,11 @@
 
 - `optmatch:::scoreCaliper()` gains an optional
   `within=` argument (#245)
+- `match_on(z~x, method="rank_mahalanobis", within=foo)` makes
+  better use of restrictions in foo to contain time/memory costs.
+- For rank Mahalanobis with underlying covariances pooled across
+  treatment and control groups, you can now use
+  `match_on(z~x, method="pooled_cov_rank_mahalanobis")`.
 - Updates to internal C++ code
 
 ## Changes in **optmatch** Version 0.10.7

diff --git a/R/match_on.R b/R/match_on.R
@@ -274,8 +274,14 @@ match_on.bigglm <- function(x,
 #'   redundancies among the variables by scaling down variables contributions in
 #'   proportion to their correlations with other included variables.)
 #'
-#'   Euclidean distance is also available, via \code{method="euclidean"}, and
-#'   ranked, Mahalanobis distance, via \code{method="rank_mahalanobis"}.
+#'   Euclidean distance is also available, via \code{method="euclidean"}, as
+#'   are two flavors of ranked-based Mahalanobis distance, via
+#'   \code{method="rank_mahalanobis"} or \code{method="pooled_cov_rank_mahalanobis"}.
+#'   Either rank-transforms the covariates first; they differ in whether
+#'   subsequent covariance of thus-transformed covariates is calculated
+#'   on all subjects or by pooling of with-group covariances across
+#'   treatment and control. The \code{method=} argument can be abbreviated
+#'   in the usual way (via [base::pmatch()]).
 #'
 #'   The treatment indicator \code{Z} as noted above must either be numeric
 #'   (1 representing treated units and 0 control units) or logical
@@ -404,15 +410,19 @@ match_on.formula <- function(x,
     methodname <- as.character(class(method))
   }
 
-  which.method <- pmatch(methodname, c("mahalanobis", "euclidean", "rank_mahalanobis", "function"), 4)
+  which.method <- pmatch(methodname,
+                         c("mahalanobis", "euclidean",
+                           "rank_mahalanobis", "pooled_cov_rank_mahalanobis",
+                           "function"), 5)
   tmp <- switch(which.method,
-		makedist(z, data, compute_mahalanobis, within),
-		makedist(z, data, compute_euclidean, within),
-    makedist(z, data, compute_rank_mahalanobis, within),
-    {
-      warning("Passing a user-defined `method` to `match_on.formula` is not supported and results are not guaranteed. User-defined distances should use `match_on.function` instead.")
-      makedist(z, data, match.fun(method), within)
-    }
+                makedist(z, data, compute_mahalanobis, within),
+                makedist(z, data, compute_euclidean, within),
+                makedist(z, data, compute_rank_mahalanobis, within),
+                makedist(z, data, compute_pooled_cov_rank_mahalanobis, within),
+                {
+                    warning("Passing a user-defined `method` to `match_on.formula` is not supported and results are not guaranteed. User-defined distances should use `match_on.function` instead.")
+                    makedist(z, data, match.fun(method), within)
+                }
 		)
   rm(mf)
 
@@ -509,19 +519,7 @@ compute_mahalanobis <- function(index, data, z) {
     cv <- mt + mc
     rm(mt, mc)
 
-    inv.scale.matrix <- try(solve(cv), silent = TRUE)
-
-    if (inherits(inv.scale.matrix,"try-error")) {
-      dnx <- dimnames(cv)
-      s <- svd(cv)
-      nz <- (s$d > sqrt(.Machine$double.eps) * s$d[1])
-      if (!any(nz)) stop("covariance has rank zero")
-
-      inv.scale.matrix <- s$v[, nz] %*% (t(s$u[, nz])/s$d[nz])
-      dimnames(inv.scale.matrix) <- dnx[2:1]
-      rm(dnx, s, nz)
-    }
-
+    inv.scale.matrix <- safe_invert(cv)
     rm(cv)
 
     return(mahalanobisHelper(data, index, inv.scale.matrix))
@@ -548,19 +546,50 @@ compute_rank_mahalanobis <- function(index, data, z) {
     if (is.null(rownames(data)) | !all(index %in% rownames(data)))
         stop("data must have row names matching index")
 
-    # begin workaround solution to #128
-    all_treated <- rownames(data)[as.logical(z)]
-    all_control <- rownames(data)[!z]
-    all_indices <- expand.grid(all_treated, all_control,
-                               KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
-    all_indices <- paste(all_indices[[1]], all_indices[[2]], sep="%@%")
-    short_indices <- paste(index[,1], index[,2], sep="%@%")
-    indices <- match(short_indices, all_indices)
-    if (any(is.na(indices))) stop("Unanticipated problem. (Make sure row names of data don't use the string '%@%'.)")
-    # Now, since `r_smahal` is ignoring its `index` argument anyway:
-    rankdists <- sqrt(r_smahal(NULL, data, z))
-    rankdists <- rankdists[indices]
-    return(rankdists)
+    data <- apply(data, 2, rank)
+    n <- nrow(data)
+    m <- cov(data)
+    cv <- scale_addressing_ties(nrow(data), cov(data))
+    inv.scale.matrix <- safe_invert(cv)
+    rm(cv)
+
+    return(mahalanobisHelper(data, index, inv.scale.matrix))
+}
+
+compute_pooled_cov_rank_mahalanobis <- function(index, data, z) {
+    if (!all(is.finite(data)))
+        stop("Infinite or NA values detected in data for Mahalanobis computations.")
+
+    if (is.null(rownames(data)) | !all(index %in% rownames(data)))
+        stop("data must have row names matching index")
+
+    data <- apply(data, 2, rank)
+
+    if (sum(z) == 1) {
+      mt <- 0  # Addressing #168
+    } else {
+      treated <- data[z, ,drop = FALSE]
+      nt <- nrow(treated)
+      mt <- scale_addressing_ties(nt, cov(treated))
+      mt <- mt * (sum(z) - 1) / (length(z) - 2)
+    }
+
+    if (sum(!z) == 1) {
+      mc <- 0  # Addressing #168
+    } else {
+      control <- data[!z, ,drop = FALSE]
+      nc <- nrow(control)
+      mc <- scale_addressing_ties(nc, cov(control))
+      mc <- mc * (sum(!z) - 1) / (length(!z) - 2)
+    }
+
+    cv <- mt + mc
+    rm(mt, mc)
+
+    inv.scale.matrix <- safe_invert(cv)
+    rm(cv)
+
+    return(mahalanobisHelper(data, index, inv.scale.matrix))
 }
 
 #' @details \bold{First argument (\code{x}): \code{function}.} The passed function

diff --git a/R/utilities.R b/R/utilities.R
@@ -138,3 +138,30 @@ missing_x_msg <- function(x_str, data_str, ...) {
         paste(data_str, "$", x_str, sep=""),
         msg_tail)
 }
+
+#' @importFrom stats var
+scale_addressing_ties <- function(n, cv) {
+    vuntied <- var(1:n)
+    rat <- sqrt(vuntied/diag(cv))
+    if (length(rat) > 1) {
+        diag_rat <- diag(rat)
+    } else {
+        diag_rat <- as.matrix(rat)
+    }
+    return(diag_rat %*% cv %*% diag_rat)
+}
+
+safe_invert <- function(x) {
+  inv.scale.matrix <- try(solve(x), silent = TRUE)
+
+  if (inherits(inv.scale.matrix,"try-error")) {
+    dnx <- dimnames(x)
+    s <- svd(x)
+    nz <- (s$d > sqrt(.Machine$double.eps) * s$d[1])
+    if (!any(nz)) stop("covariance has rank zero")
+
+    inv.scale.matrix <- s$v[, nz] %*% (t(s$u[, nz])/s$d[nz])
+    dimnames(inv.scale.matrix) <- dnx[2:1]
+  }
+  return(inv.scale.matrix)
+}
diff --git a/man/match_on-methods.Rd b/man/match_on-methods.Rd
diff --git a/tests/testthat/test.match_on.R b/tests/testthat/test.match_on.R
@@ -174,6 +174,7 @@ test_that("Issue 87: NA's in data => unmatchable, but retained, units in distanc
   expect_equivalent(f("mahalanobis"), expectedM)
   expect_equivalent(f("euclid"), expectedM)
   expect_equivalent(f("rank_mahal"), expectedM)
+  expect_equivalent(f("pooled_cov"), expectedM)
 
   cal1 <- caliper(match_on(z~x1, data=d), width=1e3)
   expect_equivalent(g(as.matrix(match_on(z ~ x1 + x2, data = d,

diff --git a/tests/testthat/test.rank.mahal.R b/tests/testthat/test.rank.mahal.R
@@ -111,7 +111,7 @@ test_that("Fix for #128 (`compute_rank_mahalanobis` ignores index argument) hold
 
     reference_rankmahal <- compute_smahal(z, X)
 
-        indices <- expand.grid(rownames(reference_rankmahal), colnames(reference_rankmahal))
+    indices <- expand.grid(rownames(reference_rankmahal), colnames(reference_rankmahal))
     indices <- as.matrix(indices)
     expect_equivalent(optmatch:::compute_rank_mahalanobis(indices, X, as.logical(z)),
                 reference_rankmahal[1L:numdists])
@@ -125,3 +125,42 @@ test_that("Fix for #128 (`compute_rank_mahalanobis` ignores index argument) hold
 
 
 })
+
+is_scalar_multiple <- function(A, B) {
+  # Ensure dimensions match
+  if (!all(dim(A) == dim(B))) {
+    return(FALSE)
+  }
+
+  # Find positions where B is non-zero to avoid division by zero
+  non_zero_positions <- B != 0
+
+  # Compute element-wise ratio where B != 0
+  ratios <- A[non_zero_positions] / B[non_zero_positions]
+
+  # Check if all ratios are (approximately) equal
+  return(all(abs(ratios - ratios[1]) < 1e-8))
+}
+
+test_that("compute_pooled_cov_rank_mahalanobis results match ordinary Mahalanobis's", {
+    ## nr number of samples
+    nr <- 10L
+    z <- integer(nr)
+    ## two outcomes: 0 (from initialization), and 1 (assigned below randomly)
+    z[sample(1:nr, nr / 2L)] <- 1L
+
+    ## Goal: two groups with the same within-group variance and no rank ties
+    df <- data.frame(z = z, X = integer(nr))
+    df[df$z == 0, 'X'] <- seq(1, by=2, len = nr / 2)  # odds
+    df[df$z == 1, 'X'] <- seq(2, by=2, len = nr / 2)  # evens
+
+    A <- match_on(z~., data=df, method="pooled_cov")
+    B <- match_on(z~., data=df, method="mahalanobis")
+    # Check if all ratios are (approximately) equal
+    expect_true(is_scalar_multiple(A, B))
+
+    ez <- exactMatch(z~., data=df)
+    A <- match_on(z~., data=df, method="pooled_cov", within=ez)
+    B <- match_on(z~., data=df, method="mahalanobis", within=ez)
+    expect_true(is_scalar_multiple(A, B))
+})
diff --git a/tests/testthat/test.utilities.R b/tests/testthat/test.utilities.R
@@ -2,6 +2,8 @@
 # Tests for utility functions
 ################################################################################
 
+library(MASS)
+
 context("Utility Functions")
 
 test_that("toZ", {
@@ -60,3 +62,25 @@ test_that("#159 - toZ for labelled", {
     expect_true(TRUE) # avoiding empty test warning
   }
 })
+
+test_that("scale_addressing_ties", {
+    x <- cbind(sample(1:5, 10, replace=TRUE),
+               sample(1:5, 10, replace=TRUE))
+    y <- scale_addressing_ties(nrow(x), cov(x))
+    dy <- diag(y)
+    expect_equal(dy, rep(var(1:nrow(x)), length(dy)))
+})
+
+test_that("safe_invert", {
+    ## full rank symmetric square matrix
+    A <- matrix(runif(25), 5, 5)
+    symmetric_matrix <- A %*% t(A)
+    inv_A <- safe_invert(A)
+    expect_equal(inv_A, solve(A))
+
+    ## rank deficient symmetric square matrix
+    B <- matrix(runif(15), 5, 3)
+    symmetric_matrix <- B %*% t(B)
+    inv_B <- safe_invert(B)
+    expect_equal(inv_B, ginv(B))
+})