Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ Suggests:
pander,
xtable,
rrelaxiv,
magrittr
magrittr,
MASS
Enhances:
CBPS,
haven
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ importFrom(stats,terms)
importFrom(stats,terms.formula)
importFrom(stats,update)
importFrom(stats,update.formula)
importFrom(stats,var)
importFrom(tibble,as_tibble)
importFrom(tibble,enframe)
importFrom(tibble,tibble)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

- `optmatch:::scoreCaliper()` gains an optional
`within=` argument (#245)
- `match_on(z~x, method="rank_mahalanobis", within=foo)` makes
better use of restrictions in foo to contain time/memory costs.
- For rank Mahalanobis with underlying covariances pooled across
treatment and control groups, you can now use
`match_on(z~x, method="pooled_cov_rank_mahalanobis")`.
- Updates to internal C++ code

## Changes in **optmatch** Version 0.10.7
Expand Down
101 changes: 65 additions & 36 deletions R/match_on.R
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,14 @@ match_on.bigglm <- function(x,
#' redundancies among the variables by scaling down variables contributions in
#' proportion to their correlations with other included variables.)
#'
#' Euclidean distance is also available, via \code{method="euclidean"}, and
#' ranked, Mahalanobis distance, via \code{method="rank_mahalanobis"}.
#' Euclidean distance is also available, via \code{method="euclidean"}, as
#' are two flavors of ranked-based Mahalanobis distance, via
#' \code{method="rank_mahalanobis"} or \code{method="pooled_cov_rank_mahalanobis"}.
#' Either rank-transforms the covariates first; they differ in whether
#' subsequent covariance of thus-transformed covariates is calculated
#' on all subjects or by pooling of with-group covariances across
#' treatment and control. The \code{method=} argument can be abbreviated
#' in the usual way (via [base::pmatch()]).
#'
#' The treatment indicator \code{Z} as noted above must either be numeric
#' (1 representing treated units and 0 control units) or logical
Expand Down Expand Up @@ -404,15 +410,19 @@ match_on.formula <- function(x,
methodname <- as.character(class(method))
}

which.method <- pmatch(methodname, c("mahalanobis", "euclidean", "rank_mahalanobis", "function"), 4)
which.method <- pmatch(methodname,
c("mahalanobis", "euclidean",
"rank_mahalanobis", "pooled_cov_rank_mahalanobis",
"function"), 5)
tmp <- switch(which.method,
makedist(z, data, compute_mahalanobis, within),
makedist(z, data, compute_euclidean, within),
makedist(z, data, compute_rank_mahalanobis, within),
{
warning("Passing a user-defined `method` to `match_on.formula` is not supported and results are not guaranteed. User-defined distances should use `match_on.function` instead.")
makedist(z, data, match.fun(method), within)
}
makedist(z, data, compute_mahalanobis, within),
makedist(z, data, compute_euclidean, within),
makedist(z, data, compute_rank_mahalanobis, within),
makedist(z, data, compute_pooled_cov_rank_mahalanobis, within),
{
warning("Passing a user-defined `method` to `match_on.formula` is not supported and results are not guaranteed. User-defined distances should use `match_on.function` instead.")
makedist(z, data, match.fun(method), within)
}
)
rm(mf)

Expand Down Expand Up @@ -509,19 +519,7 @@ compute_mahalanobis <- function(index, data, z) {
cv <- mt + mc
rm(mt, mc)

inv.scale.matrix <- try(solve(cv), silent = TRUE)

if (inherits(inv.scale.matrix,"try-error")) {
dnx <- dimnames(cv)
s <- svd(cv)
nz <- (s$d > sqrt(.Machine$double.eps) * s$d[1])
if (!any(nz)) stop("covariance has rank zero")

inv.scale.matrix <- s$v[, nz] %*% (t(s$u[, nz])/s$d[nz])
dimnames(inv.scale.matrix) <- dnx[2:1]
rm(dnx, s, nz)
}

inv.scale.matrix <- safe_invert(cv)
rm(cv)

return(mahalanobisHelper(data, index, inv.scale.matrix))
Expand All @@ -548,19 +546,50 @@ compute_rank_mahalanobis <- function(index, data, z) {
if (is.null(rownames(data)) | !all(index %in% rownames(data)))
stop("data must have row names matching index")

# begin workaround solution to #128
all_treated <- rownames(data)[as.logical(z)]
all_control <- rownames(data)[!z]
all_indices <- expand.grid(all_treated, all_control,
KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
all_indices <- paste(all_indices[[1]], all_indices[[2]], sep="%@%")
short_indices <- paste(index[,1], index[,2], sep="%@%")
indices <- match(short_indices, all_indices)
if (any(is.na(indices))) stop("Unanticipated problem. (Make sure row names of data don't use the string '%@%'.)")
# Now, since `r_smahal` is ignoring its `index` argument anyway:
rankdists <- sqrt(r_smahal(NULL, data, z))
rankdists <- rankdists[indices]
return(rankdists)
data <- apply(data, 2, rank)
n <- nrow(data)
m <- cov(data)
cv <- scale_addressing_ties(nrow(data), cov(data))
inv.scale.matrix <- safe_invert(cv)
rm(cv)

return(mahalanobisHelper(data, index, inv.scale.matrix))
}

compute_pooled_cov_rank_mahalanobis <- function(index, data, z) {
if (!all(is.finite(data)))
stop("Infinite or NA values detected in data for Mahalanobis computations.")

if (is.null(rownames(data)) | !all(index %in% rownames(data)))
stop("data must have row names matching index")

data <- apply(data, 2, rank)

if (sum(z) == 1) {
mt <- 0 # Addressing #168
} else {
treated <- data[z, ,drop = FALSE]
nt <- nrow(treated)
mt <- scale_addressing_ties(nt, cov(treated))
mt <- mt * (sum(z) - 1) / (length(z) - 2)
}

if (sum(!z) == 1) {
mc <- 0 # Addressing #168
} else {
control <- data[!z, ,drop = FALSE]
nc <- nrow(control)
mc <- scale_addressing_ties(nc, cov(control))
mc <- mc * (sum(!z) - 1) / (length(!z) - 2)
}

cv <- mt + mc
rm(mt, mc)

inv.scale.matrix <- safe_invert(cv)
rm(cv)

return(mahalanobisHelper(data, index, inv.scale.matrix))
}

#' @details \bold{First argument (\code{x}): \code{function}.} The passed function
Expand Down
27 changes: 27 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,30 @@ missing_x_msg <- function(x_str, data_str, ...) {
paste(data_str, "$", x_str, sep=""),
msg_tail)
}

#' @importFrom stats var
scale_addressing_ties <- function(n, cv) {
vuntied <- var(1:n)
rat <- sqrt(vuntied/diag(cv))
if (length(rat) > 1) {
diag_rat <- diag(rat)
} else {
diag_rat <- as.matrix(rat)
}
return(diag_rat %*% cv %*% diag_rat)
}

safe_invert <- function(x) {
inv.scale.matrix <- try(solve(x), silent = TRUE)

if (inherits(inv.scale.matrix,"try-error")) {
dnx <- dimnames(x)
s <- svd(x)
nz <- (s$d > sqrt(.Machine$double.eps) * s$d[1])
if (!any(nz)) stop("covariance has rank zero")

inv.scale.matrix <- s$v[, nz] %*% (t(s$u[, nz])/s$d[nz])
dimnames(inv.scale.matrix) <- dnx[2:1]
}
return(inv.scale.matrix)
}
10 changes: 8 additions & 2 deletions man/match_on-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat/test.match_on.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ test_that("Issue 87: NA's in data => unmatchable, but retained, units in distanc
expect_equivalent(f("mahalanobis"), expectedM)
expect_equivalent(f("euclid"), expectedM)
expect_equivalent(f("rank_mahal"), expectedM)
expect_equivalent(f("pooled_cov"), expectedM)

cal1 <- caliper(match_on(z~x1, data=d), width=1e3)
expect_equivalent(g(as.matrix(match_on(z ~ x1 + x2, data = d,
Expand Down
41 changes: 40 additions & 1 deletion tests/testthat/test.rank.mahal.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ test_that("Fix for #128 (`compute_rank_mahalanobis` ignores index argument) hold

reference_rankmahal <- compute_smahal(z, X)

indices <- expand.grid(rownames(reference_rankmahal), colnames(reference_rankmahal))
indices <- expand.grid(rownames(reference_rankmahal), colnames(reference_rankmahal))
indices <- as.matrix(indices)
expect_equivalent(optmatch:::compute_rank_mahalanobis(indices, X, as.logical(z)),
reference_rankmahal[1L:numdists])
Expand All @@ -125,3 +125,42 @@ test_that("Fix for #128 (`compute_rank_mahalanobis` ignores index argument) hold


})

is_scalar_multiple <- function(A, B) {
# Ensure dimensions match
if (!all(dim(A) == dim(B))) {
return(FALSE)
}

# Find positions where B is non-zero to avoid division by zero
non_zero_positions <- B != 0

# Compute element-wise ratio where B != 0
ratios <- A[non_zero_positions] / B[non_zero_positions]

# Check if all ratios are (approximately) equal
return(all(abs(ratios - ratios[1]) < 1e-8))
}

test_that("compute_pooled_cov_rank_mahalanobis results match ordinary Mahalanobis's", {
## nr number of samples
nr <- 10L
z <- integer(nr)
## two outcomes: 0 (from initialization), and 1 (assigned below randomly)
z[sample(1:nr, nr / 2L)] <- 1L

## Goal: two groups with the same within-group variance and no rank ties
df <- data.frame(z = z, X = integer(nr))
df[df$z == 0, 'X'] <- seq(1, by=2, len = nr / 2) # odds
df[df$z == 1, 'X'] <- seq(2, by=2, len = nr / 2) # evens

A <- match_on(z~., data=df, method="pooled_cov")
B <- match_on(z~., data=df, method="mahalanobis")
# Check if all ratios are (approximately) equal
expect_true(is_scalar_multiple(A, B))

ez <- exactMatch(z~., data=df)
A <- match_on(z~., data=df, method="pooled_cov", within=ez)
B <- match_on(z~., data=df, method="mahalanobis", within=ez)
expect_true(is_scalar_multiple(A, B))
})
24 changes: 24 additions & 0 deletions tests/testthat/test.utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Tests for utility functions
################################################################################

library(MASS)

context("Utility Functions")

test_that("toZ", {
Expand Down Expand Up @@ -60,3 +62,25 @@ test_that("#159 - toZ for labelled", {
expect_true(TRUE) # avoiding empty test warning
}
})

test_that("scale_addressing_ties", {
x <- cbind(sample(1:5, 10, replace=TRUE),
sample(1:5, 10, replace=TRUE))
y <- scale_addressing_ties(nrow(x), cov(x))
dy <- diag(y)
expect_equal(dy, rep(var(1:nrow(x)), length(dy)))
})

test_that("safe_invert", {
## full rank symmetric square matrix
A <- matrix(runif(25), 5, 5)
symmetric_matrix <- A %*% t(A)
inv_A <- safe_invert(A)
expect_equal(inv_A, solve(A))

## rank deficient symmetric square matrix
B <- matrix(runif(15), 5, 3)
symmetric_matrix <- B %*% t(B)
inv_B <- safe_invert(B)
expect_equal(inv_B, ginv(B))
})