From f66fccd6a9fbd324f0ce34c568796349000a2e0f Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Tue, 3 Jun 2025 16:58:51 -0400 Subject: [PATCH 1/9] add within argument to scoreCaliper for efficiency; work in progress --- R/match_on.R | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/R/match_on.R b/R/match_on.R index 90372cd7..08532b43 100644 --- a/R/match_on.R +++ b/R/match_on.R @@ -716,7 +716,7 @@ match_on.numeric <- function(x, within = NULL, caliper = NULL, exclude = NULL, d colnames = c(allowed@colnames, n0_names)) } else { - allowed <- scoreCaliper(x, z, caliper) + allowed <- scoreCaliper(x, z, caliper, within) } if (!is.null(within)) { @@ -750,9 +750,54 @@ match_on.numeric <- function(x, within = NULL, caliper = NULL, exclude = NULL, d #' @param z The treatment assignment vector (same length as \code{x}) #' @param caliper The width of the caliper with respect to the scores #' \code{x}. +#' @param within A valid distance specification, such as the result of +#' \code{\link{exactMatch}} or \code{\link{caliper}}. Finite entries indicate +#' which distances to create. Including this argument can significantly speed +#' up computation for sparse matching problems. Specify this filter either via +#' \code{within} or via \code{strata} elements of a formula; mixing these +#' methods will fail. #' @return An \code{InfinitySparseMatrix} object, suitable to be #' passed to \code{\link{match_on}} as an \code{within} argument. -scoreCaliper <- function(x, z, caliper) { +scoreCaliper <- function(x, z, caliper, within=NULL) { + if (!is.null(within)) { + allowed_list <- list() + for (blk in levels(within@groups)) { + ix <- within@groups == blk + if (sum(ix & z) > 0 & sum(ix & !z) > 0) { + allowed_list[[blk]] <- scoreCaliperBlock(x[ix], z[ix], caliper) + } + } + allowed <- dbind(allowed_list) + allowed <- addRows(allowed, within@rownames[!within@rownames %in% allowed@rownames], Inf) + allowed <- t(addRows(t(allowed), within@colnames[!within@colnames %in% allowed@colnames], Inf)) + } else { + allowed <- scoreCaliperBlock(x, z, caliper) + } + return(allowed) +} + +##' Helper function to add rows to an existing ISM +##' +##' Adds rows of value \code{val} for each entry in \code{names}. +##' @param ism An ISM. +##' @param names A vector of names to be added to the rows. +##' @param val The value to be added. +##' @return The ISM with any padded rows. +addRows <- function(ism, names, val=0) { + numnames <- length(names) + if (numnames == 0) return(ism) # Short circuit + ismdim <- ism@dimension + ism@.Data <- c(ism@.Data, rep(val, numnames*ismdim[2])) + ism@rows <- c(ism@rows, + rep(seq(ismdim[1] + 1, ismdim[1] + numnames), + each=ismdim[2])) + ism@cols <- c(ism@cols, rep(1:ismdim[2], times=numnames)) + ism@dimension <- ismdim + as.integer(c(numnames, 0)) + ism@rownames <- c(ism@rownames, names) + ism +} + +scoreCaliperBlock <- function(x, z, caliper) { z <- toZ(z) treated <- x[z] @@ -773,7 +818,7 @@ scoreCaliper <- function(x, z, caliper) { stops <- findInterval(treated + caliper + .Machine$double.eps, control) starts <- length(control) - findInterval(-(treated - caliper - - .Machine$double.eps), rev(-control)) + .Machine$double.eps), rev(-control)) for (i in 1:k) { if (starts[i] < length(control) && stops[i] > 0 && starts[i] < stops[i]) { From 25720da5d4357761b9c19f9bb8bab0dccf8ea438 Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Tue, 9 Sep 2025 15:53:36 -0400 Subject: [PATCH 2/9] rename addRows and add a similiarly named function for cols --- R/match_on.R | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/R/match_on.R b/R/match_on.R index 08532b43..267be4be 100644 --- a/R/match_on.R +++ b/R/match_on.R @@ -768,8 +768,11 @@ scoreCaliper <- function(x, z, caliper, within=NULL) { } } allowed <- dbind(allowed_list) - allowed <- addRows(allowed, within@rownames[!within@rownames %in% allowed@rownames], Inf) - allowed <- t(addRows(t(allowed), within@colnames[!within@colnames %in% allowed@colnames], Inf)) + allowed <- addEligibleTreatments(allowed, + within@rownames[!within@rownames %in% allowed@rownames], Inf) + allowed <- t(addEligibleTreatments(t(allowed), + within@colnames[!within@colnames %in% allowed@colnames], + Inf)) } else { allowed <- scoreCaliperBlock(x, z, caliper) } @@ -783,7 +786,7 @@ scoreCaliper <- function(x, z, caliper, within=NULL) { ##' @param names A vector of names to be added to the rows. ##' @param val The value to be added. ##' @return The ISM with any padded rows. -addRows <- function(ism, names, val=0) { +addEligibleTreatments <- function(ism, names, val=0) { numnames <- length(names) if (numnames == 0) return(ism) # Short circuit ismdim <- ism@dimension @@ -797,6 +800,27 @@ addRows <- function(ism, names, val=0) { ism } +##' Helper function to add cols to an existing ISM +##' +##' Adds cols of value \code{val} for each entry in \code{names}. +##' @param ism An ISM. +##' @param names A vector of names to be added to the cols +##' @param val The value to be added. +##' @return The ISM with any padded cols. +addIneligibleControls <- function(ism, names, val=Inf) { + numnames <- length(names) + if (numnames == 0) return(ism) # Short circuit + ismdim <- ism@dimension + ism@.Data <- c(ism@.Data, rep(val, numnames*ismdim[2])) + ism@cols <- c(ism@cols, + rep(seq(ismdim[1] + 1, ismdim[1] + numnames), + each=ismdim[2])) + ism@rows <- c(ism@rows, rep(1:ismdim[2], times=numnames)) + ism@dimension <- ismdim + as.integer(c(numnames, 0)) + ism@colnames <- c(ism@colnames, names) + ism +} + scoreCaliperBlock <- function(x, z, caliper) { z <- toZ(z) From 0bc1e04d1ccce3168dfe648971ed775e439fd4ac Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Thu, 11 Sep 2025 13:56:25 -0400 Subject: [PATCH 3/9] mostly working rough first stab at functions to add rows and cols to ISMs --- R/match_on.R | 16 ++++++++-------- tests/testthat/test.match_on.R | 17 ++++++++++++++++- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/R/match_on.R b/R/match_on.R index 267be4be..83bce520 100644 --- a/R/match_on.R +++ b/R/match_on.R @@ -795,7 +795,7 @@ addEligibleTreatments <- function(ism, names, val=0) { rep(seq(ismdim[1] + 1, ismdim[1] + numnames), each=ismdim[2])) ism@cols <- c(ism@cols, rep(1:ismdim[2], times=numnames)) - ism@dimension <- ismdim + as.integer(c(numnames, 0)) + ism@dimension <- c(ismdim[1] + as.integer(numnames), ismdim[2]) ism@rownames <- c(ism@rownames, names) ism } @@ -804,19 +804,19 @@ addEligibleTreatments <- function(ism, names, val=0) { ##' ##' Adds cols of value \code{val} for each entry in \code{names}. ##' @param ism An ISM. -##' @param names A vector of names to be added to the cols +##' @param names A vector of names to be added to the cols ##' @param val The value to be added. -##' @return The ISM with any padded cols. +##' @return The ISM with any padded cols. addIneligibleControls <- function(ism, names, val=Inf) { numnames <- length(names) if (numnames == 0) return(ism) # Short circuit ismdim <- ism@dimension - ism@.Data <- c(ism@.Data, rep(val, numnames*ismdim[2])) + ism@.Data <- c(ism@.Data, rep(val, numnames*ismdim[1])) ism@cols <- c(ism@cols, - rep(seq(ismdim[1] + 1, ismdim[1] + numnames), - each=ismdim[2])) - ism@rows <- c(ism@rows, rep(1:ismdim[2], times=numnames)) - ism@dimension <- ismdim + as.integer(c(numnames, 0)) + rep(seq(ismdim[2] + 1, ismdim[2] + numnames), + each=ismdim[1])) + ism@rows <- c(ism@rows, rep(1:ismdim[1], times=numnames)) + ism@dimension <- c(ismdim[1], ismdim[2] + as.integer(numnames)) ism@colnames <- c(ism@colnames, names) ism } diff --git a/tests/testthat/test.match_on.R b/tests/testthat/test.match_on.R index c0b5859c..05c5350b 100644 --- a/tests/testthat/test.match_on.R +++ b/tests/testthat/test.match_on.R @@ -1026,8 +1026,23 @@ test_that("Exclude argument for match_on with caliper arg", { expect_equivalent(mm[, 'z'], 7:9) }) - test_that("No longer support user-defined distances in match_on.formula", { data(nuclearplants) expect_warning(match_on(pr ~ cost, data = nuclearplants, method = optmatch:::compute_euclidean), "not supported") }) + +test_that("add rows to an ISM", { + w <- matrix(c(1, Inf, 2, 3, Inf, 4), nrow = 3) + rownames(w) <- c("a", "b", "c") + B <- as.InfinitySparseMatrix(w[1:2,1:2]) + BPlus <- addEligibleTreatments(B, c("c"), c(2, 4)) + expect_equivalent(as.matrix(BPlus), w) +}) + +test_that("add cols to an ISM", { + w <- matrix(c(1, Inf, 2, 3, Inf, 4, 5, 6, 7), nrow = 3) + colnames(w) <- c("a", "b", "c") + B <- as.InfinitySparseMatrix(w[,1:2]) + BPlus <- addIneligibleControls(B, c("c"), c(5, 6, 7)) + expect_equivalent(as.matrix(BPlus), w) +}) From 77ccee37f4b2de39976fecffbd24a567527b8446 Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Wed, 24 Sep 2025 15:31:24 -0400 Subject: [PATCH 4/9] requested roxygen adjustment --- R/match_on.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/match_on.R b/R/match_on.R index 83bce520..7181845a 100644 --- a/R/match_on.R +++ b/R/match_on.R @@ -786,6 +786,7 @@ scoreCaliper <- function(x, z, caliper, within=NULL) { ##' @param names A vector of names to be added to the rows. ##' @param val The value to be added. ##' @return The ISM with any padded rows. +##' @keywords internal addEligibleTreatments <- function(ism, names, val=0) { numnames <- length(names) if (numnames == 0) return(ism) # Short circuit @@ -807,6 +808,7 @@ addEligibleTreatments <- function(ism, names, val=0) { ##' @param names A vector of names to be added to the cols ##' @param val The value to be added. ##' @return The ISM with any padded cols. +##' @keywords internal addIneligibleControls <- function(ism, names, val=Inf) { numnames <- length(names) if (numnames == 0) return(ism) # Short circuit From f79900e69249a52ca40a77de1e3b0e3606780d25 Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Thu, 25 Sep 2025 13:31:51 -0400 Subject: [PATCH 5/9] working helper functions into scoreCaliper --- R/match_on.R | 5 ++--- man/scoreCaliper.Rd | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/match_on.R b/R/match_on.R index 7181845a..68d896c7 100644 --- a/R/match_on.R +++ b/R/match_on.R @@ -770,9 +770,8 @@ scoreCaliper <- function(x, z, caliper, within=NULL) { allowed <- dbind(allowed_list) allowed <- addEligibleTreatments(allowed, within@rownames[!within@rownames %in% allowed@rownames], Inf) - allowed <- t(addEligibleTreatments(t(allowed), - within@colnames[!within@colnames %in% allowed@colnames], - Inf)) + allowed <- addIneligibleControls(allowed, + within@colnames[!within@colnames %in% allowed@colnames], Inf) } else { allowed <- scoreCaliperBlock(x, z, caliper) } diff --git a/man/scoreCaliper.Rd b/man/scoreCaliper.Rd index 23e7c342..3074e9f2 100644 --- a/man/scoreCaliper.Rd +++ b/man/scoreCaliper.Rd @@ -5,7 +5,7 @@ \title{(Internal) Helper function to create an InfinitySparseMatrix from a set of scores, a treatment indicator, and a caliper width.} \usage{ -scoreCaliper(x, z, caliper) +scoreCaliper(x, z, caliper, within = NULL) } \arguments{ \item{x}{The scores, a vector indicating the 1-D location of each @@ -15,6 +15,13 @@ unit.} \item{caliper}{The width of the caliper with respect to the scores \code{x}.} + +\item{within}{A valid distance specification, such as the result of +\code{\link{exactMatch}} or \code{\link{caliper}}. Finite entries indicate +which distances to create. Including this argument can significantly speed +up computation for sparse matching problems. Specify this filter either via +\code{within} or via \code{strata} elements of a formula; mixing these +methods will fail.} } \value{ An \code{InfinitySparseMatrix} object, suitable to be From 982b2345a71af76f9f5938260aeb9cb53aa3e23f Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Mon, 13 Oct 2025 14:46:58 -0400 Subject: [PATCH 6/9] testthat test to ensure that scoreCaliper returns a BISM if its within arg is a BISM --- tests/testthat/test.match_on.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test.match_on.R b/tests/testthat/test.match_on.R index 05c5350b..07739e27 100644 --- a/tests/testthat/test.match_on.R +++ b/tests/testthat/test.match_on.R @@ -1046,3 +1046,14 @@ test_that("add cols to an ISM", { BPlus <- addIneligibleControls(B, c("c"), c(5, 6, 7)) expect_equivalent(as.matrix(BPlus), w) }) + +test_that("scoreCaliper returns BISM when given a BISM within arg", { + scores <- rep(1:3, each = 4) + z <- rep(c(0,1), 6) + names(z) <- names(scores) <- letters[1:12] + b <- rep(1:3, 4) + + ez <- exactMatch(z ~ b) + a <- scoreCaliper(scores, z, caliper=1, within=ez) + expect_true(is(a, "BlockedInfinitySparseMatrix")) +}) From 1def87cb555d88341cd4269c730fac814c5a5395 Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Mon, 13 Oct 2025 15:05:32 -0400 Subject: [PATCH 7/9] check the BISM groups output by scoreCaliper; currently failing --- tests/testthat/test.match_on.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test.match_on.R b/tests/testthat/test.match_on.R index 07739e27..c067c4d0 100644 --- a/tests/testthat/test.match_on.R +++ b/tests/testthat/test.match_on.R @@ -1056,4 +1056,14 @@ test_that("scoreCaliper returns BISM when given a BISM within arg", { ez <- exactMatch(z ~ b) a <- scoreCaliper(scores, z, caliper=1, within=ez) expect_true(is(a, "BlockedInfinitySparseMatrix")) + expect_equal(ez@groups, a@groups) # fails + ## currently getting: + ## > ez@groups + ## a b c d e f g h i j k l + ## 1 2 3 1 2 3 1 2 3 1 2 3 + ## Levels: 1 2 3 + ## > a@groups + ## a g d j e k b h c i f l + ## 0 0 0 0 1 1 1 1 2 2 2 2 + ## Levels: 0 1 2 }) From 320a8688228dea0ea0dd8ad06d109f7434b8303e Mon Sep 17 00:00:00 2001 From: Josh Buckner Date: Mon, 20 Oct 2025 12:30:20 -0400 Subject: [PATCH 8/9] change the way dbind indexes groups for BISMS --- R/InfinitySparseMatrix.R | 2 +- tests/testthat/test.match_on.R | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/InfinitySparseMatrix.R b/R/InfinitySparseMatrix.R index 91d831fe..364ca77b 100644 --- a/R/InfinitySparseMatrix.R +++ b/R/InfinitySparseMatrix.R @@ -1053,7 +1053,7 @@ dbind <- function(..., force_unique_names = FALSE) { sum(vapply(lapply(mats, methods::slot, "dimension"), "[", 1, 2)))) # This needs to be much smarter, especially if any element is already a BISM - groups <- as.factor(rep(0:(length(mats)-1), times = + groups <- as.factor(rep(seq_along(mats), times = vapply(lapply(mats, slot, "colnames"), length, 1) + vapply(lapply(mats, slot, "rownames"), length, 1))) names(groups) <- do.call(c, Map(c, cnameslist, rnameslist)) diff --git a/tests/testthat/test.match_on.R b/tests/testthat/test.match_on.R index c067c4d0..5f3bbbed 100644 --- a/tests/testthat/test.match_on.R +++ b/tests/testthat/test.match_on.R @@ -1056,7 +1056,8 @@ test_that("scoreCaliper returns BISM when given a BISM within arg", { ez <- exactMatch(z ~ b) a <- scoreCaliper(scores, z, caliper=1, within=ez) expect_true(is(a, "BlockedInfinitySparseMatrix")) - expect_equal(ez@groups, a@groups) # fails + expect_equal(ez@groups[sort(names(ez@groups))], + a@groups[sort(names(a@groups))]) ## currently getting: ## > ez@groups ## a b c d e f g h i j k l From b2e83e45584f8bd69db9c4d6377eca8b3f97d24c Mon Sep 17 00:00:00 2001 From: Ben Hansen Date: Tue, 2 Dec 2025 17:31:28 -0500 Subject: [PATCH 9/9] new NEWS entry, bump dev version # --- DESCRIPTION | 2 +- NEWS.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6dc3309b..273806ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: optmatch -Version: 0.10.8.9001 +Version: 0.10.8.9002 Title: Functions for Optimal Matching Description: Distance based bipartite matching using minimum cost flow, oriented to matching of treatment and control groups in observational studies ('Hansen' diff --git a/NEWS.md b/NEWS.md index 0591962a..ab8d00b9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ ## Changes in **optmatch** Version 0.10.8 -- Updates to internal C++ code. +- `optmatch:::scoreCaliper()` gains an optional + `within=` argument (#245) +- Updates to internal C++ code ## Changes in **optmatch** Version 0.10.7