From 93a5162ffc266e07438ce83c90f697d5fec5a5f4 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Fri, 19 Jul 2024 13:54:05 -0400 Subject: [PATCH 01/31] refactoring of subproblem information --- DESCRIPTION | 3 +- R/fullmatch.R | 140 +++++++++++++--- R/solve_reg_fm_prob.R | 2 +- R/subproblemutils.R | 370 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 494 insertions(+), 21 deletions(-) create mode 100644 R/subproblemutils.R diff --git a/DESCRIPTION b/DESCRIPTION index d5cd65d2..02aab102 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,12 +86,13 @@ Collate: 'solver.R' 'strata.R' 'stratumStructure.R' + 'subproblemutils.R' 'summary.ism.R' 'summary.optmatch.R' 'utilities.R' 'zzz.R' 'zzzDistanceSpecification.R' VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Encoding: UTF-8 Remotes: josherrickson/rrelaxiv diff --git a/R/fullmatch.R b/R/fullmatch.R index 169cf4a8..3f971f65 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -382,23 +382,7 @@ fullmatch.matrix <- function(x, subproblemids <- names(problems) if (is.null(subproblemids)) subproblemids <- character(1L) - if (is.null(hint)) { hints <- rep(list(NULL), np) - } else { - hints <- split(hint, hint[['groups']], - drop=TRUE # drops levels of hint$groups that aren't represented in hint - ) - nohint <- setdiff(subproblemids, names(hints)) - hints <- hints[match(subproblemids, names(hints), 0L)] - if (length(hints)>0) for (ii in 1L:length(hints)) hints[[ii]] <- new("NodeInfo", hints[[ii]]) - if (length(nohint)) - { - nullhint <- rep(list(NULL), length(nohint)) - names(nullhint) <- nohint - hints <- c(hints, nullhint) - if (length(nohint)==np) warning("Hint lacks information about subproblems of this problem; ignoring.") - } - hints <- hints[match(subproblemids, names(hints))] - } + hints <- get_hints(hint, problems) if (length(min.controls) > 1 & np != length(min.controls)) { if (is.null(names(min.controls))) @@ -531,6 +515,9 @@ fullmatch.matrix <- function(x, flipped <- TRUE } + + + ## (I'd like to do the following higher in the call stack, and also more ## informatively, obviating subsequent needs for max.cpt, min.cpt, ## omit.fraction etc. Keeping it here for fear of mischief with flipped subproblems.) @@ -549,6 +536,43 @@ fullmatch.matrix <- function(x, return(temp) } + .fullmatch2 <- function(d, mnctl, mxctl, omf, hint = NULL, solver, + flipped, epsilon.in) { + + # if the subproblem is completely empty, short circuit + if (length(d) == 0 || all(is.infinite(d))) { + x <- dim(d) + cells.a <- rep(NA, x[1]) + cells.b <- rep(NA, x[2]) + names(cells.a) <- rownames(d) + names(cells.b) <- colnames(d) + tmp <- list(cells = c(cells.a, cells.b), err = -1) + return(tmp) + } + + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + + if (!flipped) { + omf.calc <- omf + } else { + omf.calc <- -1 * omf + } + + + temp <- solve_reg_fm_prob2(node_info = hint, + distspec = d, + max.cpt = mxctl, + min.cpt = mnctl, + solver = solver, + omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA + epsilon = epsilon.in) + if (!is.null(temp$MCFSolution)) + temp$MCFSolution@subproblems[1L,"flipped"] <- flipped + + return(temp) + } # a second helper function, that will attempt graceful recovery in situations where the match # is infeasible with the given max.controls @@ -596,6 +620,60 @@ fullmatch.matrix <- function(x, } } + .fullmatch.with.recovery2 <- function(d.r, mnctl.r, mxctl.r, omf.r, hint.r = NULL, solver, + flipped.r, epsilon.r) { + + # if the subproblem isn't clearly infeasible, try to get a match + if (mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE)) { + tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r) + if (!all(is.na(tmp[1]$cells))) { + # subproblem is feasible with given constraints, no need to recover + new.omit.fraction <<- c(new.omit.fraction, omf.r) + return(tmp) + } + } + # if max.control is in [1, Inf), and we're infeasible + if(is.finite(mxctl.r) & mxctl.r >= 1) { + # Re-solve with no max.control -- now possibly already happening? + # tmp2 <- list(.fullmatch2(d.r, mnctl.r, Inf, omf.r, hint.r, solver, + # flipped.r, epsilon.r)) + tmp2 <- list(.fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r)) + + + tmp2.optmatch <- makeOptmatch(d.r, tmp2, match.call(), data) + trial.ss <- stratumStructure(tmp2.optmatch) + treats <- as.numeric(unlist(lapply(strsplit(names(trial.ss), ":"),"[",1))) + ctrls <- as.numeric(unlist(lapply(strsplit(names(trial.ss), ":"),"[",2))) + num.controls <- sum((pmin(ctrls, mxctl.r)*trial.ss)[treats > 0]) + if(num.controls == 0) { + # infeasible anyways + if (!exists("tmp")) { + tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r) + } + new.omit.fraction <<- c(new.omit.fraction, omf.r) + return(tmp) + } + new.omf.r <- 1 - num.controls/dim(d.r)[2] + + # feasible with the new omit fraction + new.omit.fraction <<- c(new.omit.fraction, new.omf.r) + return(.fullmatch2(d.r, mnctl.r, mxctl.r, new.omf.r, hint.r, solver, + flipped.r, epsilon.r)) + } else { + # subproblem is infeasible, but we can't try to fix because no max.controls + if (!exists("tmp")) { + tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r) + } + + new.omit.fraction <<- c(new.omit.fraction) + return(tmp) + } + } + # In case we need to try and recover from infeasible, save the new.omit.fraction's used for output to user new.omit.fraction <- numeric(0) @@ -604,10 +682,34 @@ fullmatch.matrix <- function(x, setTryRecovery() } + big.list <- parse_subproblems(problems, + min.controls, + max.controls, + omit.fraction, + hints, + total.n, + TOL) + problems <- big.list[['subproblems']] + is_flipped <- big.list[['flipped_status']] + min.controls <- big.list[['min.controls']] + max.controls <- big.list[['max.controls']] + epsilons <- big.list[['epsilons']] + + + #browser() if (options()$fullmatch_try_recovery) { - solutions <- mapply(.fullmatch.with.recovery, problems, min.controls, max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) + # solutions <- mapply(.fullmatch.with.recovery, problems, min.controls, + # max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) + solutions <- mapply(.fullmatch.with.recovery2, problems, min.controls, + max.controls, omit.fraction, hints, solver, + is_flipped, epsilons, SIMPLIFY = FALSE) } else { - solutions <- mapply(.fullmatch, problems, min.controls, max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) + # solutions <- mapply(.fullmatch, problems, min.controls, + # max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) + + solutions <- mapply(.fullmatch2, problems, min.controls, + max.controls, omit.fraction, hints, solver, + is_flipped, epsilons, SIMPLIFY = FALSE) } mout <- makeOptmatch(x, solutions, match.call(), data) diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index a48ce1be..aaefefd2 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -86,7 +86,7 @@ solve_reg_fm_prob <- function(node_info, } f.ctls <- 1-omit.fraction } - +###### end up updating omit.fraction? if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment !rfeas | !cfeas # either no controls or no treatments diff --git a/R/subproblemutils.R b/R/subproblemutils.R new file mode 100644 index 00000000..097d237b --- /dev/null +++ b/R/subproblemutils.R @@ -0,0 +1,370 @@ +get_hints <- function(hint, subproblems) +{ + np <- length(subproblems) + subproblemids <- names(subproblems) + if (is.null(subproblemids)) subproblemids <- character(1L) + if (is.null(hint)) + { + hints <- rep(list(NULL), np) + } + else + { + hints <- split(hint, hint[['groups']], + drop=TRUE # drops levels of hint$groups that aren't represented in hint + ) + nohint <- setdiff(subproblemids, names(hints)) + hints <- hints[match(subproblemids, names(hints), 0L)] + if (length(hints)>0) for (ii in 1L:length(hints)) hints[[ii]] <- new("NodeInfo", hints[[ii]]) + if (length(nohint)) + { + nullhint <- rep(list(NULL), length(nohint)) + names(nullhint) <- nohint + hints <- c(hints, nullhint) + if (length(nohint)==np) warning("Hint lacks information about subproblems of this problem; ignoring.") + } + hints <- hints[match(subproblemids, names(hints))] + } + hint.list <- mapply(prepare_subproblem_hint, + d = subproblems, + hint = hints, + SIMPLIFY = FALSE) + return(hint.list) +} + + +get_tol_frac <- function(d, total_n, np) +{ + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + tol.frac <- + if (total_n > 2 * np) { + (nrow + ncol - 2)/(total_n - 2 * np) + } else 1 + return(tol.frac) +} + + +get_flipped_status <- function(d, omf, mxctl, mnctl) +{ + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + if (switch(1 + is.na(omf), omf >= 0, mxctl > .5)) { + maxc <- min(mxctl, ncol) + minc <- max(mnctl, 1/nrow) + flipped <- FALSE + } else { + maxc <- min(1/mnctl, ncol) + minc <- max(1/mxctl, 1/nrow) + d <- t(d) + flipped <- TRUE + } + return(list(subproblem = d, + flipped_status = flipped, + maxc = maxc, + minc = minc)) +} + +prepare_subproblem_hint <- function(d, hint) +{ + if (is.null(hint)){ + hint <- nodes_shell_fmatch(rownames(d), + colnames(d)) + } + return(hint) +} + +get_dms <- function(d, + hint) +{ + + # if the subproblem is completely empty, short circuit + if (length(d) == 0 || + all(is.infinite(d))) { + return(NA) + } + + rownames <- subset(hint[["name"]], hint[['upstream_not_down']]) + colnames <- subset(hint[["name"]], !hint[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) + dm <- edgelist(d, c(rownames, colnames)) + return(dm) +} + +get_epsilon <- function(subproblem, flipped, + hint, tolerance) +{ + rownames <- subset(hint[["name"]], hint[['upstream_not_down']]) + colnames <- subset(hint[["name"]], !hint[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) + + # matchable_nodes_info <- + # filter(hint, + # is.na(hint[['upstream_not_down']]) | #retail bookkeeping nodes + # is_matchable(hint[['name']], dm, "either") + # ) + # rfeas <- sum( matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) + # cfeas <- sum(!matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) + # + sub.dmat <- remove_inf_na_rows_cols(subproblem) + if (!flipped) + { + rfeas <- nrow(sub.dmat) + cfeas <- ncol(sub.dmat) + } else { + rfeas <- ncol(sub.dmat) + cfeas <- nrow(sub.dmat) + } + + + max_dist <- max(sub.dmat, na.rm = TRUE) + eps.val <- calculate_epsilon(rfeas, + cfeas, + tolerance, + max_dist) + + + + return(eps.val) +} + +calculate_epsilon <- function(rfeas, + cfeas, + tolerance, + maxdist) +{ + old.o <- options(warn=-1) + #epsilon_lower_lim <- max(dm$'dist')/(.Machine$integer.max/64 -2) + epsilon_lower_lim <- maxdist / (.Machine$integer.max/64 -2) + epsilon <- if (tolerance>0 & rfeas>1 & cfeas>1) { + max(epsilon_lower_lim, tolerance/(rfeas + cfeas - 2)) + } else epsilon_lower_lim + options(old.o) + return(epsilon) +} + +parse_subproblems <- function(problems, min.controls, + max.controls, omit.fraction, hints, + total.n, TOL.in) +{ + np <- length(problems) + + #list of nodeInfo objects i think + + flipped_status <- mapply(get_flipped_status, + d = problems, + omf = omit.fraction, + mxctl = max.controls, + mnctl = min.controls, + SIMPLIFY = FALSE) + + problems <- lapply(flipped_status, function(x) x[['subproblem']]) + flippeds <- lapply(flipped_status, function(x) x[['flipped_status']]) + mincs <- lapply(flipped_status, function(x) x[['minc']]) + maxcs <- lapply(flipped_status, function(x) x[['maxc']]) + + tol.fracs <- mapply(get_tol_frac, + d = problems, + total_n = rep(total.n, np), + np = rep(np, np)) + tolerances <- tol.fracs * TOL.in + + + # subproblem.edgelists <- mapply(get_dms, + # d = problems, + # hint = hints, + # SIMPLIFY = FALSE) + # what about tolerances and epsilons when the problem is integer? account for this... + + epsilons <- mapply(get_epsilon, + hint = hints, + tolerance = tolerances, + subproblem = problems, + flipped = flippeds, + SIMPLIFY = FALSE) + + + tmp <- list(subproblems = problems, + flipped_status = flippeds, + min.controls = mincs, + max.controls = maxcs, + epsilons = epsilons) + return(tmp) +} + + +solve_reg_fm_prob2 <- function(node_info, + distspec, + min.cpt, + max.cpt, + omit.fraction=NULL, + solver, + epsilon) { + + if (min.cpt <=0 | max.cpt<=0) { + stop("inputs min.cpt, max.cpt must be positive") + } + stopifnot(is(node_info, "NodeInfo")) + rownames <- subset(node_info[["name"]], node_info[['upstream_not_down']]) + colnames <- subset(node_info[["name"]], !node_info[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) + if (!all(rownames %in% dimnames(distspec)[[1]])) { + stop("node_info rownames must be rownames for \'distspec\'") + } + + if (!all(colnames %in% dimnames(distspec)[[2]])) { + stop("node_info colnames must be colnames for \'distspec\'") + } + + # distance must have an edgelist method + if (!hasMethod("edgelist", class(distspec))) { + stop("Argument \'distspec\' must have a \'edgelist\' method") + } + + dm <- edgelist(distspec, c(rownames, colnames)) + + matchable_nodes_info <- + filter(node_info, + is.na(node_info[['upstream_not_down']]) | #retail bookkeeping nodes + is_matchable(node_info[['name']], dm, "either") + ) + rfeas <- sum( matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) + cfeas <- sum(!matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) + ## Update `omit.fraction`: + if (cfeas < ncols & is.numeric(omit.fraction) && omit.fraction >0) { + original_number_to_omit <- omit.fraction*ncols + number_implicitly_omitted_already <- ncols - cfeas + omit.fraction <- + (original_number_to_omit - number_implicitly_omitted_already)/cfeas + ## If the number to be omitted is less than the number of unmatchable + ## columns, ncols - cfeas, then this omit.fraction can be negative. Then + ## we just omit a few more than directed by the supplied omit.fraction: + if (omit.fraction <= 0) omit.fraction <- NULL + } + + if (is.null(omit.fraction)) { + f.ctls <- 1 + } else { + if (!is.numeric(omit.fraction) | omit.fraction <0 | omit.fraction > 1) { + stop("omit.fraction must be null or between 0 and 1") + } + f.ctls <- 1-omit.fraction + } + if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min + ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment + !rfeas | !cfeas # either no controls or no treatments + ) + { + ans <- rep(NA_integer_, nrows + ncols) + names(ans) <- c(rownames, colnames) + return(list(cells=ans, err=0, MCFSolution=NULL)) + } + + + if (all(abs(dm$'dist'- 0) < sqrt(.Machine$double.eps))) { + dm$'dist' <- rep(1L, length(dm$'dist')) # so we'll be routed to intSolve() + } + temp <- + if (is.integer(dm$'dist')) { + intSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, solver) + } else { + doubleSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, + rfeas, cfeas, epsilon, solver) + } + + temp$treated <- factor(temp[['i']]) # levels of these factors now convey + temp$control <- factor(temp[['j']]) # treatment/control distinction + matches <- solution2factor(temp) + + ans <- rep(NA_character_, nrows + ncols) + names(ans) <- c(rownames, colnames) + ans[names(matches)] <- matches + + if (!is.null(temp[["MCFSolution"]])) + { + temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr + temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solutions==1L) + + ## Presently we can treat this subproblem as non-flipped even if it was, + ## since `dm` will have been transposed in the event of flipping. Doing + ## so will prevent the evaluate_* routines below from getting confused. + ## Of course we have to remember to set it based on actual information as + ## the MCFsolutions object passes up through the point in the call stack + ## where that transposition was made. + temp[["MCFSolution"]]@subproblems[1L, "flipped"] <- FALSE + ## ... and now we can proceed with: + evaluate_lagrangian(dm, temp[["MCFSolution"]]) -> + temp[["MCFSolution"]]@subproblems[1L, "lagrangian_value"] + evaluate_dual(dm, temp[["MCFSolution"]]) -> + temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] + nodeinfo(temp[["MCFSolution"]]) <- + update(node_info, nodeinfo(temp[["MCFSolution"]])) + } + + return(list(cells = ans, err = temp$maxerr, + MCFSolution=temp[["MCFSolution"]] + ) + ) +} + + + +findSubproblemEpsilons <- function(tol = NULL, + epsilon = NULL, + subproblems, + x, + d) +{ + if (!is.null(tol)) #user specifies tolerance, rather than epsilon + { + total.n <- sum(dim(x)) + TOL <- tol * total.n + np <- length(subproblems) + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + + tol.frac <- + if (total.n > 2 * np) { + (nrow + ncol - 2)/(total.n - 2 * np) + } else 1 + + # needs to handle multiple specifications for multiple subproblems, 1 specification for multiple subproblems, or 1 specification, 1 subproblem + if (identical(length(tol), np)) { + # if we have a tolerance specification for each subproblem + tolerances <- TOL * tol.frac + + } else { #handle one tolerance for all subproblems. Need to check that other cases trigger errors + if (length(tol) == 1) + { + + tol.temp <- TOL * tol.frac #eventually, don't return tolerances, just return epsilons here + tolerances <- rep(tol.temp, np) + } else { + stop("Please specify one tolerance for all subproblems, or one tolerance per subproblem.") + } + } + return(tolerances) + + + + } + +} + +remove_inf_na_rows_cols <- function(mat) { + # Identify rows that contain all Inf or NA + rows_to_remove <- apply(mat, 1, function(row) all(is.infinite(row) | is.na(row))) + + # Identify columns that contain all Inf or NA + cols_to_remove <- apply(mat, 2, function(col) all(is.infinite(col) | is.na(col))) + + # Remove identified rows and columns + #cleaned_mat <- mat[!rows_to_remove, !cols_to_remove, drop = FALSE] + cleaned_mat <- mat[!rows_to_remove, !cols_to_remove] + return(cleaned_mat) +} From e995c555a8ac11b01174e66f75290c56fc718ca3 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Fri, 19 Jul 2024 15:01:37 -0400 Subject: [PATCH 02/31] fixing some bugs --- R/fullmatch.R | 22 ++++++++++++++++++++-- R/subproblemutils.R | 24 ++++++++++++++++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/R/fullmatch.R b/R/fullmatch.R index 3f971f65..600482d9 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -382,7 +382,25 @@ fullmatch.matrix <- function(x, subproblemids <- names(problems) if (is.null(subproblemids)) subproblemids <- character(1L) - hints <- get_hints(hint, problems) + if (is.null(hint)) { hints <- rep(list(NULL), np) + } else { + hints <- split(hint, hint[['groups']], + drop=TRUE # drops levels of hint$groups that aren't represented in hint + ) + nohint <- setdiff(subproblemids, names(hints)) + hints <- hints[match(subproblemids, names(hints), 0L)] + if (length(hints)>0) for (ii in 1L:length(hints)) hints[[ii]] <- new("NodeInfo", hints[[ii]]) + if (length(nohint)) + { + nullhint <- rep(list(NULL), length(nohint)) + names(nullhint) <- nohint + hints <- c(hints, nullhint) + if (length(nohint)==np) warning("Hint lacks information about subproblems of this problem; ignoring.") + } + hints <- hints[match(subproblemids, names(hints))] + } + + #hints <- get_hints(hint, problems) if (length(min.controls) > 1 & np != length(min.controls)) { if (is.null(names(min.controls))) @@ -694,7 +712,7 @@ fullmatch.matrix <- function(x, min.controls <- big.list[['min.controls']] max.controls <- big.list[['max.controls']] epsilons <- big.list[['epsilons']] - + hints <- big.list[["hints"]] #browser() if (options()$fullmatch_try_recovery) { diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 097d237b..2dda5a0d 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -126,8 +126,6 @@ get_epsilon <- function(subproblem, flipped, tolerance, max_dist) - - return(eps.val) } @@ -136,6 +134,7 @@ calculate_epsilon <- function(rfeas, tolerance, maxdist) { + old.o <- options(warn=-1) #epsilon_lower_lim <- max(dm$'dist')/(.Machine$integer.max/64 -2) epsilon_lower_lim <- maxdist / (.Machine$integer.max/64 -2) @@ -178,9 +177,13 @@ parse_subproblems <- function(problems, min.controls, # hint = hints, # SIMPLIFY = FALSE) # what about tolerances and epsilons when the problem is integer? account for this... + hint.list <- mapply(prepare_subproblem_hint, + d = problems, + hint = hints, + SIMPLIFY = FALSE) epsilons <- mapply(get_epsilon, - hint = hints, + hint = hint.list, tolerance = tolerances, subproblem = problems, flipped = flippeds, @@ -191,7 +194,8 @@ parse_subproblems <- function(problems, min.controls, flipped_status = flippeds, min.controls = mincs, max.controls = maxcs, - epsilons = epsilons) + epsilons = epsilons, + hints = hint.list) return(tmp) } @@ -362,9 +366,17 @@ remove_inf_na_rows_cols <- function(mat) { # Identify columns that contain all Inf or NA cols_to_remove <- apply(mat, 2, function(col) all(is.infinite(col) | is.na(col))) - # Remove identified rows and columns #cleaned_mat <- mat[!rows_to_remove, !cols_to_remove, drop = FALSE] - cleaned_mat <- mat[!rows_to_remove, !cols_to_remove] + if (sum(rows_to_remove) == 0 && sum(cols_to_remove) == 0) + { + cleaned_mat <- mat + } else { + cleaned_mat <- mat[!rows_to_remove, !cols_to_remove] + } + + #cleaned_mat <- as.matrix(mat[!rows_to_remove, !cols_to_remove]) + #rownames(cleaned_mat) <- rownames(mat)[!rows_to_remove] + #colnames(cleaned_mat) <- colnames(mat)[!cols_to_remove] return(cleaned_mat) } From 4a0048c8cae81d906830eda234193ae7c6448e06 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Fri, 19 Jul 2024 16:16:20 -0400 Subject: [PATCH 03/31] bug fixes --- R/fullmatch.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/fullmatch.R b/R/fullmatch.R index 600482d9..50354b47 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -642,7 +642,14 @@ fullmatch.matrix <- function(x, flipped.r, epsilon.r) { # if the subproblem isn't clearly infeasible, try to get a match - if (mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE)) { + if (!flipped.r) + { + feasible.condition <- mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE) + } else { + feasible.condition <- mxctl.r * dim(t(d.r))[1] >= prod(dim(t(d.r))[2], 1-omf.r, na.rm=TRUE) + } + + if (feasible.condition) { tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, flipped.r, epsilon.r) if (!all(is.na(tmp[1]$cells))) { From fd31f414df71c9e58195c31aaf90f82556026d7e Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 22 Jul 2024 12:00:19 -0400 Subject: [PATCH 04/31] fixing infinite values issue --- R/subproblemutils.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 2dda5a0d..494294fe 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -119,8 +119,9 @@ get_epsilon <- function(subproblem, flipped, cfeas <- nrow(sub.dmat) } - - max_dist <- max(sub.dmat, na.rm = TRUE) + #dealing with infinite values? + max_dist <- max(sub.dmat[is.finite(sub.dmat)], + na.rm = TRUE) eps.val <- calculate_epsilon(rfeas, cfeas, tolerance, From 9801255bb979186837f417c4f8580f5c0e88f4c2 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 22 Jul 2024 14:28:40 -0400 Subject: [PATCH 05/31] more bug fixes, passes all tests now --- R/fullmatch.R | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/R/fullmatch.R b/R/fullmatch.R index 50354b47..5e1f39a5 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -660,10 +660,15 @@ fullmatch.matrix <- function(x, } # if max.control is in [1, Inf), and we're infeasible if(is.finite(mxctl.r) & mxctl.r >= 1) { - # Re-solve with no max.control -- now possibly already happening? + # Re-solve with no max.control -- we most override the precomputed one now + if (!flipped.r) { + mxctl.r.new <- min(Inf, ncol(d.r)) + } else { + mxctl.r.new <- min(1/mnctl.r, ncol(d.r)) + } # tmp2 <- list(.fullmatch2(d.r, mnctl.r, Inf, omf.r, hint.r, solver, # flipped.r, epsilon.r)) - tmp2 <- list(.fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + tmp2 <- list(.fullmatch2(d.r, mnctl.r, mxctl.r.new, omf.r, hint.r, solver, flipped.r, epsilon.r)) @@ -707,34 +712,48 @@ fullmatch.matrix <- function(x, setTryRecovery() } - big.list <- parse_subproblems(problems, + precomputed_parameters <- parse_subproblems(problems, min.controls, max.controls, omit.fraction, hints, total.n, TOL) - problems <- big.list[['subproblems']] - is_flipped <- big.list[['flipped_status']] - min.controls <- big.list[['min.controls']] - max.controls <- big.list[['max.controls']] - epsilons <- big.list[['epsilons']] - hints <- big.list[["hints"]] + + # problems <- precomputed_parameters[['subproblems']] + # is_flipped <- precomputed_parameters[['flipped_status']] + # min.controls <- precomputed_parameters[['min.controls']] + # max.controls <- precomputed_parameters[['max.controls']] + # epsilons <- precomputed_parameters[['epsilons']] + # hints <- precomputed_parameters[["hints"]] #browser() if (options()$fullmatch_try_recovery) { # solutions <- mapply(.fullmatch.with.recovery, problems, min.controls, # max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) - solutions <- mapply(.fullmatch.with.recovery2, problems, min.controls, - max.controls, omit.fraction, hints, solver, - is_flipped, epsilons, SIMPLIFY = FALSE) + solutions <- mapply(.fullmatch.with.recovery2, + precomputed_parameters[['subproblems']], + precomputed_parameters[['min.controls']], + precomputed_parameters[['max.controls']], + omit.fraction, + precomputed_parameters[["hints"]], + solver, + precomputed_parameters[['flipped_status']], + precomputed_parameters[['epsilons']], + SIMPLIFY = FALSE) } else { # solutions <- mapply(.fullmatch, problems, min.controls, # max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) - solutions <- mapply(.fullmatch2, problems, min.controls, - max.controls, omit.fraction, hints, solver, - is_flipped, epsilons, SIMPLIFY = FALSE) + solutions <- mapply(.fullmatch2, + precomputed_parameters[['subproblems']], + precomputed_parameters[['min.controls']], + precomputed_parameters[['max.controls']], + omit.fraction, + precomputed_parameters[["hints"]], + solver, + precomputed_parameters[['flipped_status']], + precomputed_parameters[['epsilons']], SIMPLIFY = FALSE) } mout <- makeOptmatch(x, solutions, match.call(), data) From 9495ffbfbe4dc9136b8944eff427712428b64559 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 22 Jul 2024 15:54:12 -0400 Subject: [PATCH 06/31] allowing tolerance in solve_reg_fm_prob --- R/fullmatch.R | 137 ++++----------------------------------- R/max.controls.cap.R | 11 ++-- R/solve_reg_fm_prob.R | 124 +++++++++++++++++------------------ R/subproblemutils.R | 147 +----------------------------------------- 4 files changed, 81 insertions(+), 338 deletions(-) diff --git a/R/fullmatch.R b/R/fullmatch.R index 5e1f39a5..9c18717e 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -497,64 +497,7 @@ fullmatch.matrix <- function(x, # a helper to handle a single matching problem. all args required. # input error checking happens in the public fullmatch function. - .fullmatch <- function(d, mnctl, mxctl, omf, hint = NULL, solver) { - - # if the subproblem is completely empty, short circuit - if (length(d) == 0 || all(is.infinite(d))) { - x <- dim(d) - cells.a <- rep(NA, x[1]) - cells.b <- rep(NA, x[2]) - names(cells.a) <- rownames(d) - names(cells.b) <- colnames(d) - tmp <- list(cells = c(cells.a, cells.b), err = -1) - return(tmp) - } - - ncol <- dim(d)[2] - nrow <- dim(d)[1] - - tol.frac <- - if (total.n > 2 * np) { - (nrow + ncol - 2)/(total.n - 2 * np) - } else 1 - - # if omf is specified (i.e. not NA), see if is non-negative - # if omf is not specified, check to see if mxctl is > .5 - if (switch(1 + is.na(omf), omf >= 0, mxctl > .5)) { - maxc <- min(mxctl, ncol) - minc <- max(mnctl, 1/nrow) - omf.calc <- omf - flipped <- FALSE - } else { - maxc <- min(1/mnctl, ncol) - minc <- max(1/mxctl, 1/nrow) - omf.calc <- -1 * omf - d <- t(d) - flipped <- TRUE - } - - - - - ## (I'd like to do the following higher in the call stack, and also more - ## informatively, obviating subsequent needs for max.cpt, min.cpt, - ## omit.fraction etc. Keeping it here for fear of mischief with flipped subproblems.) - if (is.null(hint)) hint <- nodes_shell_fmatch(rownames(d), colnames(d)) - - temp <- solve_reg_fm_prob(node_info = hint, - distspec = d, - max.cpt = maxc, - min.cpt = minc, - tolerance = TOL * tol.frac, - solver = solver, - omit.fraction = if(!is.na(omf)) { omf.calc }# passes NULL for NA - ) - if (!is.null(temp$MCFSolution)) - temp$MCFSolution@subproblems[1L,"flipped"] <- flipped - - return(temp) - } - .fullmatch2 <- function(d, mnctl, mxctl, omf, hint = NULL, solver, + .fullmatch <- function(d, mnctl, mxctl, omf, hint = NULL, solver, flipped, epsilon.in) { # if the subproblem is completely empty, short circuit @@ -579,7 +522,7 @@ fullmatch.matrix <- function(x, } - temp <- solve_reg_fm_prob2(node_info = hint, + temp <- solve_reg_fm_prob(node_info = hint, distspec = d, max.cpt = mxctl, min.cpt = mnctl, @@ -594,51 +537,7 @@ fullmatch.matrix <- function(x, # a second helper function, that will attempt graceful recovery in situations where the match # is infeasible with the given max.controls - .fullmatch.with.recovery <- function(d.r, mnctl.r, mxctl.r, omf.r, hint.r = NULL, solver) { - - # if the subproblem isn't clearly infeasible, try to get a match - if (mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE)) { - tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver) - if (!all(is.na(tmp[1]$cells))) { - # subproblem is feasible with given constraints, no need to recover - new.omit.fraction <<- c(new.omit.fraction, omf.r) - return(tmp) - } - } - # if max.control is in [1, Inf), and we're infeasible - if(is.finite(mxctl.r) & mxctl.r >= 1) { - # Re-solve with no max.control - tmp2 <- list(.fullmatch(d.r, mnctl.r, Inf, omf.r, hint.r, solver)) - tmp2.optmatch <- makeOptmatch(d.r, tmp2, match.call(), data) - trial.ss <- stratumStructure(tmp2.optmatch) - treats <- as.numeric(unlist(lapply(strsplit(names(trial.ss), ":"),"[",1))) - ctrls <- as.numeric(unlist(lapply(strsplit(names(trial.ss), ":"),"[",2))) - num.controls <- sum((pmin(ctrls, mxctl.r)*trial.ss)[treats > 0]) - if(num.controls == 0) { - # infeasible anyways - if (!exists("tmp")) { - tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver) - } - new.omit.fraction <<- c(new.omit.fraction, omf.r) - return(tmp) - } - new.omf.r <- 1 - num.controls/dim(d.r)[2] - - # feasible with the new omit fraction - new.omit.fraction <<- c(new.omit.fraction, new.omf.r) - return(.fullmatch(d.r, mnctl.r, mxctl.r, new.omf.r, hint.r, solver)) - } else { - # subproblem is infeasible, but we can't try to fix because no max.controls - if (!exists("tmp")) { - tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver) - } - - new.omit.fraction <<- c(new.omit.fraction, omf.r) - return(tmp) - } - } - - .fullmatch.with.recovery2 <- function(d.r, mnctl.r, mxctl.r, omf.r, hint.r = NULL, solver, + .fullmatch.with.recovery <- function(d.r, mnctl.r, mxctl.r, omf.r, hint.r = NULL, solver, flipped.r, epsilon.r) { # if the subproblem isn't clearly infeasible, try to get a match @@ -650,7 +549,7 @@ fullmatch.matrix <- function(x, } if (feasible.condition) { - tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, flipped.r, epsilon.r) if (!all(is.na(tmp[1]$cells))) { # subproblem is feasible with given constraints, no need to recover @@ -666,9 +565,8 @@ fullmatch.matrix <- function(x, } else { mxctl.r.new <- min(1/mnctl.r, ncol(d.r)) } - # tmp2 <- list(.fullmatch2(d.r, mnctl.r, Inf, omf.r, hint.r, solver, - # flipped.r, epsilon.r)) - tmp2 <- list(.fullmatch2(d.r, mnctl.r, mxctl.r.new, omf.r, hint.r, solver, + + tmp2 <- list(.fullmatch(d.r, mnctl.r, mxctl.r.new, omf.r, hint.r, solver, flipped.r, epsilon.r)) @@ -680,7 +578,7 @@ fullmatch.matrix <- function(x, if(num.controls == 0) { # infeasible anyways if (!exists("tmp")) { - tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, flipped.r, epsilon.r) } new.omit.fraction <<- c(new.omit.fraction, omf.r) @@ -690,12 +588,12 @@ fullmatch.matrix <- function(x, # feasible with the new omit fraction new.omit.fraction <<- c(new.omit.fraction, new.omf.r) - return(.fullmatch2(d.r, mnctl.r, mxctl.r, new.omf.r, hint.r, solver, + return(.fullmatch(d.r, mnctl.r, mxctl.r, new.omf.r, hint.r, solver, flipped.r, epsilon.r)) } else { # subproblem is infeasible, but we can't try to fix because no max.controls if (!exists("tmp")) { - tmp <- .fullmatch2(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, flipped.r, epsilon.r) } @@ -720,18 +618,8 @@ fullmatch.matrix <- function(x, total.n, TOL) - # problems <- precomputed_parameters[['subproblems']] - # is_flipped <- precomputed_parameters[['flipped_status']] - # min.controls <- precomputed_parameters[['min.controls']] - # max.controls <- precomputed_parameters[['max.controls']] - # epsilons <- precomputed_parameters[['epsilons']] - # hints <- precomputed_parameters[["hints"]] - - #browser() if (options()$fullmatch_try_recovery) { - # solutions <- mapply(.fullmatch.with.recovery, problems, min.controls, - # max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) - solutions <- mapply(.fullmatch.with.recovery2, + solutions <- mapply(.fullmatch.with.recovery, precomputed_parameters[['subproblems']], precomputed_parameters[['min.controls']], precomputed_parameters[['max.controls']], @@ -742,10 +630,7 @@ fullmatch.matrix <- function(x, precomputed_parameters[['epsilons']], SIMPLIFY = FALSE) } else { - # solutions <- mapply(.fullmatch, problems, min.controls, - # max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) - - solutions <- mapply(.fullmatch2, + solutions <- mapply(.fullmatch, precomputed_parameters[['subproblems']], precomputed_parameters[['min.controls']], precomputed_parameters[['max.controls']], diff --git a/R/max.controls.cap.R b/R/max.controls.cap.R index a55625be..9ddacc45 100644 --- a/R/max.controls.cap.R +++ b/R/max.controls.cap.R @@ -68,10 +68,13 @@ maxControlsCap <- function(distance, min.controls = NULL, solver = "") nodes <- nodes_shell_fmatch(rownames = trnl, colnames = tcnl) t_nodes <- nodes_shell_fmatch(rownames = tcnl, colnames = trnl) # FEASIBILITY CHECK -- temp depends on whether problem requires flipping - temp <- solve_reg_fm_prob(node_info=nodes, distspec = tdm, - max.cpt = min(tlmxc, ncol), - min.cpt = max(tgmnc, 1/nrow), tolerance=.5, - omit.fraction = NULL, solver = solver) + temp <- solve_reg_fm_prob(node_info=nodes, + distspec = tdm, + max.cpt = min(tlmxc, ncol), + min.cpt = max(tgmnc, 1/nrow), + tolerance=.5, + omit.fraction = NULL, + solver = solver) # IF THE PROBLEM IS FEASIBLE, SET TLMXC TO GREATEST OBTAINED # RATIO OF CONTROLS TO TREATED UNITS. THIS MAY BE MUCH LESS THAN diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index aaefefd2..de79437a 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -21,27 +21,29 @@ ##* @param distspec InfinitySparseMatrix, matrix, etc (must have a `prepareMatching()` method) ##* @param min.cpt double, minimum permissible ratio of controls per treatment ##* @param max.cpt double, maximum permissible ratio of controls per treatment -##* @param tolerance +##* @param solver +##* @param epsilon double, grid width ##* @param omit.fraction ##* @return ##* @keywords internal solve_reg_fm_prob <- function(node_info, - distspec, - min.cpt, - max.cpt, - tolerance, - omit.fraction=NULL, - solver) { + distspec, + min.cpt, + max.cpt, + omit.fraction=NULL, + solver, + epsilon = NULL, + tolerance = NULL) { if (min.cpt <=0 | max.cpt<=0) { stop("inputs min.cpt, max.cpt must be positive") } - stopifnot(is(node_info, "NodeInfo")) - rownames <- subset(node_info[["name"]], node_info[['upstream_not_down']]) - colnames <- subset(node_info[["name"]], !node_info[['upstream_not_down']]) - nrows <- length(rownames) - ncols <- length(colnames) + stopifnot(is(node_info, "NodeInfo")) + rownames <- subset(node_info[["name"]], node_info[['upstream_not_down']]) + colnames <- subset(node_info[["name"]], !node_info[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) if (!all(rownames %in% dimnames(distspec)[[1]])) { stop("node_info rownames must be rownames for \'distspec\'") } @@ -55,27 +57,25 @@ solve_reg_fm_prob <- function(node_info, stop("Argument \'distspec\' must have a \'edgelist\' method") } - # convert the distspec to cannonical EdgeList dm <- edgelist(distspec, c(rownames, colnames)) - matchable_nodes_info <- - filter(node_info, - is.na(node_info[['upstream_not_down']]) | #retail bookkeeping nodes + filter(node_info, + is.na(node_info[['upstream_not_down']]) | #retail bookkeeping nodes is_matchable(node_info[['name']], dm, "either") - ) + ) rfeas <- sum( matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) cfeas <- sum(!matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) ## Update `omit.fraction`: if (cfeas < ncols & is.numeric(omit.fraction) && omit.fraction >0) { - original_number_to_omit <- omit.fraction*ncols - number_implicitly_omitted_already <- ncols - cfeas - omit.fraction <- - (original_number_to_omit - number_implicitly_omitted_already)/cfeas - ## If the number to be omitted is less than the number of unmatchable - ## columns, ncols - cfeas, then this omit.fraction can be negative. Then - ## we just omit a few more than directed by the supplied omit.fraction: - if (omit.fraction <= 0) omit.fraction <- NULL + original_number_to_omit <- omit.fraction*ncols + number_implicitly_omitted_already <- ncols - cfeas + omit.fraction <- + (original_number_to_omit - number_implicitly_omitted_already)/cfeas + ## If the number to be omitted is less than the number of unmatchable + ## columns, ncols - cfeas, then this omit.fraction can be negative. Then + ## we just omit a few more than directed by the supplied omit.fraction: + if (omit.fraction <= 0) omit.fraction <- NULL } if (is.null(omit.fraction)) { @@ -86,11 +86,10 @@ solve_reg_fm_prob <- function(node_info, } f.ctls <- 1-omit.fraction } -###### end up updating omit.fraction? - if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min - ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment - !rfeas | !cfeas # either no controls or no treatments - ) + if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min + ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment + !rfeas | !cfeas # either no controls or no treatments + ) { ans <- rep(NA_integer_, nrows + ncols) names(ans) <- c(rownames, colnames) @@ -98,16 +97,17 @@ solve_reg_fm_prob <- function(node_info, } - old.o <- options(warn=-1) - epsilon_lower_lim <- max(dm$'dist')/(.Machine$integer.max/64 -2) - epsilon <- if (tolerance>0 & rfeas>1 & cfeas>1) { - max(epsilon_lower_lim, tolerance/(rfeas + cfeas - 2)) - } else epsilon_lower_lim - options(old.o) - if (all(abs(dm$'dist'- 0) < sqrt(.Machine$double.eps))) { dm$'dist' <- rep(1L, length(dm$'dist')) # so we'll be routed to intSolve() } + + if (!is.null(tolerance) && is.null(epsilon)) { #if tolerance is provided rather than epsilon, calculate epsilon + epsilon <- calculate_epsilon(rfeas = rfeas, + cfeas = cfeas, + tolerance = tolerance, + maxdist = max(dm$'dist')) + } + temp <- if (is.integer(dm$'dist')) { intSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, solver) @@ -124,31 +124,31 @@ solve_reg_fm_prob <- function(node_info, names(ans) <- c(rownames, colnames) ans[names(matches)] <- matches - if (!is.null(temp[["MCFSolution"]])) - { - temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr - temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solutions==1L) - - ## Presently we can treat this subproblem as non-flipped even if it was, - ## since `dm` will have been transposed in the event of flipping. Doing - ## so will prevent the evaluate_* routines below from getting confused. - ## Of course we have to remember to set it based on actual information as - ## the MCFsolutions object passes up through the point in the call stack - ## where that transposition was made. - temp[["MCFSolution"]]@subproblems[1L, "flipped"] <- FALSE - ## ... and now we can proceed with: - evaluate_lagrangian(dm, temp[["MCFSolution"]]) -> - temp[["MCFSolution"]]@subproblems[1L, "lagrangian_value"] - evaluate_dual(dm, temp[["MCFSolution"]]) -> - temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] - nodeinfo(temp[["MCFSolution"]]) <- - update(node_info, nodeinfo(temp[["MCFSolution"]])) - } - - return(list(cells = ans, err = temp$maxerr, - MCFSolution=temp[["MCFSolution"]] - ) - ) + if (!is.null(temp[["MCFSolution"]])) + { + temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr + temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solutions==1L) + + ## Presently we can treat this subproblem as non-flipped even if it was, + ## since `dm` will have been transposed in the event of flipping. Doing + ## so will prevent the evaluate_* routines below from getting confused. + ## Of course we have to remember to set it based on actual information as + ## the MCFsolutions object passes up through the point in the call stack + ## where that transposition was made. + temp[["MCFSolution"]]@subproblems[1L, "flipped"] <- FALSE + ## ... and now we can proceed with: + evaluate_lagrangian(dm, temp[["MCFSolution"]]) -> + temp[["MCFSolution"]]@subproblems[1L, "lagrangian_value"] + evaluate_dual(dm, temp[["MCFSolution"]]) -> + temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] + nodeinfo(temp[["MCFSolution"]]) <- + update(node_info, nodeinfo(temp[["MCFSolution"]])) + } + + return(list(cells = ans, err = temp$maxerr, + MCFSolution=temp[["MCFSolution"]] + ) + ) } diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 494294fe..3ed5ef24 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -101,14 +101,6 @@ get_epsilon <- function(subproblem, flipped, nrows <- length(rownames) ncols <- length(colnames) - # matchable_nodes_info <- - # filter(hint, - # is.na(hint[['upstream_not_down']]) | #retail bookkeeping nodes - # is_matchable(hint[['name']], dm, "either") - # ) - # rfeas <- sum( matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) - # cfeas <- sum(!matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) - # sub.dmat <- remove_inf_na_rows_cols(subproblem) if (!flipped) { @@ -152,8 +144,6 @@ parse_subproblems <- function(problems, min.controls, { np <- length(problems) - #list of nodeInfo objects i think - flipped_status <- mapply(get_flipped_status, d = problems, omf = omit.fraction, @@ -172,12 +162,6 @@ parse_subproblems <- function(problems, min.controls, np = rep(np, np)) tolerances <- tol.fracs * TOL.in - - # subproblem.edgelists <- mapply(get_dms, - # d = problems, - # hint = hints, - # SIMPLIFY = FALSE) - # what about tolerances and epsilons when the problem is integer? account for this... hint.list <- mapply(prepare_subproblem_hint, d = problems, hint = hints, @@ -201,124 +185,7 @@ parse_subproblems <- function(problems, min.controls, } -solve_reg_fm_prob2 <- function(node_info, - distspec, - min.cpt, - max.cpt, - omit.fraction=NULL, - solver, - epsilon) { - - if (min.cpt <=0 | max.cpt<=0) { - stop("inputs min.cpt, max.cpt must be positive") - } - stopifnot(is(node_info, "NodeInfo")) - rownames <- subset(node_info[["name"]], node_info[['upstream_not_down']]) - colnames <- subset(node_info[["name"]], !node_info[['upstream_not_down']]) - nrows <- length(rownames) - ncols <- length(colnames) - if (!all(rownames %in% dimnames(distspec)[[1]])) { - stop("node_info rownames must be rownames for \'distspec\'") - } - - if (!all(colnames %in% dimnames(distspec)[[2]])) { - stop("node_info colnames must be colnames for \'distspec\'") - } - - # distance must have an edgelist method - if (!hasMethod("edgelist", class(distspec))) { - stop("Argument \'distspec\' must have a \'edgelist\' method") - } - - dm <- edgelist(distspec, c(rownames, colnames)) - - matchable_nodes_info <- - filter(node_info, - is.na(node_info[['upstream_not_down']]) | #retail bookkeeping nodes - is_matchable(node_info[['name']], dm, "either") - ) - rfeas <- sum( matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) - cfeas <- sum(!matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) - ## Update `omit.fraction`: - if (cfeas < ncols & is.numeric(omit.fraction) && omit.fraction >0) { - original_number_to_omit <- omit.fraction*ncols - number_implicitly_omitted_already <- ncols - cfeas - omit.fraction <- - (original_number_to_omit - number_implicitly_omitted_already)/cfeas - ## If the number to be omitted is less than the number of unmatchable - ## columns, ncols - cfeas, then this omit.fraction can be negative. Then - ## we just omit a few more than directed by the supplied omit.fraction: - if (omit.fraction <= 0) omit.fraction <- NULL - } - - if (is.null(omit.fraction)) { - f.ctls <- 1 - } else { - if (!is.numeric(omit.fraction) | omit.fraction <0 | omit.fraction > 1) { - stop("omit.fraction must be null or between 0 and 1") - } - f.ctls <- 1-omit.fraction - } - if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min - ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment - !rfeas | !cfeas # either no controls or no treatments - ) - { - ans <- rep(NA_integer_, nrows + ncols) - names(ans) <- c(rownames, colnames) - return(list(cells=ans, err=0, MCFSolution=NULL)) - } - - - if (all(abs(dm$'dist'- 0) < sqrt(.Machine$double.eps))) { - dm$'dist' <- rep(1L, length(dm$'dist')) # so we'll be routed to intSolve() - } - temp <- - if (is.integer(dm$'dist')) { - intSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, solver) - } else { - doubleSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, - rfeas, cfeas, epsilon, solver) - } - - temp$treated <- factor(temp[['i']]) # levels of these factors now convey - temp$control <- factor(temp[['j']]) # treatment/control distinction - matches <- solution2factor(temp) - - ans <- rep(NA_character_, nrows + ncols) - names(ans) <- c(rownames, colnames) - ans[names(matches)] <- matches - - if (!is.null(temp[["MCFSolution"]])) - { - temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr - temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solutions==1L) - - ## Presently we can treat this subproblem as non-flipped even if it was, - ## since `dm` will have been transposed in the event of flipping. Doing - ## so will prevent the evaluate_* routines below from getting confused. - ## Of course we have to remember to set it based on actual information as - ## the MCFsolutions object passes up through the point in the call stack - ## where that transposition was made. - temp[["MCFSolution"]]@subproblems[1L, "flipped"] <- FALSE - ## ... and now we can proceed with: - evaluate_lagrangian(dm, temp[["MCFSolution"]]) -> - temp[["MCFSolution"]]@subproblems[1L, "lagrangian_value"] - evaluate_dual(dm, temp[["MCFSolution"]]) -> - temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] - nodeinfo(temp[["MCFSolution"]]) <- - update(node_info, nodeinfo(temp[["MCFSolution"]])) - } - - return(list(cells = ans, err = temp$maxerr, - MCFSolution=temp[["MCFSolution"]] - ) - ) -} - - - -findSubproblemEpsilons <- function(tol = NULL, +find_subproblem_epsilons <- function(tol = NULL, epsilon = NULL, subproblems, x, @@ -354,30 +221,18 @@ findSubproblemEpsilons <- function(tol = NULL, } } return(tolerances) - - - } } remove_inf_na_rows_cols <- function(mat) { - # Identify rows that contain all Inf or NA rows_to_remove <- apply(mat, 1, function(row) all(is.infinite(row) | is.na(row))) - - # Identify columns that contain all Inf or NA cols_to_remove <- apply(mat, 2, function(col) all(is.infinite(col) | is.na(col))) - # Remove identified rows and columns - #cleaned_mat <- mat[!rows_to_remove, !cols_to_remove, drop = FALSE] if (sum(rows_to_remove) == 0 && sum(cols_to_remove) == 0) { cleaned_mat <- mat } else { cleaned_mat <- mat[!rows_to_remove, !cols_to_remove] } - - #cleaned_mat <- as.matrix(mat[!rows_to_remove, !cols_to_remove]) - #rownames(cleaned_mat) <- rownames(mat)[!rows_to_remove] - #colnames(cleaned_mat) <- colnames(mat)[!cols_to_remove] return(cleaned_mat) } From a460121ac5e32d798499c3565e20a038d94c196a Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 25 Jul 2024 00:10:59 -0400 Subject: [PATCH 07/31] small tweaks to fullmatch --- R/fullmatch.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/fullmatch.R b/R/fullmatch.R index 9c18717e..fbd716f4 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -545,7 +545,7 @@ fullmatch.matrix <- function(x, { feasible.condition <- mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE) } else { - feasible.condition <- mxctl.r * dim(t(d.r))[1] >= prod(dim(t(d.r))[2], 1-omf.r, na.rm=TRUE) + feasible.condition <- mxctl.r * dim(d.r)[2] >= prod(dim(d.r)[1], 1-omf.r, na.rm=TRUE) } if (feasible.condition) { @@ -561,7 +561,7 @@ fullmatch.matrix <- function(x, if(is.finite(mxctl.r) & mxctl.r >= 1) { # Re-solve with no max.control -- we most override the precomputed one now if (!flipped.r) { - mxctl.r.new <- min(Inf, ncol(d.r)) + mxctl.r.new <- ncol(d.r) } else { mxctl.r.new <- min(1/mnctl.r, ncol(d.r)) } From 5dfa8115cc9591931d3bc722d1680f256f71c2b7 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 29 Jul 2024 11:11:35 -0400 Subject: [PATCH 08/31] using ISM based solution for rfeas/cfeas/epsilon calcs --- R/subproblemutils.R | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 3ed5ef24..724242a0 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -101,19 +101,20 @@ get_epsilon <- function(subproblem, flipped, nrows <- length(rownames) ncols <- length(colnames) - sub.dmat <- remove_inf_na_rows_cols(subproblem) + sub.dmat <- as.InfinitySparseMatrix(subproblem) + if (!flipped) { - rfeas <- nrow(sub.dmat) - cfeas <- ncol(sub.dmat) + rfeas <- length(unique(sub.dmat@rows)) + cfeas <- length(unique(sub.dmat@cols)) } else { - rfeas <- ncol(sub.dmat) - cfeas <- nrow(sub.dmat) + rfeas <- length(unique(sub.dmat@cols)) + cfeas <- length(unique(sub.dmat@rows)) } #dealing with infinite values? - max_dist <- max(sub.dmat[is.finite(sub.dmat)], - na.rm = TRUE) + max_dist <- max(sub.dmat@.Data) + eps.val <- calculate_epsilon(rfeas, cfeas, tolerance, @@ -224,15 +225,3 @@ find_subproblem_epsilons <- function(tol = NULL, } } - -remove_inf_na_rows_cols <- function(mat) { - rows_to_remove <- apply(mat, 1, function(row) all(is.infinite(row) | is.na(row))) - cols_to_remove <- apply(mat, 2, function(col) all(is.infinite(col) | is.na(col))) - if (sum(rows_to_remove) == 0 && sum(cols_to_remove) == 0) - { - cleaned_mat <- mat - } else { - cleaned_mat <- mat[!rows_to_remove, !cols_to_remove] - } - return(cleaned_mat) -} From 432e630210a72b746925bd128237d6e6a266dbc3 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Wed, 31 Jul 2024 17:24:18 -0400 Subject: [PATCH 09/31] updating resolution/tolerance code --- NAMESPACE | 1 + R/fullmatch.R | 31 ++++++++---- R/pairmatch.R | 7 +++ R/subproblemutils.R | 98 +++++++++++++++++++++++++++++++++----- man/fullmatch.Rd | 2 + man/get_subproblem_info.Rd | 16 +++++++ man/pairmatch.Rd | 20 ++++++-- 7 files changed, 151 insertions(+), 24 deletions(-) create mode 100644 man/get_subproblem_info.Rd diff --git a/NAMESPACE b/NAMESPACE index c1e2017f..c10d2765 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ export(findSubproblems) export(full) export(fullmatch) export(getMaxProblemSize) +export(get_subproblem_info) export(mahal.dist) export(match_on) export(matched) diff --git a/R/fullmatch.R b/R/fullmatch.R index fbd716f4..d489676d 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -200,6 +200,7 @@ fullmatch <- function(x, tol = .001, data = NULL, solver = "", + resolution = NULL, ...) { # if x does not exist then print helpful error msg @@ -232,6 +233,7 @@ fullmatch.default <- function(x, data = NULL, solver = "", within = NULL, + resolution = NULL, ...) { if (!inherits(x, gsub("match_on.","",methods("match_on")))) { @@ -258,6 +260,7 @@ fullmatch.default <- function(x, tol=tol, data=mfd, solver=solver, + resolution=resolution, ...) attr(out, "call") <- match.call() out @@ -274,6 +277,7 @@ fullmatch.numeric <- function(x, solver = "", z, within = NULL, + resolution = NULL, ...) { m <- match_on(x, within=within, z=z, ...) @@ -285,6 +289,7 @@ fullmatch.numeric <- function(x, tol=tol, data=data, solver=solver, + resolution=resolution, ...) attr(out, "call") <- match.call() out @@ -301,10 +306,19 @@ fullmatch.matrix <- function(x, solver = "", within = NULL, hint, + resolution = NULL, ...) { hint <- if (missing(hint)) NULL else nodeinfo(hint) + # if (missing(hint)) + # { + # hint <- NULL + # } else { + # # extract the resolutions + # hint <- nodeinfo(hint) + # } + ### Checking Input ### # this will throw an error if not valid @@ -400,7 +414,6 @@ fullmatch.matrix <- function(x, hints <- hints[match(subproblemids, names(hints))] } - #hints <- get_hints(hint, problems) if (length(min.controls) > 1 & np != length(min.controls)) { if (is.null(names(min.controls))) @@ -610,13 +623,15 @@ fullmatch.matrix <- function(x, setTryRecovery() } - precomputed_parameters <- parse_subproblems(problems, - min.controls, - max.controls, - omit.fraction, - hints, - total.n, - TOL) + precomputed_parameters <- parse_subproblems(problems = problems, + min.controls = min.controls, + max.controls = max.controls, + omit.fraction = omit.fraction, + hints = hints, + total.n = total.n, + TOL.in = TOL, + resolution = resolution) + if (options()$fullmatch_try_recovery) { solutions <- mapply(.fullmatch.with.recovery, diff --git a/R/pairmatch.R b/R/pairmatch.R index 1edf95fc..75076cbe 100644 --- a/R/pairmatch.R +++ b/R/pairmatch.R @@ -69,6 +69,7 @@ pairmatch <- function(x, controls = 1, data = NULL, remove.unmatchables = FALSE, + resolution = NULL, ...) { # if x does not exist then print helpful error msg @@ -100,6 +101,7 @@ pairmatch.default <- function(x, data = NULL, remove.unmatchables = FALSE, within = NULL, + resolution = NULL, ...) { if (!inherits(x, gsub("match_on.","",methods("match_on")))) { @@ -122,6 +124,7 @@ pairmatch.default <- function(x, controls=controls, data=mfd, remove.unmatchables=remove.unmatchables, + resolution = resolution, ...) attr(out, "call") <- match.call() out @@ -134,6 +137,7 @@ pairmatch.numeric <- function(x, remove.unmatchables = FALSE, z, within = NULL, + resolution = NULL, ...) { m <- match_on(x, within=within, z=z, ...) @@ -141,6 +145,7 @@ pairmatch.numeric <- function(x, controls=controls, data=data, remove.unmatchables=remove.unmatchables, + resolution = resolution, ...) attr(out, "call") <- match.call() out @@ -152,6 +157,7 @@ pairmatch.matrix <- function(x, data = NULL, remove.unmatchables = FALSE, within = NULL, + resolution = NULL, ...) { validDistanceSpecification(x) # will stop() on error @@ -219,6 +225,7 @@ pairmatch.matrix <- function(x, max.controls = controls, omit.fraction = omf, data = data, + resolution = resolution, ...) if(!remove.unmatchables) { options("fullmatch_try_recovery" = saveopt) diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 724242a0..707fa511 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -141,10 +141,11 @@ calculate_epsilon <- function(rfeas, parse_subproblems <- function(problems, min.controls, max.controls, omit.fraction, hints, - total.n, TOL.in) + total.n, TOL.in, resolution = NULL) { np <- length(problems) - + subproblemids <- names(problems) + if (is.null(subproblemids)) subproblemids <- character(1L) flipped_status <- mapply(get_flipped_status, d = problems, omf = omit.fraction, @@ -157,25 +158,71 @@ parse_subproblems <- function(problems, min.controls, mincs <- lapply(flipped_status, function(x) x[['minc']]) maxcs <- lapply(flipped_status, function(x) x[['maxc']]) - tol.fracs <- mapply(get_tol_frac, - d = problems, - total_n = rep(total.n, np), - np = rep(np, np)) - tolerances <- tol.fracs * TOL.in + #browser() + #when no hint is provided, hints is a list of length np where everything is null + #when hint is provided, hint is a list of NodeInfo elements + hint.provided <- all(sapply(hints, function(x) !is.null(x))) hint.list <- mapply(prepare_subproblem_hint, d = problems, hint = hints, SIMPLIFY = FALSE) - epsilons <- mapply(get_epsilon, - hint = hint.list, - tolerance = tolerances, - subproblem = problems, - flipped = flippeds, - SIMPLIFY = FALSE) + + if (!is.null(resolution)) + { + length.res <- length(resolution) + names.res <- names(resolution) + + if (hint.provided) + { + if(any(!names.res %in% subproblemids)) + { + stop("Group specified in resolution argument cannot be found.") + } + # if resolution is specified for a few subproblems, do the default behavior first: + tol.fracs <- mapply(get_tol_frac, + d = problems, + total_n = rep(total.n, np), + np = rep(np, np)) + tolerances <- tol.fracs * TOL.in + + epsilons <- mapply(get_epsilon, + hint = hint.list, + tolerance = tolerances, + subproblem = problems, + flipped = flippeds, + SIMPLIFY = FALSE) + #Then, update with the new specified values + epsilons[names.res] <- resolution + + } else { + if(length.res != np || + !identical(sort(names.res), sort(subproblemids))) + { + stop("Resolutions must provided for each subproblem as a named list. Please ensure all subproblemids are correct and specified.") + } + + epsilons <- lapply(subproblemids, function(x) resolution[[x]]) + names(epsilons) <- subproblemids + } + } else { #no epsilons provided, just use the default behavior and convert from tolerance + tol.fracs <- mapply(get_tol_frac, + d = problems, + total_n = rep(total.n, np), + np = rep(np, np)) + tolerances <- tol.fracs * TOL.in + + epsilons <- mapply(get_epsilon, + hint = hint.list, + tolerance = tolerances, + subproblem = problems, + flipped = flippeds, + SIMPLIFY = FALSE) + } + tmp <- list(subproblems = problems, flipped_status = flippeds, min.controls = mincs, @@ -225,3 +272,28 @@ find_subproblem_epsilons <- function(tol = NULL, } } + + +#' Title +#' +#' @param object optmatch object +#' @param type either "subproblem" or "resolution" +#' +#' @return +#' @export +#' +#' @examples +get_subproblem_info <- function(object, type) +{ + + if (type == "subproblem") + { + return(attr(object, "subproblem")) + } + if (type == "resolution") + { + resolutions <- attr(object, "MCFSolutions")@subproblems$resolution + names(resolutions) <- attr(object, "MCFSolutions")@subproblems$groups + return(resolutions) + } +} diff --git a/man/fullmatch.Rd b/man/fullmatch.Rd index b71968f6..d59fe548 100644 --- a/man/fullmatch.Rd +++ b/man/fullmatch.Rd @@ -14,6 +14,7 @@ fullmatch( tol = 0.001, data = NULL, solver = "", + resolution = NULL, ... ) @@ -26,6 +27,7 @@ full( tol = 0.001, data = NULL, solver = "", + resolution = NULL, ... ) } diff --git a/man/get_subproblem_info.Rd b/man/get_subproblem_info.Rd new file mode 100644 index 00000000..e57c09cd --- /dev/null +++ b/man/get_subproblem_info.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/subproblemutils.R +\name{get_subproblem_info} +\alias{get_subproblem_info} +\title{Title} +\usage{ +get_subproblem_info(object, type) +} +\arguments{ +\item{object}{optmatch object} + +\item{type}{either "subproblem" or "resolution"} +} +\description{ +Title +} diff --git a/man/pairmatch.Rd b/man/pairmatch.Rd index 5a61d523..b93a1234 100644 --- a/man/pairmatch.Rd +++ b/man/pairmatch.Rd @@ -5,9 +5,23 @@ \alias{pair} \title{Optimal 1:1 and 1:k matching} \usage{ -pairmatch(x, controls = 1, data = NULL, remove.unmatchables = FALSE, ...) - -pair(x, controls = 1, data = NULL, remove.unmatchables = FALSE, ...) +pairmatch( + x, + controls = 1, + data = NULL, + remove.unmatchables = FALSE, + resolution = NULL, + ... +) + +pair( + x, + controls = 1, + data = NULL, + remove.unmatchables = FALSE, + resolution = NULL, + ... +) } \arguments{ \item{x}{Any valid input to \code{match_on}. If \code{x} is a numeric vector, From 64dff54a6018bafff8d3686eb344c874e0e89441 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Wed, 31 Jul 2024 17:25:21 -0400 Subject: [PATCH 10/31] adding tests --- tests/testthat/test.subproblemutils.R | 158 ++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 tests/testthat/test.subproblemutils.R diff --git a/tests/testthat/test.subproblemutils.R b/tests/testthat/test.subproblemutils.R new file mode 100644 index 00000000..897cd997 --- /dev/null +++ b/tests/testthat/test.subproblemutils.R @@ -0,0 +1,158 @@ +################################################################################ +# Testing resolution / tolerance specification +################################################################################ + +context("test subproblem resolution specification functionality") + +test_that("get_subproblem_info basics", { + d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) + dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) + + res.mat <- fullmatch(dist, data=d) + expect_true(identical(length(unique(get_subproblem_info(res.mat, type = "subproblem"))), 1L)) + expect_true(identical(length(get_subproblem_info(res.mat, type = "resolution")), 1L)) + + data(nuclearplants) + f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) + + expect_true(identical(length(unique(get_subproblem_info(f2, type = "subproblem"))), 2L)) + expect_true(identical(length(get_subproblem_info(f2, type = "resolution")), 2L)) +}) + +test_that("fullmatch: basic error checks", { + data(nuclearplants) + res.custom <- list("0" = 0.001142857, "1" = 0.001142857) + expect_silent(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom)) + bad.res <- list("2" = 0.001142857, "1" = 0.001142857) + # subproblem is not in the original specification + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + # no hint + subset of epsilons should fail + bad.res <- list("1" = 0.001142857) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) +}) + +test_that("pairmatch: basic error checks", { + data(nuclearplants) + + psm <- glm(pr~.-(pr+cost), family=binomial(), data=nuclearplants) + em <- exactMatch(pr ~ pt, data = nuclearplants) + res.pm <- pairmatch(match_on(psm) + em, data=nuclearplants) + res.custom <- list("0" = 0.001142857, "1" = 0.001142857) + expect_silent(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = res.custom)) + + bad.res <- list("2" = 0.001142857, "1" = 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + bad.res <- list("1" = 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) +}) + +test_that("fullmatch: error checking with hints", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + mos <- match_on(z ~ x + strata(fac), data=data) + f1b <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1) + res.custom <- list("b" = 0.0001666667) + expect_silent(fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b, resolution = res.custom)) + + res.custom <- list("b" = 0.0001666667, "a" = 0.0001666667) #out of order should be ok + expect_silent(fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b, resolution = res.custom)) + + res.custom <- list("b" = 0.01) + expect_silent(fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) ) +}) + + +test_that("fullmatch: with hint", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + + mos <- match_on(z ~ x + strata(fac), data=data) + f1b <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1) + + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b) + res.custom <- list("b" = 0.0001666667) + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) + resos <- get_subproblem_info(f1c, "resolution") + expect_true(resos["b"] == 0.0001666667) + + res.custom <- list("b" = 0.01) + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) + resos <- get_subproblem_info(f1c, "resolution") + expect_true(resos["b"] == 0.01) + + res.custom <- list("b" = 0.01, "a" = .001) + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) + resos <- get_subproblem_info(f1c, "resolution") + expect_true(resos["b"] == 0.01) + expect_true(resos["a"] == 0.001) +}) + +test_that("fullmatch: subproblem specification", { + data(nuclearplants) + f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) + res.custom <- list("0" = 0.001, "1" = 0.01) + f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom) + resos.old <- get_subproblem_info(f1, "resolution") + resos <- get_subproblem_info(f2, "resolution") + expect_true(resos["0"] == 0.001) + expect_true(resos["1"] == 0.01) + expect_true(all(resos.old != resos)) +}) + + +test_that("pairmatch: error checking with hints", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + mos <- match_on(z ~ x + strata(fac), data=data) + p1b <- pairmatch(mos, data = data, tol=0.1) + + res.custom <- list("b" = 0.001, "a" = 0.01) #out of order should be ok + expect_silent(pairmatch(mos, data = data, hint=p1b, resolution = res.custom)) + res.custom <- list("b" = 0.001) + expect_silent(pairmatch(mos, data = data, hint=p1b, resolution = res.custom)) +}) + +test_that("pairmatch: with hint", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + mos <- match_on(z ~ x + strata(fac), data=data) + p1b <- pairmatch(mos, data = data, tol=0.1) + + res.custom <- list("b" = 0.001, "a" = 0.01) #out of order should be ok + pm1 <- pairmatch(mos, data = data, hint=p1b, resolution = res.custom) + resos <- get_subproblem_info(pm1, "resolution") + expect_true(resos["b"] == 0.001) + expect_true(resos["a"] == 0.01) + + res.custom <- list("b" = 0.001) #should be able to specify subset here + pm1 <- pairmatch(mos, data = data, hint=p1b, resolution = res.custom) + resos <- get_subproblem_info(pm1, "resolution") + expect_true(resos["b"] == 0.001) + +}) + +test_that("pairmatch: subproblem specification", { + data(nuclearplants) + psm <- glm(pr~.-(pr+cost), family=binomial(), data=nuclearplants) + em <- exactMatch(pr ~ pt, data = nuclearplants) + res.pm <- pairmatch(match_on(psm) + em, data=nuclearplants) + res.custom <- list("0" = 0.001, + "1" = 0.01) + pm1 <- pairmatch(match_on(psm) + em, data=nuclearplants, resolution = res.custom) + resos.old <- get_subproblem_info(res.pm, "resolution") + resos <- get_subproblem_info(pm1, "resolution") + expect_true(resos["0"] == 0.001) + expect_true(resos["1"] == 0.01) + expect_true(all(resos.old != resos)) +}) From 0661bd9b3d4929cb3dd47967c1b425842a55c6c1 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Fri, 2 Aug 2024 15:18:55 -0400 Subject: [PATCH 11/31] improving handling of the no-subproblem case --- R/subproblemutils.R | 31 +++++++++++++++++++++++++-- tests/testthat/test.subproblemutils.R | 22 +++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 707fa511..3aaba123 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -174,6 +174,23 @@ parse_subproblems <- function(problems, min.controls, length.res <- length(resolution) names.res <- names(resolution) + if (all(is.null(names.res))) + { + if (identical(np, 1L) && + identical(length.res, 1L)) + { + names.res <- character(1L) + names(resolution) <- names.res + } else { + stop("No subproblem ids are specified.") + } + } + # specified.names <- sum(names.res != "") + # if (specified.names < length.res) + # { + # stop("Some subproblem ids are missing") + # } # this should be caught later on when checking that ids are present + if (hint.provided) { if(any(!names.res %in% subproblemids)) @@ -203,8 +220,18 @@ parse_subproblems <- function(problems, min.controls, stop("Resolutions must provided for each subproblem as a named list. Please ensure all subproblemids are correct and specified.") } - epsilons <- lapply(subproblemids, function(x) resolution[[x]]) - names(epsilons) <- subproblemids + + if (length(resolution) == 1 && + names(resolution) == "" && + np == 1 && all(subproblemids == "")) # case of one subproblem + { + epsilons <- resolution + names(epsilons) <- character(1L) + } else { + epsilons <- lapply(subproblemids, function(x) resolution[[x]]) + names(epsilons) <- subproblemids + } + } diff --git a/tests/testthat/test.subproblemutils.R b/tests/testthat/test.subproblemutils.R index 897cd997..eca7e126 100644 --- a/tests/testthat/test.subproblemutils.R +++ b/tests/testthat/test.subproblemutils.R @@ -156,3 +156,25 @@ test_that("pairmatch: subproblem specification", { expect_true(resos["1"] == 0.01) expect_true(all(resos.old != resos)) }) + + +test_that("fullmatch: no subproblems", { + d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) + dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) + res.mat <- fullmatch(dist, data=d, resolution = list(.1)) + resos <- get_subproblem_info(res.mat, "resolution") + expect_true(length(resos) == 1) + expect_true(all(resos == .1)) +}) + +test_that("pairmatch: no subproblems", { + data(nuclearplants) + + psm <- glm(pr~.-(pr+cost), family=binomial(), data=nuclearplants) + res.pm <- pairmatch(match_on(psm), data=nuclearplants, resolution = list(.1)) + resos <- get_subproblem_info(res.pm, "resolution") + expect_true(length(resos) == 1) + expect_true(all(resos == .1)) +}) From 336efb3c125ffb6510e5bc0f4c6665003b44c7c9 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Sun, 4 Aug 2024 10:52:40 -0400 Subject: [PATCH 12/31] adding more explicit tests about unnamed resolutions --- tests/testthat/test.subproblemutils.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/testthat/test.subproblemutils.R b/tests/testthat/test.subproblemutils.R index eca7e126..bede2b8e 100644 --- a/tests/testthat/test.subproblemutils.R +++ b/tests/testthat/test.subproblemutils.R @@ -31,6 +31,17 @@ test_that("fullmatch: basic error checks", { # no hint + subset of epsilons should fail bad.res <- list("1" = 0.001142857) expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + #unnamed arguments should fail, length 1 list is a special case + bad.res <- list(0.001142857) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + #unnamed arguments should fail + bad.res <- list(0.001142857, "1" =.35) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + + #unnamed arguments should fail + bad.res <- list(0.001142857, .35) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + }) test_that("pairmatch: basic error checks", { @@ -47,6 +58,18 @@ test_that("pairmatch: basic error checks", { bad.res <- list("1" = 0.001142857) expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + + bad.res <- list(0.001142857, 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + bad.res <- list("1" = 0.001142857, + 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + bad.res <- list(0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + }) test_that("fullmatch: error checking with hints", { From 559929c53d3d95ac68b888a194f2001912b86e49 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Sun, 4 Aug 2024 12:30:55 -0400 Subject: [PATCH 13/31] minor cleanup + pmatch --- R/fullmatch.R | 8 ------ R/subproblemutils.R | 30 +++++++++++----------- man/get_subproblem_info.Rd | 2 +- tests/testthat/test.subproblemutils.R | 36 +++++++++++++++------------ 4 files changed, 35 insertions(+), 41 deletions(-) diff --git a/R/fullmatch.R b/R/fullmatch.R index d489676d..ed5ceb0c 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -311,14 +311,6 @@ fullmatch.matrix <- function(x, hint <- if (missing(hint)) NULL else nodeinfo(hint) - # if (missing(hint)) - # { - # hint <- NULL - # } else { - # # extract the resolutions - # hint <- nodeinfo(hint) - # } - ### Checking Input ### # this will throw an error if not valid diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 3aaba123..2dc43c1d 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -158,7 +158,6 @@ parse_subproblems <- function(problems, min.controls, mincs <- lapply(flipped_status, function(x) x[['minc']]) maxcs <- lapply(flipped_status, function(x) x[['maxc']]) - #browser() #when no hint is provided, hints is a list of length np where everything is null #when hint is provided, hint is a list of NodeInfo elements hint.provided <- all(sapply(hints, function(x) !is.null(x))) @@ -168,7 +167,6 @@ parse_subproblems <- function(problems, min.controls, hint = hints, SIMPLIFY = FALSE) - if (!is.null(resolution)) { length.res <- length(resolution) @@ -185,11 +183,6 @@ parse_subproblems <- function(problems, min.controls, stop("No subproblem ids are specified.") } } - # specified.names <- sum(names.res != "") - # if (specified.names < length.res) - # { - # stop("Some subproblem ids are missing") - # } # this should be caught later on when checking that ids are present if (hint.provided) { @@ -304,7 +297,7 @@ find_subproblem_epsilons <- function(tol = NULL, #' Title #' #' @param object optmatch object -#' @param type either "subproblem" or "resolution" +#' @param type either "subproblem" or any of the column names from the subproblems table associated with an MCFSolutions object (e.g. "resolution") #' #' @return #' @export @@ -312,15 +305,20 @@ find_subproblem_epsilons <- function(tol = NULL, #' @examples get_subproblem_info <- function(object, type) { - - if (type == "subproblem") - { - return(attr(object, "subproblem")) + if (!inherits(object, "optmatch")){ + stop("Please provide an optmatch object") } - if (type == "resolution") + if (all(type == "subproblem")) { - resolutions <- attr(object, "MCFSolutions")@subproblems$resolution - names(resolutions) <- attr(object, "MCFSolutions")@subproblems$groups - return(resolutions) + return(attr(object, "subproblem")) + } else { + subprob.attributes <- colnames(attr(object, "MCFSolutions")@subproblems) + + idx <- pmatch(type, subprob.attributes, nomatch = NA) + idx <- idx[!is.na(idx)] + + tb <- attr(object, "MCFSolutions")@subproblems + res <- tb[, idx] + return(res) } } diff --git a/man/get_subproblem_info.Rd b/man/get_subproblem_info.Rd index e57c09cd..34f5dbda 100644 --- a/man/get_subproblem_info.Rd +++ b/man/get_subproblem_info.Rd @@ -9,7 +9,7 @@ get_subproblem_info(object, type) \arguments{ \item{object}{optmatch object} -\item{type}{either "subproblem" or "resolution"} +\item{type}{either "subproblem" or any of the column names from the subproblems table associated with an MCFSolutions object (e.g. "resolution")} } \description{ Title diff --git a/tests/testthat/test.subproblemutils.R b/tests/testthat/test.subproblemutils.R index bede2b8e..6146c86b 100644 --- a/tests/testthat/test.subproblemutils.R +++ b/tests/testthat/test.subproblemutils.R @@ -19,6 +19,9 @@ test_that("get_subproblem_info basics", { expect_true(identical(length(unique(get_subproblem_info(f2, type = "subproblem"))), 2L)) expect_true(identical(length(get_subproblem_info(f2, type = "resolution")), 2L)) + dt <- get_subproblem_info(f2, type = c("resolution", "group")) + expect_true(all(dim(dt) == c(2,2))) + }) test_that("fullmatch: basic error checks", { @@ -102,19 +105,19 @@ test_that("fullmatch: with hint", { f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b) res.custom <- list("b" = 0.0001666667) f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) - resos <- get_subproblem_info(f1c, "resolution") - expect_true(resos["b"] == 0.0001666667) + resos <- get_subproblem_info(f1c, c("resolution", "groups")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == 0.0001666667) res.custom <- list("b" = 0.01) f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) - resos <- get_subproblem_info(f1c, "resolution") - expect_true(resos["b"] == 0.01) + resos <- get_subproblem_info(f1c, c("resolution", "groups")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .01) res.custom <- list("b" = 0.01, "a" = .001) f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) - resos <- get_subproblem_info(f1c, "resolution") - expect_true(resos["b"] == 0.01) - expect_true(resos["a"] == 0.001) + resos <- get_subproblem_info(f1c, c("resolution", "groups")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .01) + expect_true(resos[resos[, "groups"] == "a", "resolution"] == .001) }) test_that("fullmatch: subproblem specification", { @@ -124,8 +127,9 @@ test_that("fullmatch: subproblem specification", { f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom) resos.old <- get_subproblem_info(f1, "resolution") resos <- get_subproblem_info(f2, "resolution") - expect_true(resos["0"] == 0.001) - expect_true(resos["1"] == 0.01) + # relying on ordering here, but testing to make sure dimensions are reduced correctly + expect_true(resos[1] == 0.001) + expect_true(resos[2] == 0.01) expect_true(all(resos.old != resos)) }) @@ -154,14 +158,14 @@ test_that("pairmatch: with hint", { res.custom <- list("b" = 0.001, "a" = 0.01) #out of order should be ok pm1 <- pairmatch(mos, data = data, hint=p1b, resolution = res.custom) - resos <- get_subproblem_info(pm1, "resolution") - expect_true(resos["b"] == 0.001) - expect_true(resos["a"] == 0.01) + resos <- get_subproblem_info(pm1, c("resolution", "group")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .001) + expect_true(resos[resos[, "groups"] == "a", "resolution"] == .01) res.custom <- list("b" = 0.001) #should be able to specify subset here pm1 <- pairmatch(mos, data = data, hint=p1b, resolution = res.custom) - resos <- get_subproblem_info(pm1, "resolution") - expect_true(resos["b"] == 0.001) + resos <- get_subproblem_info(pm1, c("resolution", "group")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .001) }) @@ -175,8 +179,8 @@ test_that("pairmatch: subproblem specification", { pm1 <- pairmatch(match_on(psm) + em, data=nuclearplants, resolution = res.custom) resos.old <- get_subproblem_info(res.pm, "resolution") resos <- get_subproblem_info(pm1, "resolution") - expect_true(resos["0"] == 0.001) - expect_true(resos["1"] == 0.01) + expect_true(resos[1] == 0.001) + expect_true(resos[2] == 0.01) expect_true(all(resos.old != resos)) }) From a6874cf45912083dfe083c7dde4d23ad4c0fb8ee Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 5 Aug 2024 14:57:46 -0400 Subject: [PATCH 14/31] fixing documentation --- R/subproblemutils.R | 8 ++++++-- man/get_subproblem_info.Rd | 13 +++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 2dc43c1d..33c7f649 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -294,15 +294,19 @@ find_subproblem_epsilons <- function(tol = NULL, } -#' Title +#' Get information about subproblems #' #' @param object optmatch object #' @param type either "subproblem" or any of the column names from the subproblems table associated with an MCFSolutions object (e.g. "resolution") #' -#' @return +#' @return if \code{type = "subproblem"}, a named vector mapping units to subproblems is returned. If one or more columns from the subproblems table is specified, that column subset is returned. This might result in either a vector or data frame being returned. #' @export #' #' @examples +#' data(nuclearplants) +#' f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) +#' get_subproblem_info(object = f1, type = c("resolution", "groups")) +#' get_subproblem_info(object = f1, type = "subproblem") get_subproblem_info <- function(object, type) { if (!inherits(object, "optmatch")){ diff --git a/man/get_subproblem_info.Rd b/man/get_subproblem_info.Rd index 34f5dbda..94da693a 100644 --- a/man/get_subproblem_info.Rd +++ b/man/get_subproblem_info.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/subproblemutils.R \name{get_subproblem_info} \alias{get_subproblem_info} -\title{Title} +\title{Get information about subproblems} \usage{ get_subproblem_info(object, type) } @@ -11,6 +11,15 @@ get_subproblem_info(object, type) \item{type}{either "subproblem" or any of the column names from the subproblems table associated with an MCFSolutions object (e.g. "resolution")} } +\value{ +if \code{type = "subproblem"}, a named vector mapping units to subproblems is returned. If one or more columns from the subproblems table is specified, that column subset is returned. This might result in either a vector or data frame being returned. +} \description{ -Title +Get information about subproblems +} +\examples{ +data(nuclearplants) +f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) +get_subproblem_info(object = f1, type = c("resolution", "groups")) +get_subproblem_info(object = f1, type = "subproblem") } From 74dd5760b5f52b17d6a77b5f9c0f355d6c3c8c0f Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Wed, 7 Aug 2024 14:07:02 -0400 Subject: [PATCH 15/31] fixing infeasibility tracking logic --- R/solve_reg_fm_prob.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index a48ce1be..a69adc2c 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -127,7 +127,7 @@ solve_reg_fm_prob <- function(node_info, if (!is.null(temp[["MCFSolution"]])) { temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr - temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solutions==1L) + temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solution==1L) ## Presently we can treat this subproblem as non-flipped even if it was, ## since `dm` will have been transposed in the event of flipping. Doing From 0a4a9aed3fb81a0fe05b3047277740af1f1c1162 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Wed, 7 Aug 2024 14:44:23 -0400 Subject: [PATCH 16/31] replacing lagrangian with primal --- R/MCFSolutions.R | 6 +-- R/solve_reg_fm_prob.R | 4 +- tests/testthat/test.MCFSolutions.R | 46 ++++++++++---------- tests/testthat/test.complementarySlackness.R | 4 +- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/R/MCFSolutions.R b/R/MCFSolutions.R index 77931118..e26cfbbd 100644 --- a/R/MCFSolutions.R +++ b/R/MCFSolutions.R @@ -7,16 +7,16 @@ setOldClass("data.frame") setClass("SubProbInfo", contains="data.frame", prototype= prototype(data.frame(groups=character(1), flipped=NA, hashed_dist=NA_character_, - resolution=1, lagrangian_value=NA_real_, + resolution=1, primal_value=NA_real_, dual_value=NA_real_, feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) ) ) setValidity("SubProbInfo", function(object){ errors <- character(0) if (!all(colnames(object)[1:8]== - c("groups","flipped", "hashed_dist","resolution","lagrangian_value","dual_value", "feasible", "exceedance"))) + c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance"))) errors <- c(errors, - 'Cols 1-8 should be:\n\t c("groups","flipped", "hashed_dist","resolution","lagrangian_value","dual_value", "feasible", "exceedance")') + 'Cols 1-8 should be:\n\t c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance")') if (!all(vapply(object[c(1,3)], is.character, logical(1)))) errors <- c(errors, 'Cols 1,3 should have type character.') diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index a69adc2c..9feed56f 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -137,8 +137,8 @@ solve_reg_fm_prob <- function(node_info, ## where that transposition was made. temp[["MCFSolution"]]@subproblems[1L, "flipped"] <- FALSE ## ... and now we can proceed with: - evaluate_lagrangian(dm, temp[["MCFSolution"]]) -> - temp[["MCFSolution"]]@subproblems[1L, "lagrangian_value"] + evaluate_primal(dm, temp[["MCFSolution"]]) -> + temp[["MCFSolution"]]@subproblems[1L, "primal_value"] evaluate_dual(dm, temp[["MCFSolution"]]) -> temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] nodeinfo(temp[["MCFSolution"]]) <- diff --git a/tests/testthat/test.MCFSolutions.R b/tests/testthat/test.MCFSolutions.R index 8b44c0e7..3b519a10 100644 --- a/tests/testthat/test.MCFSolutions.R +++ b/tests/testthat/test.MCFSolutions.R @@ -3,7 +3,7 @@ context("MCFSolutions & co: S4 classes to encode min-cost-flow solutions") test_that("Instantiation & validity", { expect_true(validObject(new("SubProbInfo"))) # test the prototype expect_silent(spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), lagrangian_value=c(.5, 2), dual_value=c(0,1), + resolution=c(1,10), primal_value=c(.5, 2), dual_value=c(0,1), feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) ) ) @@ -13,7 +13,7 @@ test_that("Instantiation & validity", { colnames(spi2)[1] <- "Subprob" expect_error(validObject(spi2), "Cols 1-8 should be") - expect_true(validObject(new("NodeInfo"))) # test the prototype + expect_true(validObject(new("NodeInfo"))) # test the prototype expect_silent(ni1 <- new("NodeInfo", data.frame(name='a', price=0.5, upstream_not_down=TRUE, supply=1L, groups = as.factor('b'), @@ -48,7 +48,7 @@ test_that("Instantiation & validity", { supply=1L, groups = as.factor('b'), stringsAsFactors=F) ), - "unique" + "unique" ) expect_true(validObject(new("ArcInfo"))) # test the prototype expect_silent(ai <- new("ArcInfo", @@ -72,7 +72,7 @@ test_that("Instantiation & validity", { bookkeeping=data.frame(groups = as.factor('a'), start = as.factor(c('c','d')), end = as.factor('(_Sink_)'), flow=1.5, capacity=1L, stringsAsFactors=F) - ), "should have type integer" # Not sure it's necessary, but insisting + ), "should have type integer" # Not sure it's necessary, but insisting ) # that 'flow' be integer not double expect_error(new("ArcInfo", matches=data.frame(groups = as.factor('a'), upstream = as.factor('b'), @@ -80,8 +80,8 @@ test_that("Instantiation & validity", { bookkeeping=data.frame(groups = as.factor('a'), start = as.factor(c('c','d')), end = as.factor('(_Sink_)'), flow=-1L, capacity=1L, stringsAsFactors=F) - ), "should be nonnegative" - ) + ), "should be nonnegative" + ) expect_error(new("ArcInfo", matches=data.frame(groups = as.factor('a'), upstream = as.factor('b'), @@ -91,9 +91,9 @@ test_that("Instantiation & validity", { start=as.factor(c('c','d')), end=as.factor('(_Sink_)'), flow=2L, capacity=1L, stringsAsFactors=F) - ), "flow can be no greater than capacity" - ) - + ), "flow can be no greater than capacity" + ) + expect_silent(mcf1f <- new("MCFSolutions", subproblems=spi1,nodes=ni1f,arcs=ai)) expect_silent(as(mcf1f, "FullmatchMCFSolutions")) expect_equal(node.labels(mcf1f), node.labels(ni1f)) @@ -109,24 +109,24 @@ expect_setequal(names(getSlots("MCFSolutions")), test_that("c() methods", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), lagrangian_value=c(.5, 2), dual_value=c(0,1), + resolution=c(1,10), primal_value=c(.5, 2), dual_value=c(0,1), feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) ) spi2 <- new("SubProbInfo", data.frame(groups=c('c'), flipped=logical(1), hashed_dist=c('a'), - resolution=c(1), lagrangian_value=c(.5), dual_value=c(0), + resolution=c(1), primal_value=c(.5), dual_value=c(0), feasible=c(TRUE), exceedance=1, stringsAsFactors=F) ) spi3 <- new("SubProbInfo", data.frame(groups=c('d'), flipped=logical(1), hashed_dist=c('a'), - resolution=c(1), lagrangian_value=c(.5), dual_value=c(0), + resolution=c(1), primal_value=c(.5), dual_value=c(0), feasible=c(TRUE), exceedance=1, stringsAsFactors=F) ) - + expect_true(validObject(c(spi1, spi2))) expect_true(validObject(c(spi1, spi2, spi3))) expect_true(validObject(c(a=spi1, b=spi2))) # no confusion just b/c no `x=` arg! - + ni1f <- new("NodeInfo", data.frame(name=c('b', 'c', 'd', '(_Sink_)', '(_End_)'), @@ -141,7 +141,7 @@ test_that("c() methods", { node.labels(ni1f) <- ni1f[['name']] ni1f.a <- ni1f.b <- ni1f.c <- ni1f ni1f.a[,'groups'] <- factor(rep('a', nrow(ni1f))) - ni1f.c[,'groups'] <- factor(rep('c', nrow(ni1f))) + ni1f.c[,'groups'] <- factor(rep('c', nrow(ni1f))) expect_true(validObject(c(ni1f.a, ni1f.b))) expect_true(validObject(c(ni1f.a, ni1f.b, ni1f.c))) ni2 <- new("NodeInfo", @@ -172,7 +172,7 @@ test_that("c() methods", { expect_true(validObject(ai1)) expect_true(validObject(c(ai1, ai1))) expect_true(validObject(c(x=ai1, y=ai1, z=ai1))) - some_levs <- c(letters[2:5], '(_Sink_)', '(_End_)') + some_levs <- c(letters[2:5], '(_Sink_)', '(_End_)') ai2 <- new("ArcInfo", matches=data.frame(groups = factor('c'), upstream = factor('b', levels=some_levs), @@ -199,7 +199,7 @@ test_that("c() methods", { mcf2 <- new("MCFSolutions", subproblems=spi2,nodes=ni2,arcs=ai2) expect_true(validObject(mcf1)) expect_true(validObject(mcf2)) - + expect_error(c(mcf1, mcf1), "uplicates") expect_true(validObject(c(mcf1, mcf2))) expect_true(validObject(c(y=mcf1, z=mcf2))) @@ -241,10 +241,10 @@ test_that("NodeInfo to tibble converter", { node.labels(ni1f) <- ni1f[['name']] expect_silent(ni_tbl <- as(ni1f, "tbl_df")) expect_is(ni_tbl$nodelabels, "factor") - expect_equivalent(as.character(ni_tbl$nodelabels), + expect_equivalent(as.character(ni_tbl$nodelabels), c('b', 'c', 'd', '(_Sink_)', '(_End_)') ) - expect_equivalent(levels(ni_tbl$nodelabels), + expect_equivalent(levels(ni_tbl$nodelabels), c('b', 'c', 'd', '(_Sink_)', '(_End_)') ) # default encoding would start w/ "(_End_)", "(_Sink_)" }) @@ -262,9 +262,9 @@ test_that("Preserve levels when filtering a node info tibble",{ ni_tbl <- as(ni1f, "tbl_df") expect_silent(ni_tbl_s <- dplyr::filter(ni_tbl, name %in% letters)) expect_is(ni_tbl_s$nodelabels, "factor") - expect_equivalent(levels(ni_tbl_s$nodelabels), + expect_equivalent(levels(ni_tbl_s$nodelabels), c('b', 'c', 'd', '(_Sink_)', '(_End_)') - ) + ) }) test_that("Node labels getter",{ expect_silent(mcf <- new("MCFSolutions")) #prelim- @@ -287,7 +287,7 @@ test_that("filtering on groups/subproblem field", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), lagrangian_value=c(.5, 2), dual_value=c(0,1), + resolution=c(1,10), primal_value=c(.5, 2), dual_value=c(0,1), feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) ) expect_error(filter_by_subproblem(spi1, groups="a"), "implemented") @@ -364,7 +364,7 @@ test_that("NodeInfo subsetting", { expect_equal(nrow(ni0_o), 6) expect_silent(ni0_n <- filter(ni0_o, name!=2 & name!=4)) expect_is(ni0_n, "NodeInfo") - expect_equal(nrow(ni0_n), 4) + expect_equal(nrow(ni0_n), 4) }) test_that("Pull updated prices & supplies into a NodeInfo",{ expect_silent(ni0_o <- nodes_shell_fmatch(c(1,2), c(3,4))) diff --git a/tests/testthat/test.complementarySlackness.R b/tests/testthat/test.complementarySlackness.R index 8d53acc4..87d4962b 100644 --- a/tests/testthat/test.complementarySlackness.R +++ b/tests/testthat/test.complementarySlackness.R @@ -35,7 +35,7 @@ make_known_optimal <- function(flipped=FALSE) { )) subprob <- new("SubProbInfo", data.frame(groups="a", flipped=flipped, hashed_dist=character(1), - resolution=NA_real_, lagrangian_value=NA_real_, dual_value=NA_real_, + resolution=NA_real_, primal_value=NA_real_, dual_value=NA_real_, feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) ) mcf_solution <- new("MCFSolutions", subproblems=subprob, nodes=nodes, arcs=arcs) @@ -95,7 +95,7 @@ make_known_optimal_fullm <- function(flipped=FALSE) ) subprob <- new("SubProbInfo", data.frame(groups='b', flipped=flipped, hashed_dist=character(1), - resolution=NA_real_, lagrangian_value=NA_real_, dual_value=NA_real_, + resolution=NA_real_, primal_value=NA_real_, dual_value=NA_real_, feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) ) mcf_solution <- new("MCFSolutions", subproblems=subprob, nodes=nodes, arcs=arcs) From 46e35703a2ae83c4addd8126d89987f050bea032 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 8 Aug 2024 11:15:58 -0700 Subject: [PATCH 17/31] adjusting primal/dual values to make more substantive sense markmfredrickson/optmatch/#164 --- tests/testthat/test.MCFSolutions.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test.MCFSolutions.R b/tests/testthat/test.MCFSolutions.R index 3b519a10..806ec642 100644 --- a/tests/testthat/test.MCFSolutions.R +++ b/tests/testthat/test.MCFSolutions.R @@ -3,7 +3,7 @@ context("MCFSolutions & co: S4 classes to encode min-cost-flow solutions") test_that("Instantiation & validity", { expect_true(validObject(new("SubProbInfo"))) # test the prototype expect_silent(spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), primal_value=c(.5, 2), dual_value=c(0,1), + resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) ) ) @@ -109,7 +109,7 @@ expect_setequal(names(getSlots("MCFSolutions")), test_that("c() methods", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), primal_value=c(.5, 2), dual_value=c(0,1), + resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) ) spi2 <- new("SubProbInfo", @@ -287,7 +287,7 @@ test_that("filtering on groups/subproblem field", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), primal_value=c(.5, 2), dual_value=c(0,1), + resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) ) expect_error(filter_by_subproblem(spi1, groups="a"), "implemented") From 5e7dadadee26682e09460f5fb1a4cdb89384be0f Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 8 Aug 2024 11:16:34 -0700 Subject: [PATCH 18/31] switching lagrangian to primal in vignette --- vignettes/MCFSolutions.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/MCFSolutions.Rmd b/vignettes/MCFSolutions.Rmd index f910831e..8589d2e2 100644 --- a/vignettes/MCFSolutions.Rmd +++ b/vignettes/MCFSolutions.Rmd @@ -48,7 +48,7 @@ An `SubProbInfo` is an S4-typed data frame bearing information about (sub)proble - `flipped`, logical (do upstream nodes of MCF representation correspond to _columns_ of match distance matrix, as opposed to rows, the default?); - `hashed_dist`, character (hashed ID for a double precision distance); - `resolution`, double (grid-width for discretization of distances & node prices before rounding and handing off to solver); -- `lagrangian_value`, numeric (as determined by back-transformed node prices, arc costs and arc flows); +- `primal_value`, numeric (as determined by back-transformed node prices, arc costs and arc flows); - `dual_value`, numeric (as determined by back-transformed node prices and arc costs); - `feasible`, logical; - `exceedance`, double (the legacy criterion regret calculation). From aebbde28caf133e54319c3c0be9b125fdc3b23a4 Mon Sep 17 00:00:00 2001 From: "Ben B. Hansen" Date: Fri, 9 Aug 2024 15:38:34 -0400 Subject: [PATCH 19/31] Small updates from devtools::document() ...in case this helps w/ 'ubuntu-latest (devel)' CI compile failure (on fork but not in main repo). If that issue remains, this commit can be `git revert`-ed. --- DESCRIPTION | 2 +- man/dimnames-InfinitySparseMatrix.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d5cd65d2..67a531b7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -92,6 +92,6 @@ Collate: 'zzz.R' 'zzzDistanceSpecification.R' VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Encoding: UTF-8 Remotes: josherrickson/rrelaxiv diff --git a/man/dimnames-InfinitySparseMatrix.Rd b/man/dimnames-InfinitySparseMatrix.Rd index 7d3fed8b..4fccc8c0 100644 --- a/man/dimnames-InfinitySparseMatrix.Rd +++ b/man/dimnames-InfinitySparseMatrix.Rd @@ -10,7 +10,7 @@ \S4method{dimnames}{InfinitySparseMatrix,list}(x) <- value -\S4method{dimnames}{InfinitySparseMatrix,`NULL`}(x) <- value +\S4method{dimnames}{InfinitySparseMatrix,NULL}(x) <- value } \arguments{ \item{x}{An InfinitySparseMatrix object.} From 781a480e7f8efb92c1d829043eb226e22b9f961e Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Wed, 21 Aug 2024 00:41:09 -0400 Subject: [PATCH 20/31] simplifying feasibility update status, reducing reliance on NULL MCFSolution --- NAMESPACE | 1 + R/fmatch.R | 14 +++++++------- R/fullmatch.R | 20 ++++++++++++++------ R/solve_reg_fm_prob.R | 17 +++++++++++------ R/subproblemutils.R | 31 ++++++++++++++++++++++++++++--- 5 files changed, 61 insertions(+), 22 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c10d2765..80b8c515 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,6 +106,7 @@ export(optmatch_restrictions) export(optmatch_same_distance) export(pair) export(pairmatch) +export(parse_hint_metadata) export(pscore.dist) export(scores) export(setMaxProblemSize) diff --git a/R/fmatch.R b/R/fmatch.R index d348126c..72de21c9 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -48,7 +48,7 @@ fmatch <- function(distance, mxc <- as.integer(round(max.col.units)) mnc <- as.integer(round(min.col.units)) mxr <- as.integer(round(max.row.units)) - + feas.status <- TRUE if (mnc > 1) { mxr <- 1L } @@ -143,11 +143,14 @@ fmatch <- function(distance, out <- as.data.frame(distance, row.names = NULL) out$solution <- rep(-1L, narcs) + feas.status <- FALSE #integrate this into the MCFSolution object if possible + mcfs.none <- new("MCFSolutions") return(c( out, list(maxerr = 0), - list(MCFSolution = NULL) + list(MCFSolution = mcfs.none) )) + } ## Min-Cost-Flow representation of problem #### @@ -232,7 +235,6 @@ fmatch <- function(distance, algorithm = algorithm) x <- as.numeric(lout[[1]]) nodes[, "price"] <- lout[[2]] - nodes[, "price"] <- (nodes[, "price"] - nodes[, "price"][endID]) * -1 if (lout[[4]] != "OPTIMAL" || all(x == -1)) { @@ -254,9 +256,7 @@ fmatch <- function(distance, ## Material used to create s3 optmatch object: - feas <- fop$feasible1 - x <- feas * fop$x1 - + x <- fop$feasible1 * fop$x1 #### Recover node prices, store in nodes table ## ## In full matching, each upstream (row) or downstream (column) node starts ## an arc ending at End, and these are also the only arcs ending there. End @@ -317,7 +317,7 @@ fmatch <- function(distance, arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping) sp <- new("SubProbInfo") - sp[1L, "feasible"] <- TRUE + sp[1L, "feasible"] <- any(x == 1L) fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, nodes=nodes, arcs=arcs) return(c(obj, diff --git a/R/fullmatch.R b/R/fullmatch.R index ed5ceb0c..ba92ece1 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -308,7 +308,12 @@ fullmatch.matrix <- function(x, hint, resolution = NULL, ...) { - + # if (!missing(hint)) + # { + # hint_metadata <- parse_hint_metadata(hint) + # } else { + # hint_metadata <- NULL + # } hint <- if (missing(hint)) NULL else nodeinfo(hint) ### Checking Input ### @@ -512,7 +517,8 @@ fullmatch.matrix <- function(x, cells.b <- rep(NA, x[2]) names(cells.a) <- rownames(d) names(cells.b) <- colnames(d) - tmp <- list(cells = c(cells.a, cells.b), err = -1) + tmp <- list(cells = c(cells.a, cells.b), err = -1, + MCFSolution = new("MCFSolutions")) return(tmp) } @@ -534,8 +540,10 @@ fullmatch.matrix <- function(x, solver = solver, omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA epsilon = epsilon.in) - if (!is.null(temp$MCFSolution)) + if (!identical(temp[["MCFSolution"]], + new("MCFSolutions"))) { temp$MCFSolution@subproblems[1L,"flipped"] <- flipped + } return(temp) } @@ -614,7 +622,6 @@ fullmatch.matrix <- function(x, warning("The flag fullmatch_try_recovery is unset, setting to TRUE") setTryRecovery() } - precomputed_parameters <- parse_subproblems(problems = problems, min.controls = min.controls, max.controls = max.controls, @@ -693,7 +700,8 @@ fullmatch.matrix <- function(x, mcfsolutions <- rep(list(NULL), np) names(mcfsolutions) <- subproblemids for (ii in 1L:np) { - if (!is.null(solutions[[ii]]$MCFSolution)) + if (!identical(solutions[[ii]][["MCFSolution"]], + new("MCFSolutions"))) { mcfsolutions[[ii]] <- solutions[[ii]]$MCFSolution mcfsolutions[[ii]]@subproblems[1L,"hashed_dist"] <- disthash @@ -714,7 +722,7 @@ fullmatch.matrix <- function(x, } mcfsolutions <- mcfsolutions[!vapply(mcfsolutions, is.null, logical(1))] - attr(mout, "MCFSolutions") <- if (length(mcfsolutions)==0) { + attr(mout, "MCFSolutions") <- if (length(mcfsolutions)==0) { #flagging this as a line may change if we always want to return an MCFSolutions object in the future NULL } else { names(mcfsolutions)[1] <- "x" diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index 46ecc79f..7585e440 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -93,7 +93,8 @@ solve_reg_fm_prob <- function(node_info, { ans <- rep(NA_integer_, nrows + ncols) names(ans) <- c(rownames, colnames) - return(list(cells=ans, err=0, MCFSolution=NULL)) + return(list(cells=ans, err=0, MCFSolution=new("MCFSolutions"))) + #return(list(cells=ans, err=0, MCFSolution=NULL)) } @@ -124,10 +125,12 @@ solve_reg_fm_prob <- function(node_info, names(ans) <- c(rownames, colnames) ans[names(matches)] <- matches - if (!is.null(temp[["MCFSolution"]])) - { + #if (!is.null(temp[["MCFSolution"]])) + if (!identical(temp[["MCFSolution"]], + new("MCFSolutions"))) + { temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr - temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solution==1L) + #temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solution==1L) ## Presently we can treat this subproblem as non-flipped even if it was, ## since `dm` will have been transposed in the event of flipping. Doing @@ -143,7 +146,7 @@ solve_reg_fm_prob <- function(node_info, temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] nodeinfo(temp[["MCFSolution"]]) <- update(node_info, nodeinfo(temp[["MCFSolution"]])) - } + } return(list(cells = ans, err = temp$maxerr, MCFSolution=temp[["MCFSolution"]] @@ -168,7 +171,9 @@ doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, intsol <- intSolve(dm=dm, min.cpt=min.cpt, max.cpt=max.cpt, f.ctls=f.ctls, node_info = node_info, solver = solver) - if (!is.null(intsol$MCFSolution)) + # if (!is.null(intsol$MCFSolution)) + if (!identical(intsol[["MCFSolution"]], + new("MCFSolutions"))) { intsol$MCFSolution@subproblems[1L,"resolution"] <- epsilon diff --git a/R/subproblemutils.R b/R/subproblemutils.R index 33c7f649..be048565 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -112,8 +112,7 @@ get_epsilon <- function(subproblem, flipped, cfeas <- length(unique(sub.dmat@rows)) } - #dealing with infinite values? - max_dist <- max(sub.dmat@.Data) + max_dist <- suppressWarnings(max(sub.dmat@.Data)) eps.val <- calculate_epsilon(rfeas, cfeas, @@ -130,7 +129,6 @@ calculate_epsilon <- function(rfeas, { old.o <- options(warn=-1) - #epsilon_lower_lim <- max(dm$'dist')/(.Machine$integer.max/64 -2) epsilon_lower_lim <- maxdist / (.Machine$integer.max/64 -2) epsilon <- if (tolerance>0 & rfeas>1 & cfeas>1) { max(epsilon_lower_lim, tolerance/(rfeas + cfeas - 2)) @@ -326,3 +324,30 @@ get_subproblem_info <- function(object, type) return(res) } } + +#' Parse hints for metadata +#' +#' @param full.hint +#' +#' @return +#' @export +#' +#' @examples +parse_hint_metadata <- function(full.hint) +{ + # assuming that the hint is not null + # also assuming existence of MCFSolutions object + + groupids <- get_subproblem_info(full.hint, type = "groups") + feasibility <- get_subproblem_info(full.hint, type = "feasible") + names(feasibility) <- groupids + resolutions <- get_subproblem_info(full.hint, type = "resolution") + names(resolutions) <- groupids + regret_gap <- get_subproblem_info(full.hint, type = "primal_value") - get_subproblem_info(full.hint, type = "dual_value") + names(regret_gap) <- groupids + + rl <- list(feasible = feasibility, + resolution = resolutions, + regret = regret_gap) + return(rl) +} From 446cc2b97ed7346ef73e9cba5ff38965d2067c3b Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Wed, 21 Aug 2024 00:52:16 -0400 Subject: [PATCH 21/31] removing feas.status tag --- R/fmatch.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/fmatch.R b/R/fmatch.R index 72de21c9..bfa75686 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -48,7 +48,6 @@ fmatch <- function(distance, mxc <- as.integer(round(max.col.units)) mnc <- as.integer(round(min.col.units)) mxr <- as.integer(round(max.row.units)) - feas.status <- TRUE if (mnc > 1) { mxr <- 1L } @@ -143,7 +142,6 @@ fmatch <- function(distance, out <- as.data.frame(distance, row.names = NULL) out$solution <- rep(-1L, narcs) - feas.status <- FALSE #integrate this into the MCFSolution object if possible mcfs.none <- new("MCFSolutions") return(c( out, From be47752e02fb344c5de1dd0ffe0add20083d0525 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 22 Aug 2024 16:27:14 -0400 Subject: [PATCH 22/31] revisions to always ensure that MCFSolutions object is returned --- R/fmatch.R | 133 ++++++++++++++++++++++++++++++++++++++---- R/fullmatch.R | 47 +++++++++++---- R/solve_reg_fm_prob.R | 36 +++++++----- 3 files changed, 180 insertions(+), 36 deletions(-) diff --git a/R/fmatch.R b/R/fmatch.R index bfa75686..366b4489 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -33,7 +33,7 @@ fmatch <- function(distance, min.col.units = 1, f = 1, node_info, - solver) { + solver, try_solver = TRUE) { # checks solver and evaluates LEMON() if neccessary solver <- handleSolver(solver) @@ -52,12 +52,13 @@ fmatch <- function(distance, mxr <- 1L } + # Check that matching problem is well-specified - if (mxc < mnc) { + if ( (mxc < mnc) && try_solver ){ stop("min.col.units may not exceed max.col.units") } - if (any(c(mxc, mnc, mxr) < 1)) { + if ((any(c(mxc, mnc, mxr) < 1)) && try_solver) { stop("max and min constraints must be 1 or greater") } @@ -88,13 +89,13 @@ fmatch <- function(distance, distance[['dist']][nadists] <- replacement } - if (mxr > 1) # i.e. many-one matches permissible + if (mxr > 1 && try_solver) # i.e. many-one matches permissible { if (any(distance[['dist']] <= 0)) stop("Nonpositive discrepancies not compatible with full matching\n (see Hansen & Klopfer, 2006 JCGS, sec.4.1).") } else if (any(distance[['dist']] < 0)) stop("distance should be nonnegative") - if (!is.numeric(f) | f > 1 | f < 0) { + if ((!is.numeric(f) | f > 1 | f < 0) && try_solver) { stop("f must be a number in [0,1]") } @@ -136,17 +137,131 @@ fmatch <- function(distance, stop('Cannot choose "(_End_)" as unit name') ## Bypass solver if problem is recognizably infeasible - if ( (mxr >1 & nt/mxr > n.mc) | #max.row.units too low + if ( !try_solver | + (mxr >1 & nt/mxr > n.mc) | #max.row.units too low (mxr==1L & nt * mnc > n.mc) | #min.col.units too high (nt * mxc < n.mc)) { #max.col.units too low out <- as.data.frame(distance, row.names = NULL) out$solution <- rep(-1L, narcs) - mcfs.none <- new("MCFSolutions") + + #### + nodes <- new("NodeInfo", + data.frame(name=c(row.units, col.units, + "(_End_)", "(_Sink_)"),# note that `factor()` would put these at start not end of levels + price=0L, + upstream_not_down=c(rep(TRUE, nt), rep(FALSE, nc), NA, NA), + supply=c(rep(mxc, nt), rep(0L, nc), + -(mxc * nt - n.mc), -n.mc), + groups=factor(rep(NA_character_, nt+nc+2L)), + stringsAsFactors=FALSE + ) + ) + + endID <- nt + nc + 1L + sinkID <- endID + 1L + node.labels(nodes) <- nodes[['name']] # new convention per i166 + + if ( !all(node_info[['price']]==0) ) + { + nodes[,'price'] <- node_info[match(c(row.units, col.units, + "(_End_)", "(_Sink_)"), + node_info$name), + 'price'] + ## if a col unit doesn't have a match w/in node_info$name, then + ## its nodes row now has a 0 for its price. Replace that w/ min of + ## bookkeeping node prices may preserve CS in some cases. + if (length(col.noprice <- setdiff(col.units, node_info[['name']]))) + nodes[nodes[['name']] %in% col.noprice, 'price'] <- + min(node_info[match(c("(_End_)", "(_Sink_)"), node_info$name), + 'price', drop=TRUE] + ) + } + # ID numbers implicitly point to corresp. row of nodes + #### Arcs involving End or Sink nodes #### + + if (nt == 0 && nc == 0) + { + bookkeeping <- + data.frame(groups=factor(rep(NA_character_, 2L)), + start=c(1,2), # ~ col.units + end=c(1, 2), + flow=0L, + capacity=0L + #capacity=c(rep(mxc - mnc, nt), rep(mxr - 1L, nc), rep(1L, nc)) + ) + + x <- rep(0L, problem.size) + matches <- distance[as.logical(x[1L:narcs]), c("i", "j")] + matches <- data.frame(groups=factor(rep(NA_integer_, nrow(matches))), + upstream=factor(matches[['i']], + levels=node.labels(nodes) + ), + downstream=factor(matches[['j']], + levels=node.labels(nodes) + ) + ) + bookkeeping[['start']] <- factor(bookkeeping[['start']], + levels=c(1L, 2L), + labels=node.labels(nodes) + ) + bookkeeping[['end']] <- factor(bookkeeping[['end']], + levels=c(1L, 2L), + labels=node.labels(nodes) + ) + + + } else { + bookkeeping <- + data.frame(groups=factor(rep(NA_character_, nt +2L*nc)), + start=c(1L:(nt + nc), # ~ c(row.units, col.units) + nt + 1L:nc), # ~ col.units + end=c(rep(endID, nc + nt), + rep(sinkID, nc) ), + flow=0L, + capacity=0L + #capacity=c(rep(mxc - mnc, nt), rep(mxr - 1L, nc), rep(1L, nc)) + ) + + startn<- c(as.integer(distance[['i']]), bookkeeping$start ) + endn <- c(as.integer(distance[['j']]), bookkeeping$end ) + x <- rep(0L, problem.size) + matches <- distance[as.logical(x[1L:narcs]), c("i", "j")] + bookkeeping[1L:(problem.size-narcs), "flow"] <- as.integer(x)[(narcs+1L):problem.size] + matches <- data.frame(groups=factor(rep(NA_integer_, nrow(matches))), + upstream=factor(matches[['i']], + levels=node.labels(nodes) + ), + downstream=factor(matches[['j']], + levels=node.labels(nodes) + ) + ) + bookkeeping[['start']] <- factor(bookkeeping[['start']], + levels=1L:(nt + nc + 2), + labels=node.labels(nodes) + ) + bookkeeping[['end']] <- factor(bookkeeping[['end']], + levels=1L:(nt + nc + 2), + labels=node.labels(nodes) + ) + + + } + + + + arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping) + + sp <- new("SubProbInfo") + sp[1L, "feasible"] <- FALSE + fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, + nodes=nodes, arcs=arcs) + #### + return(c( out, list(maxerr = 0), - list(MCFSolution = mcfs.none) + list(MCFSolution = fmcfs) #so if statements will work temporarily )) } @@ -289,8 +404,6 @@ fmatch <- function(distance, obj <- as.data.frame(distance, row.names = NULL) obj$solution <- x[seq.int(from=min(1L, narcs), to=narcs)] - - ### Recover arc flow info, store in `arcs` ### ## info extracted from problem solution: matches <- distance[as.logical(x[1L:narcs]), c("i", "j")] diff --git a/R/fullmatch.R b/R/fullmatch.R index ba92ece1..821ef32b 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -517,8 +517,22 @@ fullmatch.matrix <- function(x, cells.b <- rep(NA, x[2]) names(cells.a) <- rownames(d) names(cells.b) <- colnames(d) + + if (!flipped) { + omf.calc <- omf + } else { + omf.calc <- -1 * omf + } + + stemp <- solve_reg_fm_prob(node_info = hint, + distspec = d, + max.cpt = mxctl, + min.cpt = mnctl, + solver = solver, + omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA + epsilon = epsilon.in) tmp <- list(cells = c(cells.a, cells.b), err = -1, - MCFSolution = new("MCFSolutions")) + MCFSolution = stemp[["MCFSolution"]]) return(tmp) } @@ -540,10 +554,12 @@ fullmatch.matrix <- function(x, solver = solver, omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA epsilon = epsilon.in) - if (!identical(temp[["MCFSolution"]], - new("MCFSolutions"))) { + # if (!identical(temp[["MCFSolution"]], + # new("MCFSolutions"))) + #if (!temp[["MCFSolution"]]@subproblems[1L, "feasible"]) + #{ temp$MCFSolution@subproblems[1L,"flipped"] <- flipped - } + #} return(temp) } @@ -699,15 +715,20 @@ fullmatch.matrix <- function(x, ## assemble MCF material mcfsolutions <- rep(list(NULL), np) names(mcfsolutions) <- subproblemids - for (ii in 1L:np) { - if (!identical(solutions[[ii]][["MCFSolution"]], - new("MCFSolutions"))) - { + for (ii in 1L:np) + { + #if (!solutions[[ii]][["MCFSolution"]]@subproblems[1L, "feasible"]) + # if (!identical(solutions[[ii]][["MCFSolution"]], + # new("MCFSolutions"))) + #{ mcfsolutions[[ii]] <- solutions[[ii]]$MCFSolution mcfsolutions[[ii]]@subproblems[1L,"hashed_dist"] <- disthash thesubprob <- subproblemids[ii] mcfsolutions[[ii]]@subproblems[1L,"groups"] <- thesubprob - mcfsolutions[[ii]]@nodes[,"groups"] <- factor(thesubprob) + if (nrow(mcfsolutions[[ii]]@nodes ) > 0) + { + mcfsolutions[[ii]]@nodes[,"groups"] <- factor(thesubprob) + } if (nrow(mcfsolutions[[ii]]@arcs@matches) > 0) { mcfsolutions[[ii]]@arcs@matches[,"groups"] <- factor(thesubprob) } @@ -719,10 +740,11 @@ fullmatch.matrix <- function(x, node.labels(mcfsolutions[[ii]]) <- nlabs } } - } - mcfsolutions <- mcfsolutions[!vapply(mcfsolutions, is.null, logical(1))] + mcfsolutions <- mcfsolutions[!vapply(mcfsolutions, is.null, logical(1))] + #browser() attr(mout, "MCFSolutions") <- if (length(mcfsolutions)==0) { #flagging this as a line may change if we always want to return an MCFSolutions object in the future + print("Runs!") NULL } else { names(mcfsolutions)[1] <- "x" @@ -730,6 +752,9 @@ fullmatch.matrix <- function(x, do.call("c", mcfsolutions) } + # names(mcfsolutions[1]) <- "x" + # attr(mout, "MCFSolutions") <- do.call("c", mcfsolutions) + # save solver information attr(mout, "solver") <- solver diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index 7585e440..f492fc71 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -34,7 +34,8 @@ solve_reg_fm_prob <- function(node_info, omit.fraction=NULL, solver, epsilon = NULL, - tolerance = NULL) { + tolerance = NULL, + try_solver = TRUE) { if (min.cpt <=0 | max.cpt<=0) { stop("inputs min.cpt, max.cpt must be positive") @@ -86,14 +87,19 @@ solve_reg_fm_prob <- function(node_info, } f.ctls <- 1-omit.fraction } - if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min + if (!try_solver | + floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment !rfeas | !cfeas # either no controls or no treatments ) { ans <- rep(NA_integer_, nrows + ncols) names(ans) <- c(rownames, colnames) - return(list(cells=ans, err=0, MCFSolution=new("MCFSolutions"))) + #engaging in some questionable practices here, but short circuiting doubleSolve to generate a placeholder MCFSolutions object to return. Everything except the subproblems table is suspect. + stmp <- doubleSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, + rfeas, cfeas, epsilon, solver, try_solver = FALSE) + return(list(cells = ans, err=0, MCFSolution=stmp[["MCFSolution"]])) + #return(list(cells=ans, err=0, MCFSolution=new("MCFSolutions"))) #return(list(cells=ans, err=0, MCFSolution=NULL)) } @@ -126,11 +132,11 @@ solve_reg_fm_prob <- function(node_info, ans[names(matches)] <- matches #if (!is.null(temp[["MCFSolution"]])) - if (!identical(temp[["MCFSolution"]], - new("MCFSolutions"))) + # if (!identical(temp[["MCFSolution"]], + # new("MCFSolutions"))) + if (!temp[["MCFSolution"]]@subproblems[1L, "feasible"]) { temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr - #temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solution==1L) ## Presently we can treat this subproblem as non-flipped even if it was, ## since `dm` will have been transposed in the event of flipping. Doing @@ -156,7 +162,7 @@ solve_reg_fm_prob <- function(node_info, doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, - rfeas, cfeas, epsilon, solver) + rfeas, cfeas, epsilon, solver, try_solver = TRUE) { dm_distance <- dm$'dist' #Used below in roundoff error crude estimate dm$'dist' <- as.integer(ceiling(.5 + dm$'dist' / epsilon)) @@ -169,17 +175,16 @@ doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, ) intsol <- intSolve(dm=dm, min.cpt=min.cpt, max.cpt=max.cpt, f.ctls=f.ctls, - node_info = node_info, solver = solver) + node_info = node_info, solver = solver, try_solver = try_solver) - # if (!is.null(intsol$MCFSolution)) - if (!identical(intsol[["MCFSolution"]], - new("MCFSolutions"))) - { + #if (!identical(intsol[["MCFSolution"]], + # new("MCFSolutions"))) + #{ intsol$MCFSolution@subproblems[1L,"resolution"] <- epsilon intsol$MCFSolution@nodes[,'price'] <- intsol$MCFSolution@nodes[['price']] * epsilon - } + #} intsol$maxerr <- if (any(is.na(intsol$solution) | @@ -197,7 +202,7 @@ doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, } -intSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, solver) { +intSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, solver, try_solver = TRUE) { stopifnot(is(dm, "EdgeList"), is(node_info, "NodeInfo")) if (!is.integer(node_info[['price']])) { price_col_position <- which(node_info@names=="price") @@ -211,7 +216,8 @@ intSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, solver) { min.col.units = max(1, floor(min.cpt)), f = f.ctls, node_info = node_info, - solver = solver) + solver = solver, + try_solver = try_solver) } ##* Small helper function to turn a solution data.frame into a factor of matches From daafaac7f62e19a332ab6d70e9daca9e916abb7c Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Fri, 23 Aug 2024 12:15:34 -0400 Subject: [PATCH 23/31] minor edits and clean up --- R/fmatch.R | 22 ++++++++++++---------- R/fullmatch.R | 33 ++++++--------------------------- R/solve_reg_fm_prob.R | 22 ++++++++-------------- 3 files changed, 26 insertions(+), 51 deletions(-) diff --git a/R/fmatch.R b/R/fmatch.R index 366b4489..4a8b7de7 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -137,7 +137,7 @@ fmatch <- function(distance, stop('Cannot choose "(_End_)" as unit name') ## Bypass solver if problem is recognizably infeasible - if ( !try_solver | + if ( !try_solver | # this is admittedly a nasty workaround. There are various error checks that occur at different levels of the call stack and this allows us to bypass some of those more easily so that we can return an MCFSolutions object. In practice, we are using this option to ignore infeasibility checks so that we can proceed to create an MCFSolutions (mxr >1 & nt/mxr > n.mc) | #max.row.units too low (mxr==1L & nt * mnc > n.mc) | #min.col.units too high (nt * mxc < n.mc)) { #max.col.units too low @@ -179,16 +179,18 @@ fmatch <- function(distance, } # ID numbers implicitly point to corresp. row of nodes #### Arcs involving End or Sink nodes #### + # There are multiple cases where the solver is NOT called because the problem is clearly infeasible. This makes it difficult to always return an MCFSolutions object. When the solver is called, we can create all of the various constituent parts of the MCFSolutions object without much trouble, although we must paper over certain things, like setting flow = capacity = 0. However, when the solver is not called, we have to make up an arbitrary MCFSolutions object to return. Because of the restrictions on the classes, this is currently being done in a very hacky way. Long term, we would want to consider the expectations for those classes, perhaps change the defaults or class structure somehow. For instance, what should we return when the subproblem is entirely empty? - if (nt == 0 && nc == 0) + # At the moment there are really no guarantees about what can be found in these tables, aside from the subproblems table which should contain a correct specification of feasibility. + + if (nt == 0 && nc == 0) # edge case where subproblem is completely empty. In order to return an MCFSolutions object, I am violating some rules. Specifically, flow and capacity are all set to zero. Matches now includes sink and end nodes because the current classes prohibit empty data frames being returned/appended at the end. { bookkeeping <- data.frame(groups=factor(rep(NA_character_, 2L)), - start=c(1,2), # ~ col.units + start=c(1,2), end=c(1, 2), flow=0L, capacity=0L - #capacity=c(rep(mxc - mnc, nt), rep(mxr - 1L, nc), rep(1L, nc)) ) x <- rep(0L, problem.size) @@ -211,7 +213,7 @@ fmatch <- function(distance, ) - } else { + } else { #This is another edge case where the solver is never called. However, in this case, the subproblem is not empty, so we can use most of the existing logic fairly easily. bookkeeping <- data.frame(groups=factor(rep(NA_character_, nt +2L*nc)), start=c(1L:(nt + nc), # ~ c(row.units, col.units) @@ -220,7 +222,6 @@ fmatch <- function(distance, rep(sinkID, nc) ), flow=0L, capacity=0L - #capacity=c(rep(mxc - mnc, nt), rep(mxr - 1L, nc), rep(1L, nc)) ) startn<- c(as.integer(distance[['i']]), bookkeeping$start ) @@ -256,16 +257,16 @@ fmatch <- function(distance, sp[1L, "feasible"] <- FALSE fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, nodes=nodes, arcs=arcs) - #### - return(c( out, list(maxerr = 0), - list(MCFSolution = fmcfs) #so if statements will work temporarily + list(MCFSolution = fmcfs) )) } - + ## after this point, one of the solvers will be called. + ## That is, the problem is not recognizably infeasible prior to calling the solver. + ## The logic for constructing MCFSolutions is much more straightforward from this point forward ## Min-Cost-Flow representation of problem #### ## Each node has a integer ID number, implicitly pointing ## to corresponding row of this data frame. @@ -428,6 +429,7 @@ fmatch <- function(distance, arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping) sp <- new("SubProbInfo") + # If any solver is called, feasibility is now determined only in this location. We know that the solver was called, but we must check to see if the problem was feasible. sp[1L, "feasible"] <- any(x == 1L) fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, nodes=nodes, arcs=arcs) diff --git a/R/fullmatch.R b/R/fullmatch.R index 821ef32b..9b1e03e4 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -308,12 +308,7 @@ fullmatch.matrix <- function(x, hint, resolution = NULL, ...) { - # if (!missing(hint)) - # { - # hint_metadata <- parse_hint_metadata(hint) - # } else { - # hint_metadata <- NULL - # } + hint <- if (missing(hint)) NULL else nodeinfo(hint) ### Checking Input ### @@ -554,12 +549,9 @@ fullmatch.matrix <- function(x, solver = solver, omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA epsilon = epsilon.in) - # if (!identical(temp[["MCFSolution"]], - # new("MCFSolutions"))) - #if (!temp[["MCFSolution"]]@subproblems[1L, "feasible"]) - #{ - temp$MCFSolution@subproblems[1L,"flipped"] <- flipped - #} + + temp$MCFSolution@subproblems[1L,"flipped"] <- flipped + return(temp) } @@ -717,10 +709,6 @@ fullmatch.matrix <- function(x, names(mcfsolutions) <- subproblemids for (ii in 1L:np) { - #if (!solutions[[ii]][["MCFSolution"]]@subproblems[1L, "feasible"]) - # if (!identical(solutions[[ii]][["MCFSolution"]], - # new("MCFSolutions"))) - #{ mcfsolutions[[ii]] <- solutions[[ii]]$MCFSolution mcfsolutions[[ii]]@subproblems[1L,"hashed_dist"] <- disthash thesubprob <- subproblemids[ii] @@ -742,18 +730,9 @@ fullmatch.matrix <- function(x, } mcfsolutions <- mcfsolutions[!vapply(mcfsolutions, is.null, logical(1))] - #browser() - attr(mout, "MCFSolutions") <- if (length(mcfsolutions)==0) { #flagging this as a line may change if we always want to return an MCFSolutions object in the future - print("Runs!") - NULL - } else { - names(mcfsolutions)[1] <- "x" - ##b/c in next line `c()` needs to dispatch on an `x` argument - do.call("c", mcfsolutions) - } - # names(mcfsolutions[1]) <- "x" - # attr(mout, "MCFSolutions") <- do.call("c", mcfsolutions) + names(mcfsolutions)[1] <- "x" + attr(mout, "MCFSolutions") <- do.call("c", mcfsolutions) # save solver information attr(mout, "solver") <- solver diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index f492fc71..c4eeb8a1 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -24,6 +24,7 @@ ##* @param solver ##* @param epsilon double, grid width ##* @param omit.fraction +##* @param try_solver Logical. This is a temporary parameter allowing us to work around some of the error checking happening at different points in order to recognize infeasible subproblems but still return an MCFSolutions object with an explicit indication of feasibility. ##* @return ##* @keywords internal @@ -99,8 +100,6 @@ solve_reg_fm_prob <- function(node_info, stmp <- doubleSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, rfeas, cfeas, epsilon, solver, try_solver = FALSE) return(list(cells = ans, err=0, MCFSolution=stmp[["MCFSolution"]])) - #return(list(cells=ans, err=0, MCFSolution=new("MCFSolutions"))) - #return(list(cells=ans, err=0, MCFSolution=NULL)) } @@ -131,10 +130,8 @@ solve_reg_fm_prob <- function(node_info, names(ans) <- c(rownames, colnames) ans[names(matches)] <- matches - #if (!is.null(temp[["MCFSolution"]])) - # if (!identical(temp[["MCFSolution"]], - # new("MCFSolutions"))) - if (!temp[["MCFSolution"]]@subproblems[1L, "feasible"]) + + if (!temp[["MCFSolution"]]@subproblems[1L, "feasible"]) #this logic takes the place of something that would check to see if the MCFSolution was NULL. I'm not 100% sure these things are interchangeable but it does pass tests for the time being { temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr @@ -160,7 +157,7 @@ solve_reg_fm_prob <- function(node_info, ) } - +# see try_solver documentation in solve_reg_fm_prob function for description of that. doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, rfeas, cfeas, epsilon, solver, try_solver = TRUE) { @@ -177,14 +174,11 @@ doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, intsol <- intSolve(dm=dm, min.cpt=min.cpt, max.cpt=max.cpt, f.ctls=f.ctls, node_info = node_info, solver = solver, try_solver = try_solver) - #if (!identical(intsol[["MCFSolution"]], - # new("MCFSolutions"))) - #{ - intsol$MCFSolution@subproblems[1L,"resolution"] <- epsilon - intsol$MCFSolution@nodes[,'price'] <- - intsol$MCFSolution@nodes[['price']] * epsilon - #} + intsol$MCFSolution@subproblems[1L,"resolution"] <- epsilon + + intsol$MCFSolution@nodes[,'price'] <- intsol$MCFSolution@nodes[['price']] * epsilon + intsol$maxerr <- if (any(is.na(intsol$solution) | From 2e95399f9c3738d80cfcff893d56976a8c29daa6 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Fri, 23 Aug 2024 12:16:06 -0400 Subject: [PATCH 24/31] adding tests for explicit MCFSolutions behavior --- tests/testthat/test.explicit_MCFSolutions.R | 63 +++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 tests/testthat/test.explicit_MCFSolutions.R diff --git a/tests/testthat/test.explicit_MCFSolutions.R b/tests/testthat/test.explicit_MCFSolutions.R new file mode 100644 index 00000000..d78101a4 --- /dev/null +++ b/tests/testthat/test.explicit_MCFSolutions.R @@ -0,0 +1,63 @@ +################################################################################ +# Testing some experimental work on returning feasibility explicitly via MCFSolutions +################################################################################ + +context("Test Explicit MCFSolutions") + +test_that("simple case, all feasible", { + d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) + dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) + res.mat <- fullmatch(dist, data=d) + expect_true(identical((get_subproblem_info(res.mat, type = "feasible")), TRUE)) + + + data(nuclearplants) + f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) + expect_true(identical((get_subproblem_info(f2, type = "feasible")), c(TRUE, TRUE))) + dt <- get_subproblem_info(f2, type = c("resolution", "group")) + expect_true(all(dim(dt) == c(2,2))) + +}) + + +test_that("Feasibility with subproblems + recovery",{ + data(nuclearplants) + + mm <- match_on(pr ~ cost + t1 + t2, data=nuclearplants) + # infeasible as given + options("fullmatch_try_recovery" = FALSE) + expect_warning(f <- fullmatch(mm, data=nuclearplants, max.controls=2)) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(FALSE))) + options("fullmatch_try_recovery" = TRUE) + f <- fullmatch(mm, data=nuclearplants, max.controls=2) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(TRUE))) + + + # not infeasible as mean.controls provided + f <- fullmatch(mm, data=nuclearplants, max.controls=2, mean.controls=2) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(TRUE))) + + + options("fullmatch_try_recovery" = FALSE) + mm <- match_on(pr ~ cost + t1 + t2, data=nuclearplants, within=exactMatch(pr ~ pt, data=nuclearplants)) + # infeasible as given for subproblem 1, feasible for subproblem 2 + expect_warning(f <- fullmatch(mm, data=nuclearplants, max.controls=2)) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(FALSE, TRUE))) + + options("fullmatch_try_recovery" = TRUE) + f <- fullmatch(mm, data=nuclearplants, max.controls=2) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(TRUE, TRUE))) + +}) + + +test_that("completely empty problem", { + m <- matrix(Inf, nrow = 3, ncol = 4) + rownames(m) <- LETTERS[1:3] + colnames(m) <- letters[23:26] + expect_warning(res.m <- fullmatch(m)) + expect_true(get_subproblem_info(res.m, type = "feasible") == FALSE) + +}) From 52910699122a3fa5b21cc170006828f3d1a72334 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Sun, 10 Nov 2024 00:50:23 -0500 Subject: [PATCH 25/31] guaranteeing MCFSolutions object, but streamlining solution for infeasible problems --- R/MCFSolutions.R | 60 +++++++------ R/fmatch.R | 88 +++++++------------- R/fullmatch.R | 3 +- tests/testthat/test.MCFSolutions.R | 12 +-- tests/testthat/test.complementarySlackness.R | 4 +- 5 files changed, 71 insertions(+), 96 deletions(-) diff --git a/R/MCFSolutions.R b/R/MCFSolutions.R index e26cfbbd..2fc9e0c3 100644 --- a/R/MCFSolutions.R +++ b/R/MCFSolutions.R @@ -8,15 +8,15 @@ setClass("SubProbInfo", contains="data.frame", prototype= prototype(data.frame(groups=character(1), flipped=NA, hashed_dist=NA_character_, resolution=1, primal_value=NA_real_, - dual_value=NA_real_, feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) + dual_value=NA_real_, feasible=NA, exceedance=NA_real_, solver = NA_character_, stringsAsFactors=FALSE) ) ) setValidity("SubProbInfo", function(object){ errors <- character(0) - if (!all(colnames(object)[1:8]== - c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance"))) + if (!all(colnames(object)[1:9]== + c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance", "solver"))) errors <- c(errors, - 'Cols 1-8 should be:\n\t c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance")') + 'Cols 1-9 should be:\n\t c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance", "solver")') if (!all(vapply(object[c(1,3)], is.character, logical(1)))) errors <- c(errors, 'Cols 1,3 should have type character.') @@ -136,32 +136,38 @@ setValidity("MCFSolutions", function(object){ ## levels set, namely node.labels(object) (i.e. row.names(object@nodes) ). Former ## requirement is enforced within the ArcInfo validity checker; here we confirm ## that this level set matches what's in the nodes table. + + + + + if (!identical(row.names(nodeinfo(object)), levels(object@arcs@matches[['upstream']]) - ) + ) + ) + { + if (length(xtralevs <- setdiff(node.labels(object), + levels(object@arcs@matches[['upstream']]) ) - { - if (length(xtralevs <- setdiff(node.labels(object), - levels(object@arcs@matches[['upstream']]) - ) - ) - ) - errors <- c(errors, - paste("@nodes lists nodes not in the levels of @arcs's nodes columns, e.g.", - paste(head(xtralevs,2), collapse=", "), "." - ) - ) - if (length(xtralevs <- setdiff(levels(object@arcs@matches[['upstream']]), - node.labels(object) - ) - ) - ) - errors <- c(errors, - paste("@arcs's nodes columns have levels not appearing in @nodes", - paste(head(xtralevs,2), collapse=", "), "." - ) - ) - } + ) + ) + errors <- c(errors, + paste("@nodes lists nodes not in the levels of @arcs's nodes columns, e.g.", + paste(head(xtralevs,2), collapse=", "), "." + ) + ) + if (length(xtralevs <- setdiff(levels(object@arcs@matches[['upstream']]), + node.labels(object) + ) + ) + ) + errors <- c(errors, + paste("@arcs's nodes columns have levels not appearing in @nodes", + paste(head(xtralevs,2), collapse=", "), "." + ) + ) + } + ## Nodes table must have groups column if (!any(colnames(object@nodes)=="groups")) errors <- c(errors, diff --git a/R/fmatch.R b/R/fmatch.R index 4a8b7de7..84e5d9db 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -144,7 +144,7 @@ fmatch <- function(distance, out <- as.data.frame(distance, row.names = NULL) out$solution <- rep(-1L, narcs) - + #browser() #### nodes <- new("NodeInfo", data.frame(name=c(row.units, col.units, @@ -185,76 +185,43 @@ fmatch <- function(distance, if (nt == 0 && nc == 0) # edge case where subproblem is completely empty. In order to return an MCFSolutions object, I am violating some rules. Specifically, flow and capacity are all set to zero. Matches now includes sink and end nodes because the current classes prohibit empty data frames being returned/appended at the end. { - bookkeeping <- - data.frame(groups=factor(rep(NA_character_, 2L)), - start=c(1,2), - end=c(1, 2), - flow=0L, - capacity=0L - ) - - x <- rep(0L, problem.size) - matches <- distance[as.logical(x[1L:narcs]), c("i", "j")] - matches <- data.frame(groups=factor(rep(NA_integer_, nrow(matches))), - upstream=factor(matches[['i']], - levels=node.labels(nodes) - ), - downstream=factor(matches[['j']], - levels=node.labels(nodes) - ) - ) - bookkeeping[['start']] <- factor(bookkeeping[['start']], - levels=c(1L, 2L), - labels=node.labels(nodes) - ) - bookkeeping[['end']] <- factor(bookkeeping[['end']], - levels=c(1L, 2L), - labels=node.labels(nodes) - ) + bookkeeping <- data.frame(groups = factor(), + start = factor(levels = 1L:2L, + labels = node.labels(nodes)), + end = factor(levels = 1L:2L, + labels = node.labels(nodes)), + flow = integer(), + capacity = integer()) + + matches <- data.frame(groups=factor(), + upstream=factor(levels=node.labels(nodes)), + downstream=factor(levels=node.labels(nodes))) - } else { #This is another edge case where the solver is never called. However, in this case, the subproblem is not empty, so we can use most of the existing logic fairly easily. - bookkeeping <- - data.frame(groups=factor(rep(NA_character_, nt +2L*nc)), - start=c(1L:(nt + nc), # ~ c(row.units, col.units) - nt + 1L:nc), # ~ col.units - end=c(rep(endID, nc + nt), - rep(sinkID, nc) ), - flow=0L, - capacity=0L - ) - - startn<- c(as.integer(distance[['i']]), bookkeeping$start ) - endn <- c(as.integer(distance[['j']]), bookkeeping$end ) - x <- rep(0L, problem.size) - matches <- distance[as.logical(x[1L:narcs]), c("i", "j")] - bookkeeping[1L:(problem.size-narcs), "flow"] <- as.integer(x)[(narcs+1L):problem.size] - matches <- data.frame(groups=factor(rep(NA_integer_, nrow(matches))), - upstream=factor(matches[['i']], - levels=node.labels(nodes) - ), - downstream=factor(matches[['j']], - levels=node.labels(nodes) - ) - ) - bookkeeping[['start']] <- factor(bookkeeping[['start']], - levels=1L:(nt + nc + 2), - labels=node.labels(nodes) - ) - bookkeeping[['end']] <- factor(bookkeeping[['end']], - levels=1L:(nt + nc + 2), - labels=node.labels(nodes) - ) + } else { #This is another edge case where the solver is never called. However, in this case, the subproblem is not empty, so we can use most of the existing logic fairly easily. + bookkeeping <- data.frame(groups = factor(), + start = factor(levels = 1L:(nt + nc + 2), + labels = node.labels(nodes)), + end = factor(levels = 1L:(nt + nc + 2), + labels = node.labels(nodes)), + flow = integer(), + capacity = integer()) + + matches <- data.frame(groups=factor(), + upstream=factor(levels=node.labels(nodes)), + downstream=factor(levels=node.labels(nodes)) + ) } arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping) - + #arcs <- new("ArcInfo") sp <- new("SubProbInfo") sp[1L, "feasible"] <- FALSE + sp[1L, "solver"] <- "" fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, nodes=nodes, arcs=arcs) return(c( @@ -431,6 +398,7 @@ fmatch <- function(distance, sp <- new("SubProbInfo") # If any solver is called, feasibility is now determined only in this location. We know that the solver was called, but we must check to see if the problem was feasible. sp[1L, "feasible"] <- any(x == 1L) + sp[1L, "solver"] <- solver fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, nodes=nodes, arcs=arcs) return(c(obj, diff --git a/R/fullmatch.R b/R/fullmatch.R index 9b1e03e4..d41a5a9f 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -719,8 +719,9 @@ fullmatch.matrix <- function(x, } if (nrow(mcfsolutions[[ii]]@arcs@matches) > 0) { mcfsolutions[[ii]]@arcs@matches[,"groups"] <- factor(thesubprob) + mcfsolutions[[ii]]@arcs@bookkeeping[,"groups"] <- factor(thesubprob) } - mcfsolutions[[ii]]@arcs@bookkeeping[,"groups"] <- factor(thesubprob) + bookkeeping_nodes <- c('(_Sink_)', '(_End_)') for (bn in bookkeeping_nodes) { nlabs <- node.labels(mcfsolutions[[ii]]) diff --git a/tests/testthat/test.MCFSolutions.R b/tests/testthat/test.MCFSolutions.R index 806ec642..2237870e 100644 --- a/tests/testthat/test.MCFSolutions.R +++ b/tests/testthat/test.MCFSolutions.R @@ -4,14 +4,14 @@ test_that("Instantiation & validity", { expect_true(validObject(new("SubProbInfo"))) # test the prototype expect_silent(spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), - feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) + feasible=c(TRUE, FALSE), exceedance=1, solver = NA, stringsAsFactors=F) ) ) expect_true(validObject(spi1)) spi2 <- spi1 colnames(spi2)[1] <- "Subprob" - expect_error(validObject(spi2), "Cols 1-8 should be") + expect_error(validObject(spi2), "Cols 1-9 should be") expect_true(validObject(new("NodeInfo"))) # test the prototype expect_silent(ni1 <- new("NodeInfo", @@ -110,17 +110,17 @@ test_that("c() methods", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), - feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) + feasible=c(TRUE, FALSE), exceedance=1, solver = NA, stringsAsFactors=F) ) spi2 <- new("SubProbInfo", data.frame(groups=c('c'), flipped=logical(1), hashed_dist=c('a'), resolution=c(1), primal_value=c(.5), dual_value=c(0), - feasible=c(TRUE), exceedance=1, stringsAsFactors=F) + feasible=c(TRUE), exceedance=1, solver = NA, stringsAsFactors=F) ) spi3 <- new("SubProbInfo", data.frame(groups=c('d'), flipped=logical(1), hashed_dist=c('a'), resolution=c(1), primal_value=c(.5), dual_value=c(0), - feasible=c(TRUE), exceedance=1, stringsAsFactors=F) + feasible=c(TRUE), exceedance=1, solver = NA, stringsAsFactors=F) ) expect_true(validObject(c(spi1, spi2))) @@ -288,7 +288,7 @@ test_that("filtering on groups/subproblem field", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), - feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) + feasible=c(TRUE, FALSE), exceedance=1, solver = NA, stringsAsFactors=F) ) expect_error(filter_by_subproblem(spi1, groups="a"), "implemented") diff --git a/tests/testthat/test.complementarySlackness.R b/tests/testthat/test.complementarySlackness.R index 87d4962b..e0544d85 100644 --- a/tests/testthat/test.complementarySlackness.R +++ b/tests/testthat/test.complementarySlackness.R @@ -36,7 +36,7 @@ make_known_optimal <- function(flipped=FALSE) { subprob <- new("SubProbInfo", data.frame(groups="a", flipped=flipped, hashed_dist=character(1), resolution=NA_real_, primal_value=NA_real_, dual_value=NA_real_, - feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) + feasible=NA, exceedance=NA_real_, solver = NA, stringsAsFactors=FALSE) ) mcf_solution <- new("MCFSolutions", subproblems=subprob, nodes=nodes, arcs=arcs) list(x = x, m = m, mcf = mcf_solution) @@ -96,7 +96,7 @@ make_known_optimal_fullm <- function(flipped=FALSE) subprob <- new("SubProbInfo", data.frame(groups='b', flipped=flipped, hashed_dist=character(1), resolution=NA_real_, primal_value=NA_real_, dual_value=NA_real_, - feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) + feasible=NA, exceedance=NA_real_, solver = NA, stringsAsFactors=FALSE) ) mcf_solution <- new("MCFSolutions", subproblems=subprob, nodes=nodes, arcs=arcs) node.labels(mcf_solution) <- names(node.labels(mcf_solution)) # for alignment w/ m From 00ca64c0abcf9857b0383aa0f4669cce05f02069 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 13 Jan 2025 13:48:40 -0500 Subject: [PATCH 26/31] adding test explicitly checking group mismatch on hint --- tests/testthat/test.fullmatch.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/testthat/test.fullmatch.R b/tests/testthat/test.fullmatch.R index 18969f51..2ad0e8ce 100644 --- a/tests/testthat/test.fullmatch.R +++ b/tests/testthat/test.fullmatch.R @@ -688,6 +688,21 @@ test_that('Hints accepted',{ expect_silent(f1d <- fullmatch(t(mosc), min.c=.5, max.c=2, data = data, tol=0.1)) expect_is(attr(f1d, "MCFSolutions"), "FullmatchMCFSolutions") expect_silent(fullmatch(t(mosc), min.c=.5, max.c=2, data = data, tol=0.1, hint=f1d)) + + # hint has different subgroups, is ignored. + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), + fac=rep(c(rep("a",2), rep("b",3)),2), + fac1=rep(c(rep("c",2), rep("d",3)),2)) + mo <- match_on(z ~ x + strata(fac1), data=data) + f1a <- fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.1) + mos <- match_on(z ~ x + strata(fac), data=data) + + expect_warning(fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1, hint=f1a), "ignoring") + + + }) test_that("If matching fails, we should give a warning", { From 76f8fe1b94c9a78c857448acd603f2c0e7caa004 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 13 Jan 2025 14:37:52 -0500 Subject: [PATCH 27/31] updating documentation --- NAMESPACE | 1 - R/fullmatch.R | 11 +++++++++++ R/pairmatch.R | 9 +++++++++ R/subproblemutils.R | 27 --------------------------- man/fullmatch.Rd | 11 +++++++++++ man/pairmatch.Rd | 11 +++++++++++ 6 files changed, 42 insertions(+), 28 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 80b8c515..c10d2765 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,7 +106,6 @@ export(optmatch_restrictions) export(optmatch_same_distance) export(pair) export(pairmatch) -export(parse_hint_metadata) export(pscore.dist) export(scores) export(setMaxProblemSize) diff --git a/R/fullmatch.R b/R/fullmatch.R index d41a5a9f..5fe719f7 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -172,6 +172,17 @@ setTryRecovery <- function() { #' CycleCancelling, we recommend setting the \code{fullmatch_try_recovery} #' option to \code{FALSE}. #' +#' @param resolution Optional argument specifying subproblem-level +#' resolution(s) to be passed to the solver. +#' If there is only one subproblem, resolution must be specified +#' as an unnamed list of length 1. If there are multiple subproblems, one +#' must specify a named list, where the names correspond to a +#' subproblem/group ID and the values are resolution values. Default is NULL +#' +#' @param hint Optional argument specifying a hint to be passed to the solver. +#' These should be provided as \code{optmatch} objects. +#' Hints should have the same subproblem structure as the currently-specified problem. +#' #' @param ... Additional arguments, passed to \code{match_on} (e.g. \code{within}) #' or to specific methods. #' diff --git a/R/pairmatch.R b/R/pairmatch.R index 75076cbe..f7216bc0 100644 --- a/R/pairmatch.R +++ b/R/pairmatch.R @@ -50,6 +50,15 @@ #' @param data Optional data set. #' @param remove.unmatchables Should treatment group members for which there are #' no eligible controls be removed prior to matching? +#' @param resolution Optional argument specifying subproblem-level +#' resolution(s) to be passed to the solver. +#' If there is only one subproblem, resolution must be specified +#' as an unnamed list of length 1. If there are multiple subproblems, one +#' must specify a named list, where the names correspond to a +#' subproblem/group ID and the values are resolution values. Default is NULL +#' @param hint Optional argument specifying a hint to be passed to the solver. +#' These should be provided as \code{optmatch} objects. +#' Hints should have the same subproblem structure as the currently-specified problem. #' @param ... Additional arguments to pass to \code{\link{match_on}} #' (e.g. \code{within})) or to \code{\link{fullmatch}} (e.g. \code{tol}). #' It is an error to pass \code{min.controls}, diff --git a/R/subproblemutils.R b/R/subproblemutils.R index be048565..1ee67c6e 100644 --- a/R/subproblemutils.R +++ b/R/subproblemutils.R @@ -324,30 +324,3 @@ get_subproblem_info <- function(object, type) return(res) } } - -#' Parse hints for metadata -#' -#' @param full.hint -#' -#' @return -#' @export -#' -#' @examples -parse_hint_metadata <- function(full.hint) -{ - # assuming that the hint is not null - # also assuming existence of MCFSolutions object - - groupids <- get_subproblem_info(full.hint, type = "groups") - feasibility <- get_subproblem_info(full.hint, type = "feasible") - names(feasibility) <- groupids - resolutions <- get_subproblem_info(full.hint, type = "resolution") - names(resolutions) <- groupids - regret_gap <- get_subproblem_info(full.hint, type = "primal_value") - get_subproblem_info(full.hint, type = "dual_value") - names(regret_gap) <- groupids - - rl <- list(feasible = feasibility, - resolution = resolutions, - regret = regret_gap) - return(rl) -} diff --git a/man/fullmatch.Rd b/man/fullmatch.Rd index d59fe548..c5470251 100644 --- a/man/fullmatch.Rd +++ b/man/fullmatch.Rd @@ -143,8 +143,19 @@ an infeasible matching problem. When using a LEMON algorithm other than CycleCancelling, we recommend setting the \code{fullmatch_try_recovery} option to \code{FALSE}.} +\item{resolution}{Optional argument specifying subproblem-level +resolution(s) to be passed to the solver. +If there is only one subproblem, resolution must be specified +as an unnamed list of length 1. If there are multiple subproblems, one +must specify a named list, where the names correspond to a +subproblem/group ID and the values are resolution values. Default is NULL} + \item{...}{Additional arguments, passed to \code{match_on} (e.g. \code{within}) or to specific methods.} + +\item{hint}{Optional argument specifying a hint to be passed to the solver. +These should be provided as \code{optmatch} objects. +Hints should have the same subproblem structure as the currently-specified problem.} } \value{ A \code{\link{optmatch}} object (\code{factor}) indicating matched groups. diff --git a/man/pairmatch.Rd b/man/pairmatch.Rd index b93a1234..67ba7828 100644 --- a/man/pairmatch.Rd +++ b/man/pairmatch.Rd @@ -37,11 +37,22 @@ Alternatively, a precomputed distance may be entered.} \item{remove.unmatchables}{Should treatment group members for which there are no eligible controls be removed prior to matching?} +\item{resolution}{Optional argument specifying subproblem-level +resolution(s) to be passed to the solver. +If there is only one subproblem, resolution must be specified +as an unnamed list of length 1. If there are multiple subproblems, one +must specify a named list, where the names correspond to a +subproblem/group ID and the values are resolution values. Default is NULL} + \item{...}{Additional arguments to pass to \code{\link{match_on}} (e.g. \code{within})) or to \code{\link{fullmatch}} (e.g. \code{tol}). It is an error to pass \code{min.controls}, \code{max.controls}, \code{mean.controls} or \code{omit.fraction} as \code{pairmatch} must set these values.} + +\item{hint}{Optional argument specifying a hint to be passed to the solver. +These should be provided as \code{optmatch} objects. +Hints should have the same subproblem structure as the currently-specified problem.} } \value{ A \code{\link{optmatch}} object (\code{factor}) indicating matched groups. From 3fcbaa41c0ce361929cd7f8d7916d51e97dcd115 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Mon, 13 Jan 2025 16:49:47 -0500 Subject: [PATCH 28/31] examples and explanation of hinting, subproblem features in vignette --- vignettes/MCFSolutions.Rmd | 69 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/vignettes/MCFSolutions.Rmd b/vignettes/MCFSolutions.Rmd index 8589d2e2..3978b440 100644 --- a/vignettes/MCFSolutions.Rmd +++ b/vignettes/MCFSolutions.Rmd @@ -12,6 +12,7 @@ vignette: > ```{r, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, prompt=TRUE) library(magrittr) +library(optmatch) ``` # Overview @@ -215,3 +216,71 @@ Return object of a new S4 class, `Optmatch`, inheriting from factor as well as f - `c()` for `MCFSolutions`: `rbind()`s the various constituent data frames, enforcing req's that subproblems have distinct names, de-duping bookkeeping nodes and enforcing requirement that non-bookkeeping nodes' names be distinct. - `addArcs()` for `MCFSolutions` objects - `addNodes()` for `MCFSolutions` objects + +# Expectations for MCFSolutions objects + +As of 1/13/2025 in the `nodeprices` branch, MCFSolutions are now always returned from a `fullmatch()` or `pairmatch()` call, regardless of feasibility. Users can rely on subproblem and node price information being returned via the "subproblems" and "nodes" slots, respectively. This information is always present, regardless of feasibility. Information in the "nodes" slot is returned to the best of the program's ability. If a subproblem is feasible, then the results of that computation are returned. If the subproblem is infeasible, then there are two possibilities for what gets returned. If the subproblem was determined to be infeasible before being passed to the solver, then the results from `node_shell_fmatch()` are returned. If the subproblem is passed to the solver, then the results from the solver are returned. + +The helper function `get_subproblem_info()` has been added to facilitate the extraction of subproblem level data. This is intended to help make warm start and hinting functionality more accessible to users. Users can extract a mapping of units and their corresponding subproblem ID with the following: + +```{r} +data(nuclearplants) +f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) +get_subproblem_info(object = f1, type = "subproblem") +``` +Users can also extract information from the subproblems table by specifying one or more of the column names from the "subproblems" table associated with an MCFSolutions object (e.g. "resolution"). + +```{r} +get_subproblem_info(object = f1, type = c("resolution", "groups")) +``` + + +# Hinting + +As of 1/13/2025 in the `nodeprices` branch, a number of hinting and warm-starting features are available to users. Users can provide a result from `fullmatch()` or `pairmatch()` to a new, similar specification in order to provide a "warm start" to the solver. These hints are provided via an optional argument to `pairmatch()` and `fullmatch()` called `hint`. These previous solutions must have the same units and an identical subgroup/subproblem structure to the current specification, but other parameters are allowed to be different. For instance, tolerance values may differ. + +In the following example, we provide a hint that differs from a given specification by the `tol` value. So, the hint is valid: +```{r} +set.seed(201905) +data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) +mo <- match_on(z ~ x, data=data) +fm <- fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.1) +fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.0001, hint=fm) +``` + +However, in the following example, the hint and new specification have different subproblem structures. The hint is then ignored and a warning is thrown. In general, if subproblem information does not match across hints and specifications, the hint is ignored. + +```{r} +set.seed(201905) +data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), + fac=rep(c(rep("a",2), rep("b",3)),2), + fac1=rep(c(rep("c",2), rep("d",3)),2)) +mo <- match_on(z ~ x + strata(fac1), data=data) +fm <- fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.1) +mos <- match_on(z ~ x + strata(fac), data=data) + +fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1, hint=fm) +``` + +## Subproblem Level Resolution Specification + +Users can also provide subproblem level resolution values, which are then passed down to the solver. These are provided via an argument to `fullmatch()` and/or `pairmatch()` called `resolution`. This argument must take the form of a list. If there is only one subproblem in the current specification and users wish to specify a resolution value, then users must provide an unnamed list with the resolution value: + +```{r} +d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) +dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) +res.mat <- fullmatch(dist, data=d, resolution = list(.1)) +``` + +If the specification has more than one subproblem and users wish to provide resolution values(s), users must provide a named list to the `resolution` argument. The names of elements must correspond to a valid subproblem/group ID, and the elements are numeric values indicating the desired resolution value to be passed to the solver. + +```{r} +data(nuclearplants) +res.custom <- list("0" = 0.001142857, "1" = 0.001) +fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom) +``` + From 8c8ad30793dc1778c5b1b8048b21371c63285520 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 16 Jan 2025 18:26:35 -0500 Subject: [PATCH 29/31] adding some examples of hinting and subproblem level resolution specification --- inst/examples/fullmatch.R | 13 +++++++++++++ inst/examples/pairmatch.R | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/inst/examples/fullmatch.R b/inst/examples/fullmatch.R index b2727fb7..bf7e225d 100644 --- a/inst/examples/fullmatch.R +++ b/inst/examples/fullmatch.R @@ -51,3 +51,16 @@ m5 <- fullmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Full matching, then using the results as a hint for a +# similar problem that differs by tolerance +fm_init <- fullmatch(pr ~ t1 + t2, tol = .01, + data = nuclearplants) + +fm_hint <- fullmatch(pr ~ t1 + t2, tol = .001, + data = nuclearplants, hint = fm1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +fm_reso <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) diff --git a/inst/examples/pairmatch.R b/inst/examples/pairmatch.R index 71c14e0a..6288d62b 100644 --- a/inst/examples/pairmatch.R +++ b/inst/examples/pairmatch.R @@ -48,3 +48,16 @@ m5 <- pairmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Pair matching, then using the results as a hint for a +# similar problem that differs by tolerance +p_init <- pairmatch(pr ~ cost + t1, tol = .01, + data = nuclearplants) + +p_hint <- pairmatch(pr ~ cost + t1, tol = .001, + data = nuclearplants, hint = p1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +p_reso <- pairmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) From ffcbbb8f9bdce41a2ef01f3efcbc9ed40d2f9263 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 23 Jan 2025 17:15:03 -0500 Subject: [PATCH 30/31] fixing bug for empty subproblems --- R/fmatch.R | 1 - R/fullmatch.R | 4 ++-- R/matchfailed.R | 8 +++----- man/fullmatch.Rd | 13 +++++++++++++ man/pairmatch.Rd | 13 +++++++++++++ 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/R/fmatch.R b/R/fmatch.R index 84e5d9db..d6d2d1eb 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -144,7 +144,6 @@ fmatch <- function(distance, out <- as.data.frame(distance, row.names = NULL) out$solution <- rep(-1L, narcs) - #browser() #### nodes <- new("NodeInfo", data.frame(name=c(row.units, col.units, diff --git a/R/fullmatch.R b/R/fullmatch.R index 5fe719f7..74dba239 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -515,7 +515,6 @@ fullmatch.matrix <- function(x, # input error checking happens in the public fullmatch function. .fullmatch <- function(d, mnctl, mxctl, omf, hint = NULL, solver, flipped, epsilon.in) { - # if the subproblem is completely empty, short circuit if (length(d) == 0 || all(is.infinite(d))) { x <- dim(d) @@ -536,7 +535,8 @@ fullmatch.matrix <- function(x, min.cpt = mnctl, solver = solver, omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA - epsilon = epsilon.in) + epsilon = epsilon.in, + try_solver = FALSE) tmp <- list(cells = c(cells.a, cells.b), err = -1, MCFSolution = stemp[["MCFSolution"]]) return(tmp) diff --git a/R/matchfailed.R b/R/matchfailed.R index d16822e3..4d04f6a9 100644 --- a/R/matchfailed.R +++ b/R/matchfailed.R @@ -19,9 +19,7 @@ matchfailed <- function(x) { #' @return A named logical vector indicating either success or failure for each subproblem. #' @keywords internal subproblemSuccess <- function(x) { - grps <- attr(x, "subproblem") - failed <- vapply(split(x, grps), function(x) { - all(is.na(x) | grepl("\\.NA$", x)) - }, logical(1)) - return(!failed) + is.feas <- attr(x, "MCFSolutions")@subproblems[, "feasible"] + names(is.feas) <- attr(x, "MCFSolutions")@subproblems[, "groups"] + return(is.feas) } diff --git a/man/fullmatch.Rd b/man/fullmatch.Rd index c5470251..2c9d0eb6 100644 --- a/man/fullmatch.Rd +++ b/man/fullmatch.Rd @@ -268,6 +268,19 @@ m5 <- fullmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Full matching, then using the results as a hint for a +# similar problem that differs by tolerance +fm_init <- fullmatch(pr ~ t1 + t2, tol = .01, + data = nuclearplants) + +fm_hint <- fullmatch(pr ~ t1 + t2, tol = .001, + data = nuclearplants, hint = fm1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +fm_reso <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) } \references{ Hansen, B.B. and Klopfer, S.O. (2006), \sQuote{ Optimal full matching and related designs via network flows}, diff --git a/man/pairmatch.Rd b/man/pairmatch.Rd index 67ba7828..e9cfd95d 100644 --- a/man/pairmatch.Rd +++ b/man/pairmatch.Rd @@ -151,6 +151,19 @@ m5 <- pairmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Pair matching, then using the results as a hint for a +# similar problem that differs by tolerance +p_init <- pairmatch(pr ~ cost + t1, tol = .01, + data = nuclearplants) + +p_hint <- pairmatch(pr ~ cost + t1, tol = .001, + data = nuclearplants, hint = p1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +p_reso <- pairmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) } \references{ Hansen, B.B. and Klopfer, S.O. (2006), \sQuote{Optimal full matching From 20e6edf57e89ae43ea2f49b3e50773ab175e7d84 Mon Sep 17 00:00:00 2001 From: Adam Rauh Date: Thu, 23 Jan 2025 17:15:23 -0500 Subject: [PATCH 31/31] adjusting test to new expectations --- tests/testthat/test.optmatchS3.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test.optmatchS3.R b/tests/testthat/test.optmatchS3.R index 0a6f9706..f7ef887b 100644 --- a/tests/testthat/test.optmatchS3.R +++ b/tests/testthat/test.optmatchS3.R @@ -209,7 +209,8 @@ test_that("Indicating failing subproblems", { spS <- subproblemSuccess(f1) mf <- matchfailed(f1) expect_true(all(spS == FALSE)) - expect_equal(names(spS), "1") + # changing this from "1" to "" in the "nodeprices" branch. Making (temporary?) decision that when there are no subproblems, the group name is blank, rather than "1". + expect_equal(names(spS), "") expect_is(mf, "logical") expect_length(mf, nrow(nuclearplants)) expect_true(all(mf == TRUE))