diff --git a/DESCRIPTION b/DESCRIPTION index 6dc3309b..5043de00 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,6 +86,7 @@ Collate: 'solver.R' 'strata.R' 'stratumStructure.R' + 'subproblemutils.R' 'summary.ism.R' 'summary.optmatch.R' 'utilities.R' diff --git a/NAMESPACE b/NAMESPACE index c1e2017f..c10d2765 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ export(findSubproblems) export(full) export(fullmatch) export(getMaxProblemSize) +export(get_subproblem_info) export(mahal.dist) export(match_on) export(matched) diff --git a/R/MCFSolutions.R b/R/MCFSolutions.R index 7b5161f7..f29e6809 100644 --- a/R/MCFSolutions.R +++ b/R/MCFSolutions.R @@ -7,16 +7,16 @@ setOldClass("data.frame") setClass("SubProbInfo", contains="data.frame", prototype= prototype(data.frame(groups=character(1), flipped=NA, hashed_dist=NA_character_, - resolution=1, lagrangian_value=NA_real_, - dual_value=NA_real_, feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) + resolution=1, primal_value=NA_real_, + dual_value=NA_real_, feasible=NA, exceedance=NA_real_, solver = NA_character_, stringsAsFactors=FALSE) ) ) setValidity("SubProbInfo", function(object){ errors <- character(0) - if (!all(colnames(object)[1:8]== - c("groups","flipped", "hashed_dist","resolution","lagrangian_value","dual_value", "feasible", "exceedance"))) + if (!all(colnames(object)[1:9]== + c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance", "solver"))) errors <- c(errors, - 'Cols 1-8 should be:\n\t c("groups","flipped", "hashed_dist","resolution","lagrangian_value","dual_value", "feasible", "exceedance")') + 'Cols 1-9 should be:\n\t c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance", "solver")') if (!all(vapply(object[c(1,3)], is.character, logical(1)))) errors <- c(errors, 'Cols 1,3 should have type character.') @@ -136,32 +136,38 @@ setValidity("MCFSolutions", function(object){ ## levels set, namely node.labels(object) (i.e. row.names(object@nodes) ). Former ## requirement is enforced within the ArcInfo validity checker; here we confirm ## that this level set matches what's in the nodes table. + + + + + if (!identical(row.names(nodeinfo(object)), levels(object@arcs@matches[['upstream']]) - ) + ) + ) + { + if (length(xtralevs <- setdiff(node.labels(object), + levels(object@arcs@matches[['upstream']]) ) - { - if (length(xtralevs <- setdiff(node.labels(object), - levels(object@arcs@matches[['upstream']]) - ) - ) - ) - errors <- c(errors, - paste("@nodes lists nodes not in the levels of @arcs's nodes columns, e.g.", - paste(head(xtralevs,2), collapse=", "), "." - ) - ) - if (length(xtralevs <- setdiff(levels(object@arcs@matches[['upstream']]), - node.labels(object) - ) - ) - ) - errors <- c(errors, - paste("@arcs's nodes columns have levels not appearing in @nodes", - paste(head(xtralevs,2), collapse=", "), "." - ) - ) - } + ) + ) + errors <- c(errors, + paste("@nodes lists nodes not in the levels of @arcs's nodes columns, e.g.", + paste(head(xtralevs,2), collapse=", "), "." + ) + ) + if (length(xtralevs <- setdiff(levels(object@arcs@matches[['upstream']]), + node.labels(object) + ) + ) + ) + errors <- c(errors, + paste("@arcs's nodes columns have levels not appearing in @nodes", + paste(head(xtralevs,2), collapse=", "), "." + ) + ) + } + ## Nodes table must have groups column if (!any(colnames(object@nodes)=="groups")) errors <- c(errors, diff --git a/R/fmatch.R b/R/fmatch.R index d348126c..d6d2d1eb 100644 --- a/R/fmatch.R +++ b/R/fmatch.R @@ -33,7 +33,7 @@ fmatch <- function(distance, min.col.units = 1, f = 1, node_info, - solver) { + solver, try_solver = TRUE) { # checks solver and evaluates LEMON() if neccessary solver <- handleSolver(solver) @@ -48,17 +48,17 @@ fmatch <- function(distance, mxc <- as.integer(round(max.col.units)) mnc <- as.integer(round(min.col.units)) mxr <- as.integer(round(max.row.units)) - if (mnc > 1) { mxr <- 1L } + # Check that matching problem is well-specified - if (mxc < mnc) { + if ( (mxc < mnc) && try_solver ){ stop("min.col.units may not exceed max.col.units") } - if (any(c(mxc, mnc, mxr) < 1)) { + if ((any(c(mxc, mnc, mxr) < 1)) && try_solver) { stop("max and min constraints must be 1 or greater") } @@ -89,13 +89,13 @@ fmatch <- function(distance, distance[['dist']][nadists] <- replacement } - if (mxr > 1) # i.e. many-one matches permissible + if (mxr > 1 && try_solver) # i.e. many-one matches permissible { if (any(distance[['dist']] <= 0)) stop("Nonpositive discrepancies not compatible with full matching\n (see Hansen & Klopfer, 2006 JCGS, sec.4.1).") } else if (any(distance[['dist']] < 0)) stop("distance should be nonnegative") - if (!is.numeric(f) | f > 1 | f < 0) { + if ((!is.numeric(f) | f > 1 | f < 0) && try_solver) { stop("f must be a number in [0,1]") } @@ -137,19 +137,102 @@ fmatch <- function(distance, stop('Cannot choose "(_End_)" as unit name') ## Bypass solver if problem is recognizably infeasible - if ( (mxr >1 & nt/mxr > n.mc) | #max.row.units too low + if ( !try_solver | # this is admittedly a nasty workaround. There are various error checks that occur at different levels of the call stack and this allows us to bypass some of those more easily so that we can return an MCFSolutions object. In practice, we are using this option to ignore infeasibility checks so that we can proceed to create an MCFSolutions + (mxr >1 & nt/mxr > n.mc) | #max.row.units too low (mxr==1L & nt * mnc > n.mc) | #min.col.units too high (nt * mxc < n.mc)) { #max.col.units too low out <- as.data.frame(distance, row.names = NULL) out$solution <- rep(-1L, narcs) + #### + nodes <- new("NodeInfo", + data.frame(name=c(row.units, col.units, + "(_End_)", "(_Sink_)"),# note that `factor()` would put these at start not end of levels + price=0L, + upstream_not_down=c(rep(TRUE, nt), rep(FALSE, nc), NA, NA), + supply=c(rep(mxc, nt), rep(0L, nc), + -(mxc * nt - n.mc), -n.mc), + groups=factor(rep(NA_character_, nt+nc+2L)), + stringsAsFactors=FALSE + ) + ) + + endID <- nt + nc + 1L + sinkID <- endID + 1L + node.labels(nodes) <- nodes[['name']] # new convention per i166 + + if ( !all(node_info[['price']]==0) ) + { + nodes[,'price'] <- node_info[match(c(row.units, col.units, + "(_End_)", "(_Sink_)"), + node_info$name), + 'price'] + ## if a col unit doesn't have a match w/in node_info$name, then + ## its nodes row now has a 0 for its price. Replace that w/ min of + ## bookkeeping node prices may preserve CS in some cases. + if (length(col.noprice <- setdiff(col.units, node_info[['name']]))) + nodes[nodes[['name']] %in% col.noprice, 'price'] <- + min(node_info[match(c("(_End_)", "(_Sink_)"), node_info$name), + 'price', drop=TRUE] + ) + } + # ID numbers implicitly point to corresp. row of nodes + #### Arcs involving End or Sink nodes #### + # There are multiple cases where the solver is NOT called because the problem is clearly infeasible. This makes it difficult to always return an MCFSolutions object. When the solver is called, we can create all of the various constituent parts of the MCFSolutions object without much trouble, although we must paper over certain things, like setting flow = capacity = 0. However, when the solver is not called, we have to make up an arbitrary MCFSolutions object to return. Because of the restrictions on the classes, this is currently being done in a very hacky way. Long term, we would want to consider the expectations for those classes, perhaps change the defaults or class structure somehow. For instance, what should we return when the subproblem is entirely empty? + + # At the moment there are really no guarantees about what can be found in these tables, aside from the subproblems table which should contain a correct specification of feasibility. + + if (nt == 0 && nc == 0) # edge case where subproblem is completely empty. In order to return an MCFSolutions object, I am violating some rules. Specifically, flow and capacity are all set to zero. Matches now includes sink and end nodes because the current classes prohibit empty data frames being returned/appended at the end. + { + + bookkeeping <- data.frame(groups = factor(), + start = factor(levels = 1L:2L, + labels = node.labels(nodes)), + end = factor(levels = 1L:2L, + labels = node.labels(nodes)), + flow = integer(), + capacity = integer()) + + matches <- data.frame(groups=factor(), + upstream=factor(levels=node.labels(nodes)), + downstream=factor(levels=node.labels(nodes))) + + + } else { #This is another edge case where the solver is never called. However, in this case, the subproblem is not empty, so we can use most of the existing logic fairly easily. + + bookkeeping <- data.frame(groups = factor(), + start = factor(levels = 1L:(nt + nc + 2), + labels = node.labels(nodes)), + end = factor(levels = 1L:(nt + nc + 2), + labels = node.labels(nodes)), + flow = integer(), + capacity = integer()) + + matches <- data.frame(groups=factor(), + upstream=factor(levels=node.labels(nodes)), + downstream=factor(levels=node.labels(nodes)) + ) + } + + + + arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping) + #arcs <- new("ArcInfo") + sp <- new("SubProbInfo") + sp[1L, "feasible"] <- FALSE + sp[1L, "solver"] <- "" + fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, + nodes=nodes, arcs=arcs) return(c( out, list(maxerr = 0), - list(MCFSolution = NULL) + list(MCFSolution = fmcfs) )) - } + } + ## after this point, one of the solvers will be called. + ## That is, the problem is not recognizably infeasible prior to calling the solver. + ## The logic for constructing MCFSolutions is much more straightforward from this point forward ## Min-Cost-Flow representation of problem #### ## Each node has a integer ID number, implicitly pointing ## to corresponding row of this data frame. @@ -232,7 +315,6 @@ fmatch <- function(distance, algorithm = algorithm) x <- as.numeric(lout[[1]]) nodes[, "price"] <- lout[[2]] - nodes[, "price"] <- (nodes[, "price"] - nodes[, "price"][endID]) * -1 if (lout[[4]] != "OPTIMAL" || all(x == -1)) { @@ -254,9 +336,7 @@ fmatch <- function(distance, ## Material used to create s3 optmatch object: - feas <- fop$feasible1 - x <- feas * fop$x1 - + x <- fop$feasible1 * fop$x1 #### Recover node prices, store in nodes table ## ## In full matching, each upstream (row) or downstream (column) node starts ## an arc ending at End, and these are also the only arcs ending there. End @@ -291,8 +371,6 @@ fmatch <- function(distance, obj <- as.data.frame(distance, row.names = NULL) obj$solution <- x[seq.int(from=min(1L, narcs), to=narcs)] - - ### Recover arc flow info, store in `arcs` ### ## info extracted from problem solution: matches <- distance[as.logical(x[1L:narcs]), c("i", "j")] @@ -317,7 +395,9 @@ fmatch <- function(distance, arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping) sp <- new("SubProbInfo") - sp[1L, "feasible"] <- TRUE + # If any solver is called, feasibility is now determined only in this location. We know that the solver was called, but we must check to see if the problem was feasible. + sp[1L, "feasible"] <- any(x == 1L) + sp[1L, "solver"] <- solver fmcfs <- new("FullmatchMCFSolutions", subproblems=sp, nodes=nodes, arcs=arcs) return(c(obj, diff --git a/R/fullmatch.R b/R/fullmatch.R index 169cf4a8..74dba239 100644 --- a/R/fullmatch.R +++ b/R/fullmatch.R @@ -172,6 +172,17 @@ setTryRecovery <- function() { #' CycleCancelling, we recommend setting the \code{fullmatch_try_recovery} #' option to \code{FALSE}. #' +#' @param resolution Optional argument specifying subproblem-level +#' resolution(s) to be passed to the solver. +#' If there is only one subproblem, resolution must be specified +#' as an unnamed list of length 1. If there are multiple subproblems, one +#' must specify a named list, where the names correspond to a +#' subproblem/group ID and the values are resolution values. Default is NULL +#' +#' @param hint Optional argument specifying a hint to be passed to the solver. +#' These should be provided as \code{optmatch} objects. +#' Hints should have the same subproblem structure as the currently-specified problem. +#' #' @param ... Additional arguments, passed to \code{match_on} (e.g. \code{within}) #' or to specific methods. #' @@ -200,6 +211,7 @@ fullmatch <- function(x, tol = .001, data = NULL, solver = "", + resolution = NULL, ...) { # if x does not exist then print helpful error msg @@ -232,6 +244,7 @@ fullmatch.default <- function(x, data = NULL, solver = "", within = NULL, + resolution = NULL, ...) { if (!inherits(x, gsub("match_on.","",methods("match_on")))) { @@ -258,6 +271,7 @@ fullmatch.default <- function(x, tol=tol, data=mfd, solver=solver, + resolution=resolution, ...) attr(out, "call") <- match.call() out @@ -274,6 +288,7 @@ fullmatch.numeric <- function(x, solver = "", z, within = NULL, + resolution = NULL, ...) { m <- match_on(x, within=within, z=z, ...) @@ -285,6 +300,7 @@ fullmatch.numeric <- function(x, tol=tol, data=data, solver=solver, + resolution=resolution, ...) attr(out, "call") <- match.call() out @@ -301,6 +317,7 @@ fullmatch.matrix <- function(x, solver = "", within = NULL, hint, + resolution = NULL, ...) { hint <- if (missing(hint)) NULL else nodeinfo(hint) @@ -382,23 +399,24 @@ fullmatch.matrix <- function(x, subproblemids <- names(problems) if (is.null(subproblemids)) subproblemids <- character(1L) - if (is.null(hint)) { hints <- rep(list(NULL), np) - } else { - hints <- split(hint, hint[['groups']], - drop=TRUE # drops levels of hint$groups that aren't represented in hint - ) - nohint <- setdiff(subproblemids, names(hints)) - hints <- hints[match(subproblemids, names(hints), 0L)] - if (length(hints)>0) for (ii in 1L:length(hints)) hints[[ii]] <- new("NodeInfo", hints[[ii]]) - if (length(nohint)) - { - nullhint <- rep(list(NULL), length(nohint)) - names(nullhint) <- nohint - hints <- c(hints, nullhint) - if (length(nohint)==np) warning("Hint lacks information about subproblems of this problem; ignoring.") - } - hints <- hints[match(subproblemids, names(hints))] - } + if (is.null(hint)) { hints <- rep(list(NULL), np) + } else { + hints <- split(hint, hint[['groups']], + drop=TRUE # drops levels of hint$groups that aren't represented in hint + ) + nohint <- setdiff(subproblemids, names(hints)) + hints <- hints[match(subproblemids, names(hints), 0L)] + if (length(hints)>0) for (ii in 1L:length(hints)) hints[[ii]] <- new("NodeInfo", hints[[ii]]) + if (length(nohint)) + { + nullhint <- rep(list(NULL), length(nohint)) + names(nullhint) <- nohint + hints <- c(hints, nullhint) + if (length(nohint)==np) warning("Hint lacks information about subproblems of this problem; ignoring.") + } + hints <- hints[match(subproblemids, names(hints))] + } + if (length(min.controls) > 1 & np != length(min.controls)) { if (is.null(names(min.controls))) @@ -495,8 +513,8 @@ fullmatch.matrix <- function(x, # a helper to handle a single matching problem. all args required. # input error checking happens in the public fullmatch function. - .fullmatch <- function(d, mnctl, mxctl, omf, hint = NULL, solver) { - + .fullmatch <- function(d, mnctl, mxctl, omf, hint = NULL, solver, + flipped, epsilon.in) { # if the subproblem is completely empty, short circuit if (length(d) == 0 || all(is.infinite(d))) { x <- dim(d) @@ -504,59 +522,67 @@ fullmatch.matrix <- function(x, cells.b <- rep(NA, x[2]) names(cells.a) <- rownames(d) names(cells.b) <- colnames(d) - tmp <- list(cells = c(cells.a, cells.b), err = -1) + + if (!flipped) { + omf.calc <- omf + } else { + omf.calc <- -1 * omf + } + + stemp <- solve_reg_fm_prob(node_info = hint, + distspec = d, + max.cpt = mxctl, + min.cpt = mnctl, + solver = solver, + omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA + epsilon = epsilon.in, + try_solver = FALSE) + tmp <- list(cells = c(cells.a, cells.b), err = -1, + MCFSolution = stemp[["MCFSolution"]]) return(tmp) } ncol <- dim(d)[2] nrow <- dim(d)[1] - tol.frac <- - if (total.n > 2 * np) { - (nrow + ncol - 2)/(total.n - 2 * np) - } else 1 - - # if omf is specified (i.e. not NA), see if is non-negative - # if omf is not specified, check to see if mxctl is > .5 - if (switch(1 + is.na(omf), omf >= 0, mxctl > .5)) { - maxc <- min(mxctl, ncol) - minc <- max(mnctl, 1/nrow) - omf.calc <- omf - flipped <- FALSE + + if (!flipped) { + omf.calc <- omf } else { - maxc <- min(1/mnctl, ncol) - minc <- max(1/mxctl, 1/nrow) omf.calc <- -1 * omf - d <- t(d) - flipped <- TRUE } - ## (I'd like to do the following higher in the call stack, and also more - ## informatively, obviating subsequent needs for max.cpt, min.cpt, - ## omit.fraction etc. Keeping it here for fear of mischief with flipped subproblems.) - if (is.null(hint)) hint <- nodes_shell_fmatch(rownames(d), colnames(d)) temp <- solve_reg_fm_prob(node_info = hint, - distspec = d, - max.cpt = maxc, - min.cpt = minc, - tolerance = TOL * tol.frac, - solver = solver, - omit.fraction = if(!is.na(omf)) { omf.calc }# passes NULL for NA - ) - if (!is.null(temp$MCFSolution)) - temp$MCFSolution@subproblems[1L,"flipped"] <- flipped + distspec = d, + max.cpt = mxctl, + min.cpt = mnctl, + solver = solver, + omit.fraction = if(!is.na(omf)) { omf.calc }, # passes NULL for NA + epsilon = epsilon.in) + + temp$MCFSolution@subproblems[1L,"flipped"] <- flipped + return(temp) } # a second helper function, that will attempt graceful recovery in situations where the match # is infeasible with the given max.controls - .fullmatch.with.recovery <- function(d.r, mnctl.r, mxctl.r, omf.r, hint.r = NULL, solver) { + .fullmatch.with.recovery <- function(d.r, mnctl.r, mxctl.r, omf.r, hint.r = NULL, solver, + flipped.r, epsilon.r) { # if the subproblem isn't clearly infeasible, try to get a match - if (mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE)) { - tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver) + if (!flipped.r) + { + feasible.condition <- mxctl.r * dim(d.r)[1] >= prod(dim(d.r)[2], 1-omf.r, na.rm=TRUE) + } else { + feasible.condition <- mxctl.r * dim(d.r)[2] >= prod(dim(d.r)[1], 1-omf.r, na.rm=TRUE) + } + + if (feasible.condition) { + tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r) if (!all(is.na(tmp[1]$cells))) { # subproblem is feasible with given constraints, no need to recover new.omit.fraction <<- c(new.omit.fraction, omf.r) @@ -565,8 +591,17 @@ fullmatch.matrix <- function(x, } # if max.control is in [1, Inf), and we're infeasible if(is.finite(mxctl.r) & mxctl.r >= 1) { - # Re-solve with no max.control - tmp2 <- list(.fullmatch(d.r, mnctl.r, Inf, omf.r, hint.r, solver)) + # Re-solve with no max.control -- we most override the precomputed one now + if (!flipped.r) { + mxctl.r.new <- ncol(d.r) + } else { + mxctl.r.new <- min(1/mnctl.r, ncol(d.r)) + } + + tmp2 <- list(.fullmatch(d.r, mnctl.r, mxctl.r.new, omf.r, hint.r, solver, + flipped.r, epsilon.r)) + + tmp2.optmatch <- makeOptmatch(d.r, tmp2, match.call(), data) trial.ss <- stratumStructure(tmp2.optmatch) treats <- as.numeric(unlist(lapply(strsplit(names(trial.ss), ":"),"[",1))) @@ -575,7 +610,8 @@ fullmatch.matrix <- function(x, if(num.controls == 0) { # infeasible anyways if (!exists("tmp")) { - tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver) + tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r) } new.omit.fraction <<- c(new.omit.fraction, omf.r) return(tmp) @@ -584,14 +620,16 @@ fullmatch.matrix <- function(x, # feasible with the new omit fraction new.omit.fraction <<- c(new.omit.fraction, new.omf.r) - return(.fullmatch(d.r, mnctl.r, mxctl.r, new.omf.r, hint.r, solver)) + return(.fullmatch(d.r, mnctl.r, mxctl.r, new.omf.r, hint.r, solver, + flipped.r, epsilon.r)) } else { # subproblem is infeasible, but we can't try to fix because no max.controls if (!exists("tmp")) { - tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver) + tmp <- .fullmatch(d.r, mnctl.r, mxctl.r, omf.r, hint.r, solver, + flipped.r, epsilon.r) } - new.omit.fraction <<- c(new.omit.fraction, omf.r) + new.omit.fraction <<- c(new.omit.fraction) return(tmp) } } @@ -603,11 +641,37 @@ fullmatch.matrix <- function(x, warning("The flag fullmatch_try_recovery is unset, setting to TRUE") setTryRecovery() } + precomputed_parameters <- parse_subproblems(problems = problems, + min.controls = min.controls, + max.controls = max.controls, + omit.fraction = omit.fraction, + hints = hints, + total.n = total.n, + TOL.in = TOL, + resolution = resolution) + if (options()$fullmatch_try_recovery) { - solutions <- mapply(.fullmatch.with.recovery, problems, min.controls, max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) + solutions <- mapply(.fullmatch.with.recovery, + precomputed_parameters[['subproblems']], + precomputed_parameters[['min.controls']], + precomputed_parameters[['max.controls']], + omit.fraction, + precomputed_parameters[["hints"]], + solver, + precomputed_parameters[['flipped_status']], + precomputed_parameters[['epsilons']], + SIMPLIFY = FALSE) } else { - solutions <- mapply(.fullmatch, problems, min.controls, max.controls, omit.fraction, hints, solver, SIMPLIFY = FALSE) + solutions <- mapply(.fullmatch, + precomputed_parameters[['subproblems']], + precomputed_parameters[['min.controls']], + precomputed_parameters[['max.controls']], + omit.fraction, + precomputed_parameters[["hints"]], + solver, + precomputed_parameters[['flipped_status']], + precomputed_parameters[['epsilons']], SIMPLIFY = FALSE) } mout <- makeOptmatch(x, solutions, match.call(), data) @@ -654,18 +718,21 @@ fullmatch.matrix <- function(x, ## assemble MCF material mcfsolutions <- rep(list(NULL), np) names(mcfsolutions) <- subproblemids - for (ii in 1L:np) { - if (!is.null(solutions[[ii]]$MCFSolution)) - { + for (ii in 1L:np) + { mcfsolutions[[ii]] <- solutions[[ii]]$MCFSolution mcfsolutions[[ii]]@subproblems[1L,"hashed_dist"] <- disthash thesubprob <- subproblemids[ii] mcfsolutions[[ii]]@subproblems[1L,"groups"] <- thesubprob - mcfsolutions[[ii]]@nodes[,"groups"] <- factor(thesubprob) + if (nrow(mcfsolutions[[ii]]@nodes ) > 0) + { + mcfsolutions[[ii]]@nodes[,"groups"] <- factor(thesubprob) + } if (nrow(mcfsolutions[[ii]]@arcs@matches) > 0) { mcfsolutions[[ii]]@arcs@matches[,"groups"] <- factor(thesubprob) + mcfsolutions[[ii]]@arcs@bookkeeping[,"groups"] <- factor(thesubprob) } - mcfsolutions[[ii]]@arcs@bookkeeping[,"groups"] <- factor(thesubprob) + bookkeeping_nodes <- c('(_Sink_)', '(_End_)') for (bn in bookkeeping_nodes) { nlabs <- node.labels(mcfsolutions[[ii]]) @@ -673,16 +740,11 @@ fullmatch.matrix <- function(x, node.labels(mcfsolutions[[ii]]) <- nlabs } } - } + mcfsolutions <- mcfsolutions[!vapply(mcfsolutions, is.null, logical(1))] - attr(mout, "MCFSolutions") <- if (length(mcfsolutions)==0) { - NULL - } else { - names(mcfsolutions)[1] <- "x" - ##b/c in next line `c()` needs to dispatch on an `x` argument - do.call("c", mcfsolutions) - } + names(mcfsolutions)[1] <- "x" + attr(mout, "MCFSolutions") <- do.call("c", mcfsolutions) # save solver information attr(mout, "solver") <- solver diff --git a/R/matchfailed.R b/R/matchfailed.R index d16822e3..4d04f6a9 100644 --- a/R/matchfailed.R +++ b/R/matchfailed.R @@ -19,9 +19,7 @@ matchfailed <- function(x) { #' @return A named logical vector indicating either success or failure for each subproblem. #' @keywords internal subproblemSuccess <- function(x) { - grps <- attr(x, "subproblem") - failed <- vapply(split(x, grps), function(x) { - all(is.na(x) | grepl("\\.NA$", x)) - }, logical(1)) - return(!failed) + is.feas <- attr(x, "MCFSolutions")@subproblems[, "feasible"] + names(is.feas) <- attr(x, "MCFSolutions")@subproblems[, "groups"] + return(is.feas) } diff --git a/R/max.controls.cap.R b/R/max.controls.cap.R index a55625be..9ddacc45 100644 --- a/R/max.controls.cap.R +++ b/R/max.controls.cap.R @@ -68,10 +68,13 @@ maxControlsCap <- function(distance, min.controls = NULL, solver = "") nodes <- nodes_shell_fmatch(rownames = trnl, colnames = tcnl) t_nodes <- nodes_shell_fmatch(rownames = tcnl, colnames = trnl) # FEASIBILITY CHECK -- temp depends on whether problem requires flipping - temp <- solve_reg_fm_prob(node_info=nodes, distspec = tdm, - max.cpt = min(tlmxc, ncol), - min.cpt = max(tgmnc, 1/nrow), tolerance=.5, - omit.fraction = NULL, solver = solver) + temp <- solve_reg_fm_prob(node_info=nodes, + distspec = tdm, + max.cpt = min(tlmxc, ncol), + min.cpt = max(tgmnc, 1/nrow), + tolerance=.5, + omit.fraction = NULL, + solver = solver) # IF THE PROBLEM IS FEASIBLE, SET TLMXC TO GREATEST OBTAINED # RATIO OF CONTROLS TO TREATED UNITS. THIS MAY BE MUCH LESS THAN diff --git a/R/pairmatch.R b/R/pairmatch.R index 1edf95fc..f7216bc0 100644 --- a/R/pairmatch.R +++ b/R/pairmatch.R @@ -50,6 +50,15 @@ #' @param data Optional data set. #' @param remove.unmatchables Should treatment group members for which there are #' no eligible controls be removed prior to matching? +#' @param resolution Optional argument specifying subproblem-level +#' resolution(s) to be passed to the solver. +#' If there is only one subproblem, resolution must be specified +#' as an unnamed list of length 1. If there are multiple subproblems, one +#' must specify a named list, where the names correspond to a +#' subproblem/group ID and the values are resolution values. Default is NULL +#' @param hint Optional argument specifying a hint to be passed to the solver. +#' These should be provided as \code{optmatch} objects. +#' Hints should have the same subproblem structure as the currently-specified problem. #' @param ... Additional arguments to pass to \code{\link{match_on}} #' (e.g. \code{within})) or to \code{\link{fullmatch}} (e.g. \code{tol}). #' It is an error to pass \code{min.controls}, @@ -69,6 +78,7 @@ pairmatch <- function(x, controls = 1, data = NULL, remove.unmatchables = FALSE, + resolution = NULL, ...) { # if x does not exist then print helpful error msg @@ -100,6 +110,7 @@ pairmatch.default <- function(x, data = NULL, remove.unmatchables = FALSE, within = NULL, + resolution = NULL, ...) { if (!inherits(x, gsub("match_on.","",methods("match_on")))) { @@ -122,6 +133,7 @@ pairmatch.default <- function(x, controls=controls, data=mfd, remove.unmatchables=remove.unmatchables, + resolution = resolution, ...) attr(out, "call") <- match.call() out @@ -134,6 +146,7 @@ pairmatch.numeric <- function(x, remove.unmatchables = FALSE, z, within = NULL, + resolution = NULL, ...) { m <- match_on(x, within=within, z=z, ...) @@ -141,6 +154,7 @@ pairmatch.numeric <- function(x, controls=controls, data=data, remove.unmatchables=remove.unmatchables, + resolution = resolution, ...) attr(out, "call") <- match.call() out @@ -152,6 +166,7 @@ pairmatch.matrix <- function(x, data = NULL, remove.unmatchables = FALSE, within = NULL, + resolution = NULL, ...) { validDistanceSpecification(x) # will stop() on error @@ -219,6 +234,7 @@ pairmatch.matrix <- function(x, max.controls = controls, omit.fraction = omf, data = data, + resolution = resolution, ...) if(!remove.unmatchables) { options("fullmatch_try_recovery" = saveopt) diff --git a/R/solve_reg_fm_prob.R b/R/solve_reg_fm_prob.R index a48ce1be..c4eeb8a1 100644 --- a/R/solve_reg_fm_prob.R +++ b/R/solve_reg_fm_prob.R @@ -21,27 +21,31 @@ ##* @param distspec InfinitySparseMatrix, matrix, etc (must have a `prepareMatching()` method) ##* @param min.cpt double, minimum permissible ratio of controls per treatment ##* @param max.cpt double, maximum permissible ratio of controls per treatment -##* @param tolerance +##* @param solver +##* @param epsilon double, grid width ##* @param omit.fraction +##* @param try_solver Logical. This is a temporary parameter allowing us to work around some of the error checking happening at different points in order to recognize infeasible subproblems but still return an MCFSolutions object with an explicit indication of feasibility. ##* @return ##* @keywords internal solve_reg_fm_prob <- function(node_info, - distspec, - min.cpt, - max.cpt, - tolerance, - omit.fraction=NULL, - solver) { + distspec, + min.cpt, + max.cpt, + omit.fraction=NULL, + solver, + epsilon = NULL, + tolerance = NULL, + try_solver = TRUE) { if (min.cpt <=0 | max.cpt<=0) { stop("inputs min.cpt, max.cpt must be positive") } - stopifnot(is(node_info, "NodeInfo")) - rownames <- subset(node_info[["name"]], node_info[['upstream_not_down']]) - colnames <- subset(node_info[["name"]], !node_info[['upstream_not_down']]) - nrows <- length(rownames) - ncols <- length(colnames) + stopifnot(is(node_info, "NodeInfo")) + rownames <- subset(node_info[["name"]], node_info[['upstream_not_down']]) + colnames <- subset(node_info[["name"]], !node_info[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) if (!all(rownames %in% dimnames(distspec)[[1]])) { stop("node_info rownames must be rownames for \'distspec\'") } @@ -55,27 +59,25 @@ solve_reg_fm_prob <- function(node_info, stop("Argument \'distspec\' must have a \'edgelist\' method") } - # convert the distspec to cannonical EdgeList dm <- edgelist(distspec, c(rownames, colnames)) - matchable_nodes_info <- - filter(node_info, - is.na(node_info[['upstream_not_down']]) | #retail bookkeeping nodes + filter(node_info, + is.na(node_info[['upstream_not_down']]) | #retail bookkeeping nodes is_matchable(node_info[['name']], dm, "either") - ) + ) rfeas <- sum( matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) cfeas <- sum(!matchable_nodes_info[['upstream_not_down']], na.rm=TRUE) ## Update `omit.fraction`: if (cfeas < ncols & is.numeric(omit.fraction) && omit.fraction >0) { - original_number_to_omit <- omit.fraction*ncols - number_implicitly_omitted_already <- ncols - cfeas - omit.fraction <- - (original_number_to_omit - number_implicitly_omitted_already)/cfeas - ## If the number to be omitted is less than the number of unmatchable - ## columns, ncols - cfeas, then this omit.fraction can be negative. Then - ## we just omit a few more than directed by the supplied omit.fraction: - if (omit.fraction <= 0) omit.fraction <- NULL + original_number_to_omit <- omit.fraction*ncols + number_implicitly_omitted_already <- ncols - cfeas + omit.fraction <- + (original_number_to_omit - number_implicitly_omitted_already)/cfeas + ## If the number to be omitted is less than the number of unmatchable + ## columns, ncols - cfeas, then this omit.fraction can be negative. Then + ## we just omit a few more than directed by the supplied omit.fraction: + if (omit.fraction <= 0) omit.fraction <- NULL } if (is.null(omit.fraction)) { @@ -86,28 +88,32 @@ solve_reg_fm_prob <- function(node_info, } f.ctls <- 1-omit.fraction } - - if (floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min - ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment - !rfeas | !cfeas # either no controls or no treatments - ) + if (!try_solver | + floor(min.cpt) > ceiling(max.cpt) | #inconsistent max/min + ceiling(1/min.cpt) < floor(1/max.cpt) | #controls per treatment + !rfeas | !cfeas # either no controls or no treatments + ) { ans <- rep(NA_integer_, nrows + ncols) names(ans) <- c(rownames, colnames) - return(list(cells=ans, err=0, MCFSolution=NULL)) + #engaging in some questionable practices here, but short circuiting doubleSolve to generate a placeholder MCFSolutions object to return. Everything except the subproblems table is suspect. + stmp <- doubleSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, + rfeas, cfeas, epsilon, solver, try_solver = FALSE) + return(list(cells = ans, err=0, MCFSolution=stmp[["MCFSolution"]])) } - old.o <- options(warn=-1) - epsilon_lower_lim <- max(dm$'dist')/(.Machine$integer.max/64 -2) - epsilon <- if (tolerance>0 & rfeas>1 & cfeas>1) { - max(epsilon_lower_lim, tolerance/(rfeas + cfeas - 2)) - } else epsilon_lower_lim - options(old.o) - if (all(abs(dm$'dist'- 0) < sqrt(.Machine$double.eps))) { dm$'dist' <- rep(1L, length(dm$'dist')) # so we'll be routed to intSolve() } + + if (!is.null(tolerance) && is.null(epsilon)) { #if tolerance is provided rather than epsilon, calculate epsilon + epsilon <- calculate_epsilon(rfeas = rfeas, + cfeas = cfeas, + tolerance = tolerance, + maxdist = max(dm$'dist')) + } + temp <- if (is.integer(dm$'dist')) { intSolve(dm, min.cpt, max.cpt, f.ctls, matchable_nodes_info, solver) @@ -124,10 +130,10 @@ solve_reg_fm_prob <- function(node_info, names(ans) <- c(rownames, colnames) ans[names(matches)] <- matches - if (!is.null(temp[["MCFSolution"]])) - { + + if (!temp[["MCFSolution"]]@subproblems[1L, "feasible"]) #this logic takes the place of something that would check to see if the MCFSolution was NULL. I'm not 100% sure these things are interchangeable but it does pass tests for the time being + { temp[["MCFSolution"]]@subproblems[1L, "exceedance"] <- temp$maxerr - temp[["MCFSolution"]]@subproblems[1L, "feasible"] <- any(temp$solutions==1L) ## Presently we can treat this subproblem as non-flipped even if it was, ## since `dm` will have been transposed in the event of flipping. Doing @@ -137,23 +143,23 @@ solve_reg_fm_prob <- function(node_info, ## where that transposition was made. temp[["MCFSolution"]]@subproblems[1L, "flipped"] <- FALSE ## ... and now we can proceed with: - evaluate_lagrangian(dm, temp[["MCFSolution"]]) -> - temp[["MCFSolution"]]@subproblems[1L, "lagrangian_value"] + evaluate_primal(dm, temp[["MCFSolution"]]) -> + temp[["MCFSolution"]]@subproblems[1L, "primal_value"] evaluate_dual(dm, temp[["MCFSolution"]]) -> temp[["MCFSolution"]]@subproblems[1L, "dual_value" ] nodeinfo(temp[["MCFSolution"]]) <- update(node_info, nodeinfo(temp[["MCFSolution"]])) - } + } - return(list(cells = ans, err = temp$maxerr, - MCFSolution=temp[["MCFSolution"]] - ) - ) + return(list(cells = ans, err = temp$maxerr, + MCFSolution=temp[["MCFSolution"]] + ) + ) } - +# see try_solver documentation in solve_reg_fm_prob function for description of that. doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, - rfeas, cfeas, epsilon, solver) + rfeas, cfeas, epsilon, solver, try_solver = TRUE) { dm_distance <- dm$'dist' #Used below in roundoff error crude estimate dm$'dist' <- as.integer(ceiling(.5 + dm$'dist' / epsilon)) @@ -166,15 +172,13 @@ doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, ) intsol <- intSolve(dm=dm, min.cpt=min.cpt, max.cpt=max.cpt, f.ctls=f.ctls, - node_info = node_info, solver = solver) + node_info = node_info, solver = solver, try_solver = try_solver) - if (!is.null(intsol$MCFSolution)) - { - intsol$MCFSolution@subproblems[1L,"resolution"] <- epsilon - intsol$MCFSolution@nodes[,'price'] <- - intsol$MCFSolution@nodes[['price']] * epsilon - } + intsol$MCFSolution@subproblems[1L,"resolution"] <- epsilon + + intsol$MCFSolution@nodes[,'price'] <- intsol$MCFSolution@nodes[['price']] * epsilon + intsol$maxerr <- if (any(is.na(intsol$solution) | @@ -192,7 +196,7 @@ doubleSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, } -intSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, solver) { +intSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, solver, try_solver = TRUE) { stopifnot(is(dm, "EdgeList"), is(node_info, "NodeInfo")) if (!is.integer(node_info[['price']])) { price_col_position <- which(node_info@names=="price") @@ -206,7 +210,8 @@ intSolve <- function(dm, min.cpt, max.cpt, f.ctls, node_info, solver) { min.col.units = max(1, floor(min.cpt)), f = f.ctls, node_info = node_info, - solver = solver) + solver = solver, + try_solver = try_solver) } ##* Small helper function to turn a solution data.frame into a factor of matches diff --git a/R/subproblemutils.R b/R/subproblemutils.R new file mode 100644 index 00000000..1ee67c6e --- /dev/null +++ b/R/subproblemutils.R @@ -0,0 +1,326 @@ +get_hints <- function(hint, subproblems) +{ + np <- length(subproblems) + subproblemids <- names(subproblems) + if (is.null(subproblemids)) subproblemids <- character(1L) + if (is.null(hint)) + { + hints <- rep(list(NULL), np) + } + else + { + hints <- split(hint, hint[['groups']], + drop=TRUE # drops levels of hint$groups that aren't represented in hint + ) + nohint <- setdiff(subproblemids, names(hints)) + hints <- hints[match(subproblemids, names(hints), 0L)] + if (length(hints)>0) for (ii in 1L:length(hints)) hints[[ii]] <- new("NodeInfo", hints[[ii]]) + if (length(nohint)) + { + nullhint <- rep(list(NULL), length(nohint)) + names(nullhint) <- nohint + hints <- c(hints, nullhint) + if (length(nohint)==np) warning("Hint lacks information about subproblems of this problem; ignoring.") + } + hints <- hints[match(subproblemids, names(hints))] + } + hint.list <- mapply(prepare_subproblem_hint, + d = subproblems, + hint = hints, + SIMPLIFY = FALSE) + return(hint.list) +} + + +get_tol_frac <- function(d, total_n, np) +{ + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + tol.frac <- + if (total_n > 2 * np) { + (nrow + ncol - 2)/(total_n - 2 * np) + } else 1 + return(tol.frac) +} + + +get_flipped_status <- function(d, omf, mxctl, mnctl) +{ + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + if (switch(1 + is.na(omf), omf >= 0, mxctl > .5)) { + maxc <- min(mxctl, ncol) + minc <- max(mnctl, 1/nrow) + flipped <- FALSE + } else { + maxc <- min(1/mnctl, ncol) + minc <- max(1/mxctl, 1/nrow) + d <- t(d) + flipped <- TRUE + } + return(list(subproblem = d, + flipped_status = flipped, + maxc = maxc, + minc = minc)) +} + +prepare_subproblem_hint <- function(d, hint) +{ + if (is.null(hint)){ + hint <- nodes_shell_fmatch(rownames(d), + colnames(d)) + } + return(hint) +} + +get_dms <- function(d, + hint) +{ + + # if the subproblem is completely empty, short circuit + if (length(d) == 0 || + all(is.infinite(d))) { + return(NA) + } + + rownames <- subset(hint[["name"]], hint[['upstream_not_down']]) + colnames <- subset(hint[["name"]], !hint[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) + dm <- edgelist(d, c(rownames, colnames)) + return(dm) +} + +get_epsilon <- function(subproblem, flipped, + hint, tolerance) +{ + rownames <- subset(hint[["name"]], hint[['upstream_not_down']]) + colnames <- subset(hint[["name"]], !hint[['upstream_not_down']]) + nrows <- length(rownames) + ncols <- length(colnames) + + sub.dmat <- as.InfinitySparseMatrix(subproblem) + + if (!flipped) + { + rfeas <- length(unique(sub.dmat@rows)) + cfeas <- length(unique(sub.dmat@cols)) + } else { + rfeas <- length(unique(sub.dmat@cols)) + cfeas <- length(unique(sub.dmat@rows)) + } + + max_dist <- suppressWarnings(max(sub.dmat@.Data)) + + eps.val <- calculate_epsilon(rfeas, + cfeas, + tolerance, + max_dist) + + return(eps.val) +} + +calculate_epsilon <- function(rfeas, + cfeas, + tolerance, + maxdist) +{ + + old.o <- options(warn=-1) + epsilon_lower_lim <- maxdist / (.Machine$integer.max/64 -2) + epsilon <- if (tolerance>0 & rfeas>1 & cfeas>1) { + max(epsilon_lower_lim, tolerance/(rfeas + cfeas - 2)) + } else epsilon_lower_lim + options(old.o) + return(epsilon) +} + +parse_subproblems <- function(problems, min.controls, + max.controls, omit.fraction, hints, + total.n, TOL.in, resolution = NULL) +{ + np <- length(problems) + subproblemids <- names(problems) + if (is.null(subproblemids)) subproblemids <- character(1L) + flipped_status <- mapply(get_flipped_status, + d = problems, + omf = omit.fraction, + mxctl = max.controls, + mnctl = min.controls, + SIMPLIFY = FALSE) + + problems <- lapply(flipped_status, function(x) x[['subproblem']]) + flippeds <- lapply(flipped_status, function(x) x[['flipped_status']]) + mincs <- lapply(flipped_status, function(x) x[['minc']]) + maxcs <- lapply(flipped_status, function(x) x[['maxc']]) + + #when no hint is provided, hints is a list of length np where everything is null + #when hint is provided, hint is a list of NodeInfo elements + hint.provided <- all(sapply(hints, function(x) !is.null(x))) + + hint.list <- mapply(prepare_subproblem_hint, + d = problems, + hint = hints, + SIMPLIFY = FALSE) + + if (!is.null(resolution)) + { + length.res <- length(resolution) + names.res <- names(resolution) + + if (all(is.null(names.res))) + { + if (identical(np, 1L) && + identical(length.res, 1L)) + { + names.res <- character(1L) + names(resolution) <- names.res + } else { + stop("No subproblem ids are specified.") + } + } + + if (hint.provided) + { + if(any(!names.res %in% subproblemids)) + { + stop("Group specified in resolution argument cannot be found.") + } + # if resolution is specified for a few subproblems, do the default behavior first: + tol.fracs <- mapply(get_tol_frac, + d = problems, + total_n = rep(total.n, np), + np = rep(np, np)) + tolerances <- tol.fracs * TOL.in + + epsilons <- mapply(get_epsilon, + hint = hint.list, + tolerance = tolerances, + subproblem = problems, + flipped = flippeds, + SIMPLIFY = FALSE) + #Then, update with the new specified values + epsilons[names.res] <- resolution + + } else { + if(length.res != np || + !identical(sort(names.res), sort(subproblemids))) + { + stop("Resolutions must provided for each subproblem as a named list. Please ensure all subproblemids are correct and specified.") + } + + + if (length(resolution) == 1 && + names(resolution) == "" && + np == 1 && all(subproblemids == "")) # case of one subproblem + { + epsilons <- resolution + names(epsilons) <- character(1L) + } else { + epsilons <- lapply(subproblemids, function(x) resolution[[x]]) + names(epsilons) <- subproblemids + } + + } + + + } else { #no epsilons provided, just use the default behavior and convert from tolerance + tol.fracs <- mapply(get_tol_frac, + d = problems, + total_n = rep(total.n, np), + np = rep(np, np)) + tolerances <- tol.fracs * TOL.in + + epsilons <- mapply(get_epsilon, + hint = hint.list, + tolerance = tolerances, + subproblem = problems, + flipped = flippeds, + SIMPLIFY = FALSE) + } + + tmp <- list(subproblems = problems, + flipped_status = flippeds, + min.controls = mincs, + max.controls = maxcs, + epsilons = epsilons, + hints = hint.list) + return(tmp) +} + + +find_subproblem_epsilons <- function(tol = NULL, + epsilon = NULL, + subproblems, + x, + d) +{ + if (!is.null(tol)) #user specifies tolerance, rather than epsilon + { + total.n <- sum(dim(x)) + TOL <- tol * total.n + np <- length(subproblems) + ncol <- dim(d)[2] + nrow <- dim(d)[1] + + + tol.frac <- + if (total.n > 2 * np) { + (nrow + ncol - 2)/(total.n - 2 * np) + } else 1 + + # needs to handle multiple specifications for multiple subproblems, 1 specification for multiple subproblems, or 1 specification, 1 subproblem + if (identical(length(tol), np)) { + # if we have a tolerance specification for each subproblem + tolerances <- TOL * tol.frac + + } else { #handle one tolerance for all subproblems. Need to check that other cases trigger errors + if (length(tol) == 1) + { + + tol.temp <- TOL * tol.frac #eventually, don't return tolerances, just return epsilons here + tolerances <- rep(tol.temp, np) + } else { + stop("Please specify one tolerance for all subproblems, or one tolerance per subproblem.") + } + } + return(tolerances) + } + +} + + +#' Get information about subproblems +#' +#' @param object optmatch object +#' @param type either "subproblem" or any of the column names from the subproblems table associated with an MCFSolutions object (e.g. "resolution") +#' +#' @return if \code{type = "subproblem"}, a named vector mapping units to subproblems is returned. If one or more columns from the subproblems table is specified, that column subset is returned. This might result in either a vector or data frame being returned. +#' @export +#' +#' @examples +#' data(nuclearplants) +#' f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) +#' get_subproblem_info(object = f1, type = c("resolution", "groups")) +#' get_subproblem_info(object = f1, type = "subproblem") +get_subproblem_info <- function(object, type) +{ + if (!inherits(object, "optmatch")){ + stop("Please provide an optmatch object") + } + if (all(type == "subproblem")) + { + return(attr(object, "subproblem")) + } else { + subprob.attributes <- colnames(attr(object, "MCFSolutions")@subproblems) + + idx <- pmatch(type, subprob.attributes, nomatch = NA) + idx <- idx[!is.na(idx)] + + tb <- attr(object, "MCFSolutions")@subproblems + res <- tb[, idx] + return(res) + } +} diff --git a/inst/examples/fullmatch.R b/inst/examples/fullmatch.R index b2727fb7..bf7e225d 100644 --- a/inst/examples/fullmatch.R +++ b/inst/examples/fullmatch.R @@ -51,3 +51,16 @@ m5 <- fullmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Full matching, then using the results as a hint for a +# similar problem that differs by tolerance +fm_init <- fullmatch(pr ~ t1 + t2, tol = .01, + data = nuclearplants) + +fm_hint <- fullmatch(pr ~ t1 + t2, tol = .001, + data = nuclearplants, hint = fm1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +fm_reso <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) diff --git a/inst/examples/pairmatch.R b/inst/examples/pairmatch.R index 71c14e0a..6288d62b 100644 --- a/inst/examples/pairmatch.R +++ b/inst/examples/pairmatch.R @@ -48,3 +48,16 @@ m5 <- pairmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Pair matching, then using the results as a hint for a +# similar problem that differs by tolerance +p_init <- pairmatch(pr ~ cost + t1, tol = .01, + data = nuclearplants) + +p_hint <- pairmatch(pr ~ cost + t1, tol = .001, + data = nuclearplants, hint = p1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +p_reso <- pairmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) diff --git a/man/fullmatch.Rd b/man/fullmatch.Rd index b71968f6..2c9d0eb6 100644 --- a/man/fullmatch.Rd +++ b/man/fullmatch.Rd @@ -14,6 +14,7 @@ fullmatch( tol = 0.001, data = NULL, solver = "", + resolution = NULL, ... ) @@ -26,6 +27,7 @@ full( tol = 0.001, data = NULL, solver = "", + resolution = NULL, ... ) } @@ -141,8 +143,19 @@ an infeasible matching problem. When using a LEMON algorithm other than CycleCancelling, we recommend setting the \code{fullmatch_try_recovery} option to \code{FALSE}.} +\item{resolution}{Optional argument specifying subproblem-level +resolution(s) to be passed to the solver. +If there is only one subproblem, resolution must be specified +as an unnamed list of length 1. If there are multiple subproblems, one +must specify a named list, where the names correspond to a +subproblem/group ID and the values are resolution values. Default is NULL} + \item{...}{Additional arguments, passed to \code{match_on} (e.g. \code{within}) or to specific methods.} + +\item{hint}{Optional argument specifying a hint to be passed to the solver. +These should be provided as \code{optmatch} objects. +Hints should have the same subproblem structure as the currently-specified problem.} } \value{ A \code{\link{optmatch}} object (\code{factor}) indicating matched groups. @@ -255,6 +268,19 @@ m5 <- fullmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Full matching, then using the results as a hint for a +# similar problem that differs by tolerance +fm_init <- fullmatch(pr ~ t1 + t2, tol = .01, + data = nuclearplants) + +fm_hint <- fullmatch(pr ~ t1 + t2, tol = .001, + data = nuclearplants, hint = fm1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +fm_reso <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) } \references{ Hansen, B.B. and Klopfer, S.O. (2006), \sQuote{ Optimal full matching and related designs via network flows}, diff --git a/man/get_subproblem_info.Rd b/man/get_subproblem_info.Rd new file mode 100644 index 00000000..94da693a --- /dev/null +++ b/man/get_subproblem_info.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/subproblemutils.R +\name{get_subproblem_info} +\alias{get_subproblem_info} +\title{Get information about subproblems} +\usage{ +get_subproblem_info(object, type) +} +\arguments{ +\item{object}{optmatch object} + +\item{type}{either "subproblem" or any of the column names from the subproblems table associated with an MCFSolutions object (e.g. "resolution")} +} +\value{ +if \code{type = "subproblem"}, a named vector mapping units to subproblems is returned. If one or more columns from the subproblems table is specified, that column subset is returned. This might result in either a vector or data frame being returned. +} +\description{ +Get information about subproblems +} +\examples{ +data(nuclearplants) +f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) +get_subproblem_info(object = f1, type = c("resolution", "groups")) +get_subproblem_info(object = f1, type = "subproblem") +} diff --git a/man/pairmatch.Rd b/man/pairmatch.Rd index 5a61d523..e9cfd95d 100644 --- a/man/pairmatch.Rd +++ b/man/pairmatch.Rd @@ -5,9 +5,23 @@ \alias{pair} \title{Optimal 1:1 and 1:k matching} \usage{ -pairmatch(x, controls = 1, data = NULL, remove.unmatchables = FALSE, ...) - -pair(x, controls = 1, data = NULL, remove.unmatchables = FALSE, ...) +pairmatch( + x, + controls = 1, + data = NULL, + remove.unmatchables = FALSE, + resolution = NULL, + ... +) + +pair( + x, + controls = 1, + data = NULL, + remove.unmatchables = FALSE, + resolution = NULL, + ... +) } \arguments{ \item{x}{Any valid input to \code{match_on}. If \code{x} is a numeric vector, @@ -23,11 +37,22 @@ Alternatively, a precomputed distance may be entered.} \item{remove.unmatchables}{Should treatment group members for which there are no eligible controls be removed prior to matching?} +\item{resolution}{Optional argument specifying subproblem-level +resolution(s) to be passed to the solver. +If there is only one subproblem, resolution must be specified +as an unnamed list of length 1. If there are multiple subproblems, one +must specify a named list, where the names correspond to a +subproblem/group ID and the values are resolution values. Default is NULL} + \item{...}{Additional arguments to pass to \code{\link{match_on}} (e.g. \code{within})) or to \code{\link{fullmatch}} (e.g. \code{tol}). It is an error to pass \code{min.controls}, \code{max.controls}, \code{mean.controls} or \code{omit.fraction} as \code{pairmatch} must set these values.} + +\item{hint}{Optional argument specifying a hint to be passed to the solver. +These should be provided as \code{optmatch} objects. +Hints should have the same subproblem structure as the currently-specified problem.} } \value{ A \code{\link{optmatch}} object (\code{factor}) indicating matched groups. @@ -126,6 +151,19 @@ m5 <- pairmatch(glm(pr ~ t1 + t2 + strata(pt), data=nuclearplants, # Including `strata(foo)` inside a glm uses `foo` in the model as # well, so here m4 and m5 are equivalent. m3 differs in that it does # not include `pt` in the glm. + +# Pair matching, then using the results as a hint for a +# similar problem that differs by tolerance +p_init <- pairmatch(pr ~ cost + t1, tol = .01, + data = nuclearplants) + +p_hint <- pairmatch(pr ~ cost + t1, tol = .001, + data = nuclearplants, hint = p1) + +# Specifying subproblem level resolution values +res.custom <- list("0" = 0.001142857, "1" = 0.001) +p_reso <- pairmatch(pr ~ cost + strata(pt), data=nuclearplants, + resolution = res.custom) } \references{ Hansen, B.B. and Klopfer, S.O. (2006), \sQuote{Optimal full matching diff --git a/tests/testthat/test.MCFSolutions.R b/tests/testthat/test.MCFSolutions.R index 8b44c0e7..2237870e 100644 --- a/tests/testthat/test.MCFSolutions.R +++ b/tests/testthat/test.MCFSolutions.R @@ -3,17 +3,17 @@ context("MCFSolutions & co: S4 classes to encode min-cost-flow solutions") test_that("Instantiation & validity", { expect_true(validObject(new("SubProbInfo"))) # test the prototype expect_silent(spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), lagrangian_value=c(.5, 2), dual_value=c(0,1), - feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) + resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), + feasible=c(TRUE, FALSE), exceedance=1, solver = NA, stringsAsFactors=F) ) ) expect_true(validObject(spi1)) spi2 <- spi1 colnames(spi2)[1] <- "Subprob" - expect_error(validObject(spi2), "Cols 1-8 should be") + expect_error(validObject(spi2), "Cols 1-9 should be") - expect_true(validObject(new("NodeInfo"))) # test the prototype + expect_true(validObject(new("NodeInfo"))) # test the prototype expect_silent(ni1 <- new("NodeInfo", data.frame(name='a', price=0.5, upstream_not_down=TRUE, supply=1L, groups = as.factor('b'), @@ -48,7 +48,7 @@ test_that("Instantiation & validity", { supply=1L, groups = as.factor('b'), stringsAsFactors=F) ), - "unique" + "unique" ) expect_true(validObject(new("ArcInfo"))) # test the prototype expect_silent(ai <- new("ArcInfo", @@ -72,7 +72,7 @@ test_that("Instantiation & validity", { bookkeeping=data.frame(groups = as.factor('a'), start = as.factor(c('c','d')), end = as.factor('(_Sink_)'), flow=1.5, capacity=1L, stringsAsFactors=F) - ), "should have type integer" # Not sure it's necessary, but insisting + ), "should have type integer" # Not sure it's necessary, but insisting ) # that 'flow' be integer not double expect_error(new("ArcInfo", matches=data.frame(groups = as.factor('a'), upstream = as.factor('b'), @@ -80,8 +80,8 @@ test_that("Instantiation & validity", { bookkeeping=data.frame(groups = as.factor('a'), start = as.factor(c('c','d')), end = as.factor('(_Sink_)'), flow=-1L, capacity=1L, stringsAsFactors=F) - ), "should be nonnegative" - ) + ), "should be nonnegative" + ) expect_error(new("ArcInfo", matches=data.frame(groups = as.factor('a'), upstream = as.factor('b'), @@ -91,9 +91,9 @@ test_that("Instantiation & validity", { start=as.factor(c('c','d')), end=as.factor('(_Sink_)'), flow=2L, capacity=1L, stringsAsFactors=F) - ), "flow can be no greater than capacity" - ) - + ), "flow can be no greater than capacity" + ) + expect_silent(mcf1f <- new("MCFSolutions", subproblems=spi1,nodes=ni1f,arcs=ai)) expect_silent(as(mcf1f, "FullmatchMCFSolutions")) expect_equal(node.labels(mcf1f), node.labels(ni1f)) @@ -109,24 +109,24 @@ expect_setequal(names(getSlots("MCFSolutions")), test_that("c() methods", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), lagrangian_value=c(.5, 2), dual_value=c(0,1), - feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) + resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), + feasible=c(TRUE, FALSE), exceedance=1, solver = NA, stringsAsFactors=F) ) spi2 <- new("SubProbInfo", data.frame(groups=c('c'), flipped=logical(1), hashed_dist=c('a'), - resolution=c(1), lagrangian_value=c(.5), dual_value=c(0), - feasible=c(TRUE), exceedance=1, stringsAsFactors=F) + resolution=c(1), primal_value=c(.5), dual_value=c(0), + feasible=c(TRUE), exceedance=1, solver = NA, stringsAsFactors=F) ) spi3 <- new("SubProbInfo", data.frame(groups=c('d'), flipped=logical(1), hashed_dist=c('a'), - resolution=c(1), lagrangian_value=c(.5), dual_value=c(0), - feasible=c(TRUE), exceedance=1, stringsAsFactors=F) + resolution=c(1), primal_value=c(.5), dual_value=c(0), + feasible=c(TRUE), exceedance=1, solver = NA, stringsAsFactors=F) ) - + expect_true(validObject(c(spi1, spi2))) expect_true(validObject(c(spi1, spi2, spi3))) expect_true(validObject(c(a=spi1, b=spi2))) # no confusion just b/c no `x=` arg! - + ni1f <- new("NodeInfo", data.frame(name=c('b', 'c', 'd', '(_Sink_)', '(_End_)'), @@ -141,7 +141,7 @@ test_that("c() methods", { node.labels(ni1f) <- ni1f[['name']] ni1f.a <- ni1f.b <- ni1f.c <- ni1f ni1f.a[,'groups'] <- factor(rep('a', nrow(ni1f))) - ni1f.c[,'groups'] <- factor(rep('c', nrow(ni1f))) + ni1f.c[,'groups'] <- factor(rep('c', nrow(ni1f))) expect_true(validObject(c(ni1f.a, ni1f.b))) expect_true(validObject(c(ni1f.a, ni1f.b, ni1f.c))) ni2 <- new("NodeInfo", @@ -172,7 +172,7 @@ test_that("c() methods", { expect_true(validObject(ai1)) expect_true(validObject(c(ai1, ai1))) expect_true(validObject(c(x=ai1, y=ai1, z=ai1))) - some_levs <- c(letters[2:5], '(_Sink_)', '(_End_)') + some_levs <- c(letters[2:5], '(_Sink_)', '(_End_)') ai2 <- new("ArcInfo", matches=data.frame(groups = factor('c'), upstream = factor('b', levels=some_levs), @@ -199,7 +199,7 @@ test_that("c() methods", { mcf2 <- new("MCFSolutions", subproblems=spi2,nodes=ni2,arcs=ai2) expect_true(validObject(mcf1)) expect_true(validObject(mcf2)) - + expect_error(c(mcf1, mcf1), "uplicates") expect_true(validObject(c(mcf1, mcf2))) expect_true(validObject(c(y=mcf1, z=mcf2))) @@ -241,10 +241,10 @@ test_that("NodeInfo to tibble converter", { node.labels(ni1f) <- ni1f[['name']] expect_silent(ni_tbl <- as(ni1f, "tbl_df")) expect_is(ni_tbl$nodelabels, "factor") - expect_equivalent(as.character(ni_tbl$nodelabels), + expect_equivalent(as.character(ni_tbl$nodelabels), c('b', 'c', 'd', '(_Sink_)', '(_End_)') ) - expect_equivalent(levels(ni_tbl$nodelabels), + expect_equivalent(levels(ni_tbl$nodelabels), c('b', 'c', 'd', '(_Sink_)', '(_End_)') ) # default encoding would start w/ "(_End_)", "(_Sink_)" }) @@ -262,9 +262,9 @@ test_that("Preserve levels when filtering a node info tibble",{ ni_tbl <- as(ni1f, "tbl_df") expect_silent(ni_tbl_s <- dplyr::filter(ni_tbl, name %in% letters)) expect_is(ni_tbl_s$nodelabels, "factor") - expect_equivalent(levels(ni_tbl_s$nodelabels), + expect_equivalent(levels(ni_tbl_s$nodelabels), c('b', 'c', 'd', '(_Sink_)', '(_End_)') - ) + ) }) test_that("Node labels getter",{ expect_silent(mcf <- new("MCFSolutions")) #prelim- @@ -287,8 +287,8 @@ test_that("filtering on groups/subproblem field", { spi1 <- new("SubProbInfo", data.frame(groups=c('a','b'), flipped=logical(2), hashed_dist=c('a','b'), - resolution=c(1,10), lagrangian_value=c(.5, 2), dual_value=c(0,1), - feasible=c(TRUE, FALSE), exceedance=1, stringsAsFactors=F) + resolution=c(1,10), primal_value=c(2,0), dual_value=c(1,1), + feasible=c(TRUE, FALSE), exceedance=1, solver = NA, stringsAsFactors=F) ) expect_error(filter_by_subproblem(spi1, groups="a"), "implemented") @@ -364,7 +364,7 @@ test_that("NodeInfo subsetting", { expect_equal(nrow(ni0_o), 6) expect_silent(ni0_n <- filter(ni0_o, name!=2 & name!=4)) expect_is(ni0_n, "NodeInfo") - expect_equal(nrow(ni0_n), 4) + expect_equal(nrow(ni0_n), 4) }) test_that("Pull updated prices & supplies into a NodeInfo",{ expect_silent(ni0_o <- nodes_shell_fmatch(c(1,2), c(3,4))) diff --git a/tests/testthat/test.complementarySlackness.R b/tests/testthat/test.complementarySlackness.R index 8d53acc4..e0544d85 100644 --- a/tests/testthat/test.complementarySlackness.R +++ b/tests/testthat/test.complementarySlackness.R @@ -35,8 +35,8 @@ make_known_optimal <- function(flipped=FALSE) { )) subprob <- new("SubProbInfo", data.frame(groups="a", flipped=flipped, hashed_dist=character(1), - resolution=NA_real_, lagrangian_value=NA_real_, dual_value=NA_real_, - feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) + resolution=NA_real_, primal_value=NA_real_, dual_value=NA_real_, + feasible=NA, exceedance=NA_real_, solver = NA, stringsAsFactors=FALSE) ) mcf_solution <- new("MCFSolutions", subproblems=subprob, nodes=nodes, arcs=arcs) list(x = x, m = m, mcf = mcf_solution) @@ -95,8 +95,8 @@ make_known_optimal_fullm <- function(flipped=FALSE) ) subprob <- new("SubProbInfo", data.frame(groups='b', flipped=flipped, hashed_dist=character(1), - resolution=NA_real_, lagrangian_value=NA_real_, dual_value=NA_real_, - feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE) + resolution=NA_real_, primal_value=NA_real_, dual_value=NA_real_, + feasible=NA, exceedance=NA_real_, solver = NA, stringsAsFactors=FALSE) ) mcf_solution <- new("MCFSolutions", subproblems=subprob, nodes=nodes, arcs=arcs) node.labels(mcf_solution) <- names(node.labels(mcf_solution)) # for alignment w/ m diff --git a/tests/testthat/test.explicit_MCFSolutions.R b/tests/testthat/test.explicit_MCFSolutions.R new file mode 100644 index 00000000..d78101a4 --- /dev/null +++ b/tests/testthat/test.explicit_MCFSolutions.R @@ -0,0 +1,63 @@ +################################################################################ +# Testing some experimental work on returning feasibility explicitly via MCFSolutions +################################################################################ + +context("Test Explicit MCFSolutions") + +test_that("simple case, all feasible", { + d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) + dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) + res.mat <- fullmatch(dist, data=d) + expect_true(identical((get_subproblem_info(res.mat, type = "feasible")), TRUE)) + + + data(nuclearplants) + f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) + expect_true(identical((get_subproblem_info(f2, type = "feasible")), c(TRUE, TRUE))) + dt <- get_subproblem_info(f2, type = c("resolution", "group")) + expect_true(all(dim(dt) == c(2,2))) + +}) + + +test_that("Feasibility with subproblems + recovery",{ + data(nuclearplants) + + mm <- match_on(pr ~ cost + t1 + t2, data=nuclearplants) + # infeasible as given + options("fullmatch_try_recovery" = FALSE) + expect_warning(f <- fullmatch(mm, data=nuclearplants, max.controls=2)) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(FALSE))) + options("fullmatch_try_recovery" = TRUE) + f <- fullmatch(mm, data=nuclearplants, max.controls=2) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(TRUE))) + + + # not infeasible as mean.controls provided + f <- fullmatch(mm, data=nuclearplants, max.controls=2, mean.controls=2) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(TRUE))) + + + options("fullmatch_try_recovery" = FALSE) + mm <- match_on(pr ~ cost + t1 + t2, data=nuclearplants, within=exactMatch(pr ~ pt, data=nuclearplants)) + # infeasible as given for subproblem 1, feasible for subproblem 2 + expect_warning(f <- fullmatch(mm, data=nuclearplants, max.controls=2)) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(FALSE, TRUE))) + + options("fullmatch_try_recovery" = TRUE) + f <- fullmatch(mm, data=nuclearplants, max.controls=2) + expect_true(identical((get_subproblem_info(f, type = "feasible")), c(TRUE, TRUE))) + +}) + + +test_that("completely empty problem", { + m <- matrix(Inf, nrow = 3, ncol = 4) + rownames(m) <- LETTERS[1:3] + colnames(m) <- letters[23:26] + expect_warning(res.m <- fullmatch(m)) + expect_true(get_subproblem_info(res.m, type = "feasible") == FALSE) + +}) diff --git a/tests/testthat/test.fullmatch.R b/tests/testthat/test.fullmatch.R index 18969f51..2ad0e8ce 100644 --- a/tests/testthat/test.fullmatch.R +++ b/tests/testthat/test.fullmatch.R @@ -688,6 +688,21 @@ test_that('Hints accepted',{ expect_silent(f1d <- fullmatch(t(mosc), min.c=.5, max.c=2, data = data, tol=0.1)) expect_is(attr(f1d, "MCFSolutions"), "FullmatchMCFSolutions") expect_silent(fullmatch(t(mosc), min.c=.5, max.c=2, data = data, tol=0.1, hint=f1d)) + + # hint has different subgroups, is ignored. + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), + fac=rep(c(rep("a",2), rep("b",3)),2), + fac1=rep(c(rep("c",2), rep("d",3)),2)) + mo <- match_on(z ~ x + strata(fac1), data=data) + f1a <- fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.1) + mos <- match_on(z ~ x + strata(fac), data=data) + + expect_warning(fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1, hint=f1a), "ignoring") + + + }) test_that("If matching fails, we should give a warning", { diff --git a/tests/testthat/test.optmatchS3.R b/tests/testthat/test.optmatchS3.R index 0a6f9706..f7ef887b 100644 --- a/tests/testthat/test.optmatchS3.R +++ b/tests/testthat/test.optmatchS3.R @@ -209,7 +209,8 @@ test_that("Indicating failing subproblems", { spS <- subproblemSuccess(f1) mf <- matchfailed(f1) expect_true(all(spS == FALSE)) - expect_equal(names(spS), "1") + # changing this from "1" to "" in the "nodeprices" branch. Making (temporary?) decision that when there are no subproblems, the group name is blank, rather than "1". + expect_equal(names(spS), "") expect_is(mf, "logical") expect_length(mf, nrow(nuclearplants)) expect_true(all(mf == TRUE)) diff --git a/tests/testthat/test.subproblemutils.R b/tests/testthat/test.subproblemutils.R new file mode 100644 index 00000000..6146c86b --- /dev/null +++ b/tests/testthat/test.subproblemutils.R @@ -0,0 +1,207 @@ +################################################################################ +# Testing resolution / tolerance specification +################################################################################ + +context("test subproblem resolution specification functionality") + +test_that("get_subproblem_info basics", { + d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) + dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) + + res.mat <- fullmatch(dist, data=d) + expect_true(identical(length(unique(get_subproblem_info(res.mat, type = "subproblem"))), 1L)) + expect_true(identical(length(get_subproblem_info(res.mat, type = "resolution")), 1L)) + + data(nuclearplants) + f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) + + expect_true(identical(length(unique(get_subproblem_info(f2, type = "subproblem"))), 2L)) + expect_true(identical(length(get_subproblem_info(f2, type = "resolution")), 2L)) + dt <- get_subproblem_info(f2, type = c("resolution", "group")) + expect_true(all(dim(dt) == c(2,2))) + +}) + +test_that("fullmatch: basic error checks", { + data(nuclearplants) + res.custom <- list("0" = 0.001142857, "1" = 0.001142857) + expect_silent(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom)) + bad.res <- list("2" = 0.001142857, "1" = 0.001142857) + # subproblem is not in the original specification + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + # no hint + subset of epsilons should fail + bad.res <- list("1" = 0.001142857) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + #unnamed arguments should fail, length 1 list is a special case + bad.res <- list(0.001142857) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + #unnamed arguments should fail + bad.res <- list(0.001142857, "1" =.35) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + + #unnamed arguments should fail + bad.res <- list(0.001142857, .35) + expect_error(fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = bad.res)) + +}) + +test_that("pairmatch: basic error checks", { + data(nuclearplants) + + psm <- glm(pr~.-(pr+cost), family=binomial(), data=nuclearplants) + em <- exactMatch(pr ~ pt, data = nuclearplants) + res.pm <- pairmatch(match_on(psm) + em, data=nuclearplants) + res.custom <- list("0" = 0.001142857, "1" = 0.001142857) + expect_silent(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = res.custom)) + + bad.res <- list("2" = 0.001142857, "1" = 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + bad.res <- list("1" = 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + + bad.res <- list(0.001142857, 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + bad.res <- list("1" = 0.001142857, + 0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + + bad.res <- list(0.001142857) + expect_error(pairmatch(match_on(psm) + em, data=nuclearplants, resolution = bad.res)) + +}) + +test_that("fullmatch: error checking with hints", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + mos <- match_on(z ~ x + strata(fac), data=data) + f1b <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1) + res.custom <- list("b" = 0.0001666667) + expect_silent(fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b, resolution = res.custom)) + + res.custom <- list("b" = 0.0001666667, "a" = 0.0001666667) #out of order should be ok + expect_silent(fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b, resolution = res.custom)) + + res.custom <- list("b" = 0.01) + expect_silent(fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) ) +}) + + +test_that("fullmatch: with hint", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + + mos <- match_on(z ~ x + strata(fac), data=data) + f1b <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1) + + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.0001, hint=f1b) + res.custom <- list("b" = 0.0001666667) + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) + resos <- get_subproblem_info(f1c, c("resolution", "groups")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == 0.0001666667) + + res.custom <- list("b" = 0.01) + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) + resos <- get_subproblem_info(f1c, c("resolution", "groups")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .01) + + res.custom <- list("b" = 0.01, "a" = .001) + f1c <- fullmatch(mos, min.c=.5, max.c=2, data = data, hint=f1b, resolution = res.custom) + resos <- get_subproblem_info(f1c, c("resolution", "groups")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .01) + expect_true(resos[resos[, "groups"] == "a", "resolution"] == .001) +}) + +test_that("fullmatch: subproblem specification", { + data(nuclearplants) + f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) + res.custom <- list("0" = 0.001, "1" = 0.01) + f2 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom) + resos.old <- get_subproblem_info(f1, "resolution") + resos <- get_subproblem_info(f2, "resolution") + # relying on ordering here, but testing to make sure dimensions are reduced correctly + expect_true(resos[1] == 0.001) + expect_true(resos[2] == 0.01) + expect_true(all(resos.old != resos)) +}) + + +test_that("pairmatch: error checking with hints", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + mos <- match_on(z ~ x + strata(fac), data=data) + p1b <- pairmatch(mos, data = data, tol=0.1) + + res.custom <- list("b" = 0.001, "a" = 0.01) #out of order should be ok + expect_silent(pairmatch(mos, data = data, hint=p1b, resolution = res.custom)) + res.custom <- list("b" = 0.001) + expect_silent(pairmatch(mos, data = data, hint=p1b, resolution = res.custom)) +}) + +test_that("pairmatch: with hint", { + set.seed(201905) + data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) + mo <- match_on(z ~ x, data=data) + mos <- match_on(z ~ x + strata(fac), data=data) + p1b <- pairmatch(mos, data = data, tol=0.1) + + res.custom <- list("b" = 0.001, "a" = 0.01) #out of order should be ok + pm1 <- pairmatch(mos, data = data, hint=p1b, resolution = res.custom) + resos <- get_subproblem_info(pm1, c("resolution", "group")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .001) + expect_true(resos[resos[, "groups"] == "a", "resolution"] == .01) + + res.custom <- list("b" = 0.001) #should be able to specify subset here + pm1 <- pairmatch(mos, data = data, hint=p1b, resolution = res.custom) + resos <- get_subproblem_info(pm1, c("resolution", "group")) + expect_true(resos[resos[, "groups"] == "b", "resolution"] == .001) + +}) + +test_that("pairmatch: subproblem specification", { + data(nuclearplants) + psm <- glm(pr~.-(pr+cost), family=binomial(), data=nuclearplants) + em <- exactMatch(pr ~ pt, data = nuclearplants) + res.pm <- pairmatch(match_on(psm) + em, data=nuclearplants) + res.custom <- list("0" = 0.001, + "1" = 0.01) + pm1 <- pairmatch(match_on(psm) + em, data=nuclearplants, resolution = res.custom) + resos.old <- get_subproblem_info(res.pm, "resolution") + resos <- get_subproblem_info(pm1, "resolution") + expect_true(resos[1] == 0.001) + expect_true(resos[2] == 0.01) + expect_true(all(resos.old != resos)) +}) + + +test_that("fullmatch: no subproblems", { + d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) + dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) + res.mat <- fullmatch(dist, data=d, resolution = list(.1)) + resos <- get_subproblem_info(res.mat, "resolution") + expect_true(length(resos) == 1) + expect_true(all(resos == .1)) +}) + +test_that("pairmatch: no subproblems", { + data(nuclearplants) + + psm <- glm(pr~.-(pr+cost), family=binomial(), data=nuclearplants) + res.pm <- pairmatch(match_on(psm), data=nuclearplants, resolution = list(.1)) + resos <- get_subproblem_info(res.pm, "resolution") + expect_true(length(resos) == 1) + expect_true(all(resos == .1)) +}) diff --git a/vignettes/MCFSolutions.Rmd b/vignettes/MCFSolutions.Rmd index 47e569ee..4e58df9e 100644 --- a/vignettes/MCFSolutions.Rmd +++ b/vignettes/MCFSolutions.Rmd @@ -12,6 +12,7 @@ vignette: > ```{r, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, prompt=TRUE) library(magrittr) +library(optmatch) ``` # Overview @@ -217,3 +218,71 @@ Return object of a new S4 class, `Optmatch`, inheriting from factor as well as f - `c()` for `MCFSolutions`: `rbind()`s the various constituent data frames, enforcing req's that subproblems have distinct names, de-duping bookkeeping nodes and enforcing requirement that non-bookkeeping nodes' names be distinct. - `addArcs()` for `MCFSolutions` objects - `addNodes()` for `MCFSolutions` objects + +# Expectations for MCFSolutions objects + +As of 1/13/2025 in the `nodeprices` branch, MCFSolutions are now always returned from a `fullmatch()` or `pairmatch()` call, regardless of feasibility. Users can rely on subproblem and node price information being returned via the "subproblems" and "nodes" slots, respectively. This information is always present, regardless of feasibility. Information in the "nodes" slot is returned to the best of the program's ability. If a subproblem is feasible, then the results of that computation are returned. If the subproblem is infeasible, then there are two possibilities for what gets returned. If the subproblem was determined to be infeasible before being passed to the solver, then the results from `node_shell_fmatch()` are returned. If the subproblem is passed to the solver, then the results from the solver are returned. + +The helper function `get_subproblem_info()` has been added to facilitate the extraction of subproblem level data. This is intended to help make warm start and hinting functionality more accessible to users. Users can extract a mapping of units and their corresponding subproblem ID with the following: + +```{r} +data(nuclearplants) +f1 <- fullmatch(pr ~ cost + strata(pt), data=nuclearplants) +get_subproblem_info(object = f1, type = "subproblem") +``` +Users can also extract information from the subproblems table by specifying one or more of the column names from the "subproblems" table associated with an MCFSolutions object (e.g. "resolution"). + +```{r} +get_subproblem_info(object = f1, type = c("resolution", "groups")) +``` + + +# Hinting + +As of 1/13/2025 in the `nodeprices` branch, a number of hinting and warm-starting features are available to users. Users can provide a result from `fullmatch()` or `pairmatch()` to a new, similar specification in order to provide a "warm start" to the solver. These hints are provided via an optional argument to `pairmatch()` and `fullmatch()` called `hint`. These previous solutions must have the same units and an identical subgroup/subproblem structure to the current specification, but other parameters are allowed to be different. For instance, tolerance values may differ. + +In the following example, we provide a hint that differs from a given specification by the `tol` value. So, the hint is valid: +```{r} +set.seed(201905) +data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), fac=rep(c(rep("a",2), rep("b",3)),2) ) +mo <- match_on(z ~ x, data=data) +fm <- fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.1) +fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.0001, hint=fm) +``` + +However, in the following example, the hint and new specification have different subproblem structures. The hint is then ignored and a warning is thrown. In general, if subproblem information does not match across hints and specifications, the hint is ignored. + +```{r} +set.seed(201905) +data <- data.frame(z = rep(0:1, each = 5), + x = rnorm(10), + fac=rep(c(rep("a",2), rep("b",3)),2), + fac1=rep(c(rep("c",2), rep("d",3)),2)) +mo <- match_on(z ~ x + strata(fac1), data=data) +fm <- fullmatch(mo, min.c=.5, max.c=2, data = data, tol=0.1) +mos <- match_on(z ~ x + strata(fac), data=data) + +fullmatch(mos, min.c=.5, max.c=2, data = data, tol=0.1, hint=fm) +``` + +## Subproblem Level Resolution Specification + +Users can also provide subproblem level resolution values, which are then passed down to the solver. These are provided via an argument to `fullmatch()` and/or `pairmatch()` called `resolution`. This argument must take the form of a list. If there is only one subproblem in the current specification and users wish to specify a resolution value, then users must provide an unnamed list with the resolution value: + +```{r} +d <- data.frame(position = rep(1:4, each = 4), + z = rep(0:1, 8), + rownames=letters[1:16]) +dist <- match_on(z ~ position, inv.scale.matrix = diag(1), data=d) +res.mat <- fullmatch(dist, data=d, resolution = list(.1)) +``` + +If the specification has more than one subproblem and users wish to provide resolution values(s), users must provide a named list to the `resolution` argument. The names of elements must correspond to a valid subproblem/group ID, and the elements are numeric values indicating the desired resolution value to be passed to the solver. + +```{r} +data(nuclearplants) +res.custom <- list("0" = 0.001142857, "1" = 0.001) +fullmatch(pr ~ cost + strata(pt), data=nuclearplants, resolution = res.custom) +``` +