Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
93a5162
refactoring of subproblem information
adamrauh Jul 19, 2024
e995c55
fixing some bugs
adamrauh Jul 19, 2024
4a0048c
bug fixes
adamrauh Jul 19, 2024
fd31f41
fixing infinite values issue
adamrauh Jul 22, 2024
9801255
more bug fixes, passes all tests now
adamrauh Jul 22, 2024
9495ffb
allowing tolerance in solve_reg_fm_prob
adamrauh Jul 22, 2024
a460121
small tweaks to fullmatch
adamrauh Jul 25, 2024
5dfa811
using ISM based solution for rfeas/cfeas/epsilon calcs
adamrauh Jul 29, 2024
432e630
updating resolution/tolerance code
adamrauh Jul 31, 2024
64dff54
adding tests
adamrauh Jul 31, 2024
0661bd9
improving handling of the no-subproblem case
adamrauh Aug 2, 2024
336efb3
adding more explicit tests about unnamed resolutions
adamrauh Aug 4, 2024
559929c
minor cleanup + pmatch
adamrauh Aug 4, 2024
a6874cf
fixing documentation
adamrauh Aug 5, 2024
74dd576
fixing infeasibility tracking logic
adamrauh Aug 7, 2024
0a4a9ae
replacing lagrangian with primal
adamrauh Aug 7, 2024
46e3570
adjusting primal/dual values to make more substantive sense markmfred…
adamrauh Aug 8, 2024
5e7dada
switching lagrangian to primal in vignette
adamrauh Aug 8, 2024
aebbde2
Small updates from devtools::document()
benthestatistician Aug 9, 2024
ee86b14
merging modified master with nodeprices branch
adamrauh Aug 19, 2024
781a480
simplifying feasibility update status, reducing reliance on NULL MCFS…
adamrauh Aug 21, 2024
446cc2b
removing feas.status tag
adamrauh Aug 21, 2024
be47752
revisions to always ensure that MCFSolutions object is returned
adamrauh Aug 22, 2024
daafaac
minor edits and clean up
adamrauh Aug 23, 2024
2e95399
adding tests for explicit MCFSolutions behavior
adamrauh Aug 23, 2024
5291069
guaranteeing MCFSolutions object, but streamlining solution for infea…
adamrauh Nov 10, 2024
00ca64c
adding test explicitly checking group mismatch on hint
adamrauh Jan 13, 2025
76f8fe1
updating documentation
adamrauh Jan 13, 2025
3fcbaa4
examples and explanation of hinting, subproblem features in vignette
adamrauh Jan 13, 2025
8c8ad30
adding some examples of hinting and subproblem level resolution speci…
adamrauh Jan 16, 2025
7f61a45
merging proj1-nodeprices with nodeprices branch
adamrauh Jan 18, 2025
ffcbbb8
fixing bug for empty subproblems
adamrauh Jan 23, 2025
20e6edf
adjusting test to new expectations
adamrauh Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Collate:
'solver.R'
'strata.R'
'stratumStructure.R'
'subproblemutils.R'
'summary.ism.R'
'summary.optmatch.R'
'utilities.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ export(findSubproblems)
export(full)
export(fullmatch)
export(getMaxProblemSize)
export(get_subproblem_info)
export(mahal.dist)
export(match_on)
export(matched)
Expand Down
62 changes: 34 additions & 28 deletions R/MCFSolutions.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ setOldClass("data.frame")
setClass("SubProbInfo", contains="data.frame",
prototype=
prototype(data.frame(groups=character(1), flipped=NA, hashed_dist=NA_character_,
resolution=1, lagrangian_value=NA_real_,
dual_value=NA_real_, feasible=NA, exceedance=NA_real_, stringsAsFactors=FALSE)
resolution=1, primal_value=NA_real_,
dual_value=NA_real_, feasible=NA, exceedance=NA_real_, solver = NA_character_, stringsAsFactors=FALSE)
)
)
setValidity("SubProbInfo", function(object){
errors <- character(0)
if (!all(colnames(object)[1:8]==
c("groups","flipped", "hashed_dist","resolution","lagrangian_value","dual_value", "feasible", "exceedance")))
if (!all(colnames(object)[1:9]==
c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance", "solver")))
errors <- c(errors,
'Cols 1-8 should be:\n\t c("groups","flipped", "hashed_dist","resolution","lagrangian_value","dual_value", "feasible", "exceedance")')
'Cols 1-9 should be:\n\t c("groups","flipped", "hashed_dist","resolution","primal_value","dual_value", "feasible", "exceedance", "solver")')
if (!all(vapply(object[c(1,3)], is.character, logical(1))))
errors <- c(errors,
'Cols 1,3 should have type character.')
Expand Down Expand Up @@ -136,32 +136,38 @@ setValidity("MCFSolutions", function(object){
## levels set, namely node.labels(object) (i.e. row.names(object@nodes) ). Former
## requirement is enforced within the ArcInfo validity checker; here we confirm
## that this level set matches what's in the nodes table.





if (!identical(row.names(nodeinfo(object)),
levels(object@arcs@matches[['upstream']])
)
)
)
{
if (length(xtralevs <- setdiff(node.labels(object),
levels(object@arcs@matches[['upstream']])
)
{
if (length(xtralevs <- setdiff(node.labels(object),
levels(object@arcs@matches[['upstream']])
)
)
)
errors <- c(errors,
paste("@nodes lists nodes not in the levels of @arcs's nodes columns, e.g.",
paste(head(xtralevs,2), collapse=", "), "."
)
)
if (length(xtralevs <- setdiff(levels(object@arcs@matches[['upstream']]),
node.labels(object)
)
)
)
errors <- c(errors,
paste("@arcs's nodes columns have levels not appearing in @nodes",
paste(head(xtralevs,2), collapse=", "), "."
)
)
}
)
)
errors <- c(errors,
paste("@nodes lists nodes not in the levels of @arcs's nodes columns, e.g.",
paste(head(xtralevs,2), collapse=", "), "."
)
)
if (length(xtralevs <- setdiff(levels(object@arcs@matches[['upstream']]),
node.labels(object)
)
)
)
errors <- c(errors,
paste("@arcs's nodes columns have levels not appearing in @nodes",
paste(head(xtralevs,2), collapse=", "), "."
)
)
}

## Nodes table must have groups column
if (!any(colnames(object@nodes)=="groups"))
errors <- c(errors,
Expand Down
112 changes: 96 additions & 16 deletions R/fmatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ fmatch <- function(distance,
min.col.units = 1,
f = 1,
node_info,
solver) {
solver, try_solver = TRUE) {
# checks solver and evaluates LEMON() if neccessary
solver <- handleSolver(solver)

Expand All @@ -48,17 +48,17 @@ fmatch <- function(distance,
mxc <- as.integer(round(max.col.units))
mnc <- as.integer(round(min.col.units))
mxr <- as.integer(round(max.row.units))

if (mnc > 1) {
mxr <- 1L
}


# Check that matching problem is well-specified
if (mxc < mnc) {
if ( (mxc < mnc) && try_solver ){
stop("min.col.units may not exceed max.col.units")
}

if (any(c(mxc, mnc, mxr) < 1)) {
if ((any(c(mxc, mnc, mxr) < 1)) && try_solver) {
stop("max and min constraints must be 1 or greater")
}

Expand Down Expand Up @@ -89,13 +89,13 @@ fmatch <- function(distance,
distance[['dist']][nadists] <- replacement
}

if (mxr > 1) # i.e. many-one matches permissible
if (mxr > 1 && try_solver) # i.e. many-one matches permissible
{
if (any(distance[['dist']] <= 0))
stop("Nonpositive discrepancies not compatible with full matching\n (see Hansen & Klopfer, 2006 JCGS, sec.4.1).")
} else if (any(distance[['dist']] < 0)) stop("distance should be nonnegative")

if (!is.numeric(f) | f > 1 | f < 0) {
if ((!is.numeric(f) | f > 1 | f < 0) && try_solver) {
stop("f must be a number in [0,1]")
}

Expand Down Expand Up @@ -137,19 +137,102 @@ fmatch <- function(distance,
stop('Cannot choose "(_End_)" as unit name')

## Bypass solver if problem is recognizably infeasible
if ( (mxr >1 & nt/mxr > n.mc) | #max.row.units too low
if ( !try_solver | # this is admittedly a nasty workaround. There are various error checks that occur at different levels of the call stack and this allows us to bypass some of those more easily so that we can return an MCFSolutions object. In practice, we are using this option to ignore infeasibility checks so that we can proceed to create an MCFSolutions
(mxr >1 & nt/mxr > n.mc) | #max.row.units too low
(mxr==1L & nt * mnc > n.mc) | #min.col.units too high
(nt * mxc < n.mc)) { #max.col.units too low

out <- as.data.frame(distance, row.names = NULL)
out$solution <- rep(-1L, narcs)
####
nodes <- new("NodeInfo",
data.frame(name=c(row.units, col.units,
"(_End_)", "(_Sink_)"),# note that `factor()` would put these at start not end of levels
price=0L,
upstream_not_down=c(rep(TRUE, nt), rep(FALSE, nc), NA, NA),
supply=c(rep(mxc, nt), rep(0L, nc),
-(mxc * nt - n.mc), -n.mc),
groups=factor(rep(NA_character_, nt+nc+2L)),
stringsAsFactors=FALSE
)
)

endID <- nt + nc + 1L
sinkID <- endID + 1L
node.labels(nodes) <- nodes[['name']] # new convention per i166

if ( !all(node_info[['price']]==0) )
{
nodes[,'price'] <- node_info[match(c(row.units, col.units,
"(_End_)", "(_Sink_)"),
node_info$name),
'price']
## if a col unit doesn't have a match w/in node_info$name, then
## its nodes row now has a 0 for its price. Replace that w/ min of
## bookkeeping node prices may preserve CS in some cases.
if (length(col.noprice <- setdiff(col.units, node_info[['name']])))
nodes[nodes[['name']] %in% col.noprice, 'price'] <-
min(node_info[match(c("(_End_)", "(_Sink_)"), node_info$name),
'price', drop=TRUE]
)
}
# ID numbers implicitly point to corresp. row of nodes
#### Arcs involving End or Sink nodes ####
# There are multiple cases where the solver is NOT called because the problem is clearly infeasible. This makes it difficult to always return an MCFSolutions object. When the solver is called, we can create all of the various constituent parts of the MCFSolutions object without much trouble, although we must paper over certain things, like setting flow = capacity = 0. However, when the solver is not called, we have to make up an arbitrary MCFSolutions object to return. Because of the restrictions on the classes, this is currently being done in a very hacky way. Long term, we would want to consider the expectations for those classes, perhaps change the defaults or class structure somehow. For instance, what should we return when the subproblem is entirely empty?

# At the moment there are really no guarantees about what can be found in these tables, aside from the subproblems table which should contain a correct specification of feasibility.

if (nt == 0 && nc == 0) # edge case where subproblem is completely empty. In order to return an MCFSolutions object, I am violating some rules. Specifically, flow and capacity are all set to zero. Matches now includes sink and end nodes because the current classes prohibit empty data frames being returned/appended at the end.
{

bookkeeping <- data.frame(groups = factor(),
start = factor(levels = 1L:2L,
labels = node.labels(nodes)),
end = factor(levels = 1L:2L,
labels = node.labels(nodes)),
flow = integer(),
capacity = integer())

matches <- data.frame(groups=factor(),
upstream=factor(levels=node.labels(nodes)),
downstream=factor(levels=node.labels(nodes)))


} else { #This is another edge case where the solver is never called. However, in this case, the subproblem is not empty, so we can use most of the existing logic fairly easily.

bookkeeping <- data.frame(groups = factor(),
start = factor(levels = 1L:(nt + nc + 2),
labels = node.labels(nodes)),
end = factor(levels = 1L:(nt + nc + 2),
labels = node.labels(nodes)),
flow = integer(),
capacity = integer())

matches <- data.frame(groups=factor(),
upstream=factor(levels=node.labels(nodes)),
downstream=factor(levels=node.labels(nodes))
)
}



arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping)
#arcs <- new("ArcInfo")
sp <- new("SubProbInfo")
sp[1L, "feasible"] <- FALSE
sp[1L, "solver"] <- ""
fmcfs <- new("FullmatchMCFSolutions", subproblems=sp,
nodes=nodes, arcs=arcs)
return(c(
out,
list(maxerr = 0),
list(MCFSolution = NULL)
list(MCFSolution = fmcfs)
))
}

}
## after this point, one of the solvers will be called.
## That is, the problem is not recognizably infeasible prior to calling the solver.
## The logic for constructing MCFSolutions is much more straightforward from this point forward
## Min-Cost-Flow representation of problem ####
## Each node has a integer ID number, implicitly pointing
## to corresponding row of this data frame.
Expand Down Expand Up @@ -232,7 +315,6 @@ fmatch <- function(distance,
algorithm = algorithm)
x <- as.numeric(lout[[1]])
nodes[, "price"] <- lout[[2]]

nodes[, "price"] <- (nodes[, "price"] - nodes[, "price"][endID]) * -1

if (lout[[4]] != "OPTIMAL" || all(x == -1)) {
Expand All @@ -254,9 +336,7 @@ fmatch <- function(distance,


## Material used to create s3 optmatch object:
feas <- fop$feasible1
x <- feas * fop$x1

x <- fop$feasible1 * fop$x1
#### Recover node prices, store in nodes table ##
## In full matching, each upstream (row) or downstream (column) node starts
## an arc ending at End, and these are also the only arcs ending there. End
Expand Down Expand Up @@ -291,8 +371,6 @@ fmatch <- function(distance,
obj <- as.data.frame(distance, row.names = NULL)
obj$solution <- x[seq.int(from=min(1L, narcs), to=narcs)]



### Recover arc flow info, store in `arcs` ###
## info extracted from problem solution:
matches <- distance[as.logical(x[1L:narcs]), c("i", "j")]
Expand All @@ -317,7 +395,9 @@ fmatch <- function(distance,
arcs <- new("ArcInfo", matches=matches, bookkeeping=bookkeeping)

sp <- new("SubProbInfo")
sp[1L, "feasible"] <- TRUE
# If any solver is called, feasibility is now determined only in this location. We know that the solver was called, but we must check to see if the problem was feasible.
sp[1L, "feasible"] <- any(x == 1L)
sp[1L, "solver"] <- solver
fmcfs <- new("FullmatchMCFSolutions", subproblems=sp,
nodes=nodes, arcs=arcs)
return(c(obj,
Expand Down
Loading