diff --git a/DESCRIPTION b/DESCRIPTION index 675e5ec..73c1a2a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: RItools -Version: 0.3-5 +Version: 0.3-5.9003 Title: Randomization Inference Tools Authors@R: c(person("Jake", "Bowers", role = c("aut", "cre"), email = "jwbowers@illinois.edu"), person("Mark", "Fredrickson", role = "aut", email = "mark.m.fredrickson@gmail.com"), diff --git a/NEWS.md b/NEWS.md index 10920be..cb28acb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# RItools {NEXTVERSION} + +* Bug fix (base R plot method svg export) +* More graceful handling in balanceTest() of the edge case of balance testing for all-constant variables, with messages not errors and p-values of NA not 1 + # RItools 0.3-5 * Now "Depends" on the `survival` package to bring in the `strata` and `cluster` diff --git a/R/Design.R b/R/Design.R index 0f5682c..e3f3faf 100644 --- a/R/Design.R +++ b/R/Design.R @@ -999,8 +999,11 @@ HB08 <- function(alignedcovs) { check_for_degenerate(cov_minus_.5, n_, s_) + if (!all(zero_variance)) + { mvz <- drop(crossprod(ssn[!zero_variance], cov_minus_.5)) csq <- drop(crossprod(mvz)) + } else csq <- NA_real_ DF <- ncol(cov_minus_.5) list(z = zstat, p = p, Msq = csq , DF = DF, diff --git a/R/balanceTest.R b/R/balanceTest.R index f12ebeb..500d14b 100644 --- a/R/balanceTest.R +++ b/R/balanceTest.R @@ -250,6 +250,9 @@ balanceTest <- function(fmla, ## we wanted to allow departures from the ETT default. ## Something like `design@Sweights <- DesignWeights(aggDesign, <...>)`.) descriptives <- designToDescriptives(design, covariate.scales) + ## The notmissing indicators can't themselves be missing, so no role for + ## another notmissing indicator to record where its missingness pattern. + ## Instead we just record a blank for them: NMpatterns <- c(NMpatterns, rep("", dim(descriptives)[1]-length(NMpatterns))) # these weights govern inferential but not descriptive calculations @@ -280,7 +283,19 @@ balanceTest <- function(fmla, dimnames(descriptives)[[2]][nstats.previous + 1:2] <- c("z", "p") - # strip out summaries of not-missing indicators that only ever take the value T + inferentials <- do.call(rbind, lapply(tmp, function(s) { + data.frame(s$Msq, s$DF, pchisq(s$Msq, df = s$DF, lower.tail = FALSE)) + })) + colnames(inferentials) <- c("chisquare", "df", "p.value") + + tcovs <- lapply(tmp, function(r) { + tcov <- r$tcov + dimnames(tcov) <- + rep(list(dimnames(descriptives)[["vars"]]),2) + tcov + }) + + # Deal with summaries of not-missing indicators that only ever take the value T nmvars <- identify_NM_vars(dimnames(descriptives)[["vars"]]) # next line assumes every "stat" not in the given list is a mean # over a group assigned to some treatment condition. @@ -293,35 +308,39 @@ balanceTest <- function(fmla, toremove <- match(nmvars[bad], dimnames(descriptives)[["vars"]]) if(length(toremove)>0){ ## if toremove=integer(0) then it drops all vars from descriptives descriptives <- descriptives[-toremove,,,drop=FALSE] - origvars <- origvars[-toremove] - strings_to_remove <- dimnames(descriptives)[["vars"]][toremove] - NMpatterns <- NMpatterns[-toremove] # names of vars that - NMpatterns[ NMpatterns%in% strings_to_remove] <- "" + tcovs <- lapply(tcovs, + function(mat){mat[-toremove, -toremove, drop=FALSE]} + ) + origvars <- origvars[-toremove] + + ## also trim covars to NM patterns lookup table: + NMpatterns <- NMpatterns[-toremove] + ## If the NM pattern var that a covar got mapped to + ## was removed, we take the covar to have no missings + ## anywhere, and report "" as the name of its nonmissing + ## indicator variable: + strings_to_remove <- dimnames(descriptives)[["vars"]][toremove] + NMpatterns[ NMpatterns%in% strings_to_remove] <- "" } } - inferentials <- do.call(rbind, lapply(tmp, function(s) { - data.frame(s$Msq, s$DF, pchisq(s$Msq, df = s$DF, lower.tail = FALSE)) - })) - colnames(inferentials) <- c("chisquare", "df", "p.value") + for (s in 1L:dim(descriptives)[3]) { + descriptives[, "p", s] <- + p.adjust(descriptives[, "p", s], method = p.adjust.method) + } + attr(descriptives, "NMpatterns") <- NMpatterns + attr(descriptives, "originals") <- origvars + attr(descriptives, "term.labels") <- design@TermLabels +# Now `attr(d, "term.labels")[attr(d, "originals")]` +# associates variables to terms of fmla + attr(descriptives, "include.NA.flags") <- + include.NA.flags # hint to print and plot methods - # the meat of our xbal object +# the meat of our xbal object ans$overall <- inferentials + attr(ans$overall, "tcov") <- tcovs ans$results <- descriptives - ## do p.value adjustment - for (s in 1L:dim(descriptives)[3]) - ans$results[, "p", s] <- p.adjust(ans$results[, "p", s], method = p.adjust.method) -## ans$overall[, "p.value"] <- p.adjust(ans$overall[, "p.value"], method = p.adjust.method) - - attr(ans$results, "NMpatterns") <- NMpatterns - attr(ans$results, "originals") <- origvars - attr(ans$results, "term.labels") <- design@TermLabels - attr(ans$results, "include.NA.flags") <- include.NA.flags # hinting for print and plot methods - - attr(ans$overall, "tcov") <- lapply(tmp, function(r) { - r$tcov - }) attr(ans, "fmla") <- formula(fmla) attr(ans, "report") <- report # hinting to our summary method later class(ans) <- c("balancetest", "xbal", "list") diff --git a/R/plot.xbal.R b/R/plot.xbal.R index 6d74b7b..44e518b 100644 --- a/R/plot.xbal.R +++ b/R/plot.xbal.R @@ -334,11 +334,16 @@ balanceplot <- function(x, if (names(dev.cur()) != "svg") { mai <- par('mai') - mai[2] <- max(strwidth(rownames(x), units = "inches")) + mai[2] + mai[2] <- max(c(strwidth(rownames(x), units = "inches"), + 0)# avoids returning -Inf in case `x` has length 0 + ) + mai[2] par(mai = mai) } else { mar <- par("mar") - mar[2] <- max(nchar(x)) + mar[2] # assume one line per character + mar[2] <- max(c(nchar(rownames(x)), + 0) # avoids returning -Inf in case `x` has length 0 + )/2 + # assume 1/2 line per character + mar[2] par(mar = mar) } diff --git a/R/utils.R b/R/utils.R index df21ca8..fbf3524 100644 --- a/R/utils.R +++ b/R/utils.R @@ -375,7 +375,12 @@ XtX_pseudoinv_sqrt <- function(mat, mat.is.XtX = FALSE, tol = .Machine$double.ep if (nrow(mat) == 0 && ncol(mat) == 0) { - stop("Cannot calculate pseudoinverse: perhaps all covariates are constant (within strata)?") + message(paste("Degenerate null covariance (rank 0 Gram matrix):", + "perhaps all covariates are constant (within strata)?", + collapse="\\n")) + ans <- matrix(NA_real_, 0, 0) + attr(ans, "r") <- 0 + return(ans) } pst.svd <- try(svd(mat, nu=0)) diff --git a/tests/testthat/test.balanceTest.R b/tests/testthat/test.balanceTest.R index ee827c2..55f99af 100644 --- a/tests/testthat/test.balanceTest.R +++ b/tests/testthat/test.balanceTest.R @@ -159,12 +159,13 @@ test_that("balT returns covariance of tests", { expect_equal(length(tcov), 2) - ## Developer note: to strip out entries corresponding to intercept -- which has var 0, - ## except when there's variation in unit weights and/or cluster sizes -- - ## have to filter out rows and cols named "(Intercept)", separately for each - ## entry in list tcov. (Recording while updating test that follows, `c(4,4)` --> `c(5,5)`) - expect_equal(dim(tcov[[1]]), c(5,5)) -}) + ## The intercept is removed as it has permutational var 0 in this case. + ## That variance can be positive when there's variation in unit weights + ## and/or cluster sizes; if then tcov would have another row and columm. + expect_equal(dim(tcov[[1]]), c(4,4)) + expect_equivalent(dimnames(tcov[[1]])[[1]], dimnames(res$results)[[1]]) + expect_equivalent(dimnames(tcov[[2]])[[1]], dimnames(res$results)[[1]]) + }) }) test_that("Passing post.alignment.transform, #26", { @@ -410,16 +411,22 @@ test_that("Constant variables", { s = as.factor(sample(letters[1:3], 100, replace = TRUE)), z = rep(c(1,0), 50)) - ## this should be ok, no error - bt <- balanceTest(z ~ xv + xc, data = d) - + exp_na_results <- c("z", "p") + ## this should be ok, no warnings + expect_silent(bt <- balanceTest(z ~ xv + xc, data = d)) + expect_false(any(is.na(bt$results[1,exp_na_results,1]))) + ## but this gives problems - expect_error(balanceTest(z ~ xc, data = d), - "Cannot calculate pseudoinverse") - - ## this too - expect_error(balanceTest(z ~ s + strata(s), data = d), - "Cannot calculate pseudoinverse") + expect_message(bt2 <- balanceTest(z ~ xc, data = d), + "perhaps all covariates are constant") + expect_true(all(is.na(bt2$results[1,exp_na_results,1]))) + expect_true(is.na(bt2$overall[1,"p.value"])) + + ## issues here too + expect_message(bt3 <- balanceTest(z ~ s + strata(s), data = d), + "perhaps all covariates are constant") + expect_true(all(is.na(bt3$results[,exp_na_results,"s"]))) + expect_true(is.na(bt3$overall["s","p.value"])) }) @@ -437,6 +444,35 @@ test_that("Characters and factors", { btc <- balanceTest(z ~ char, data = d) btf <- balanceTest(z ~ fact, data = d) - expect_equal(dim(btc$results), dim(btf$results)) + ## removing expected difference before comparing... + attr(btc$results, "term.labels") <- + attr(btf$results, "term.labels") + expect_equal(unname(btc$results), unname(btf$results)) + expect_equal(unname(attr(btc$overall, "tcov")[["--"]]), + unname(attr(btf$overall, "tcov")[["--"]])) + ## removing another expected difference... + attr(btc$overall, "tcov")[["--"]] <- + attr(btf$overall, "tcov")[["--"]] expect_equal(btc$overall, btf$overall) }) + +test_that("Return variable/fmla term association", { + set.seed(393911) + tmp <- sample(letters[1:3], 100, replace = TRUE) + d <- data.frame(categorical = tmp, + scalar1 = rnorm(100), + scalar2 = rnorm(100), + z = rep(c(1,0), 50), + stringsAsFactors = FALSE) + bt <- balanceTest(z ~ categorical + scalar1 + + poly(scalar2, degree=2), data = d) + originals_ <- attr(bt$results, "originals") + term.labels_ <- attr(bt$results, "term.labels") + expect_equal(term.labels_[originals_], + c(rep("categorical", length(unique(tmp))), + "scalar1", + rep("poly(scalar2, degree = 2)",2) + ) + ) + +}) diff --git a/tests/testthat/test.plot.xbal.R b/tests/testthat/test.plot.xbal.R index 3ff6540..5b97966 100644 --- a/tests/testthat/test.plot.xbal.R +++ b/tests/testthat/test.plot.xbal.R @@ -152,7 +152,7 @@ test_that("Generic balance plots", { test_that("Issue 21: Cairo/pango errors when running plot.xbal", { - if (capabilities()["cairo"]) { + if (capabilities()["cairo"] && nchar(grSoftVersion()["cairo"])>0) { set.seed(20130522) z <- rbinom(100, size = 1, prob = 1/2) @@ -166,10 +166,29 @@ test_that("Issue 21: Cairo/pango errors when running plot.xbal", { # at the moment, I haven't found a way to capture the stderr output from the C level pango function # so, we'll just have to know that if the errors appear in the output stream during testing # we should come here to see the test case (not the best strategy) + svg(tmpo) + plot(xb) + dev.off() + file.remove(tmpo) + bt <- balanceTest(z ~ x+ y, data=data) tmpf <- tempfile() svg(tmpf) - plot(xb) + plot(bt) + dev.off() + + file.remove(tmpf) + tmpf <- tempfile() + svg(tmpf) + plot.xbal(bt, ggplot=FALSE) + dev.off() + file.remove(tmpf) + + bt$results["y", "std.diff",1] <- NA + + tmpf <- tempfile() + svg(tmpf) + plot.xbal(bt, ggplot=FALSE) dev.off() file.remove(tmpf) }