Skip to content

Commit 0484238

Browse files
Merge pull request #150 from SeanDLong/updated_ei_functions
updates from utils/update.r scripts, adding CI paramteres to ei_iter,…
2 parents 8b68303 + 593dd20 commit 0484238

17 files changed

+551
-27
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: eiCompare
22
Type: Package
33
Title: Compares Different Ecological Inference Methods
4-
Version: 3.0.3
4+
Version: 3.0.4
55
Authors@R:
66
c(person(given = "Loren",
77
family = "Collingwood",
@@ -43,11 +43,11 @@ License: GPL-3
4343
Depends: R (>= 3.5.0), eiPack, ei, wru (>= 1.0.0)
4444
Imports: bayestestR, coda, data.table, doSNOW,
4545
dplyr, foreach, ggplot2, graphics, magrittr, mcmcse, methods,
46-
overlapping, purrr, rlang, sf, stringr, tidyr,tidyselect
46+
overlapping, purrr, rlang, sf, stringr, tidyr, tidyselect, viridis
4747
NeedsCompilation: no
4848
Suggests: knitr, plyr, rmarkdown, reshape2, RColorBrewer,
4949
RJSONIO, testthat, tigris
50-
RoxygenNote: 7.2.1
50+
RoxygenNote: 7.3.2
5151
Encoding: UTF-8
5252
VignetteBuilder: knitr
5353
Packaged: 2020-09-08 07:00:35 UTC; lorencollingwood

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ export(predict_race_multi_barreled)
4040
export(race_cand_cors)
4141
export(race_check_2_3)
4242
export(resolve_missing_vals)
43+
export(rpv_coef_plot)
4344
export(rpv_density)
45+
export(rpv_toDF)
4446
export(stdize_votes)
4547
export(stdize_votes_all)
4648
export(strip_special_characters)

NEWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# eiCompare 3.0.4
2+
3+
## Package changes
4+
5+
* incorporated rpv_coef_plot() and rpv_toDF() functions from eiExpand package
6+
* edited ei_iter() to have flexible CI parameters (default is 0.95) using bayestestR for calculation and updated column naming, and to use reproducible parallel processing (.inorder=TRUE)
7+
* edited ei_rxc() with repdocuible parallel processing and changed column naming to fit ei_iter()
8+
* Fixed summary.eiCompare() print behavior
9+
* Added viridis to imports for color visualiztion and updated RoxygenNote to 7.3.2
10+
111
# eiCompare 3.0.3
212

313
## Package changes

R/data.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,11 @@
339339
#' @usage data(gwinnett_ei)
340340
"gwinnett_ei"
341341

342+
#' Example RPV analysis results in Washington State
343+
#'
344+
"example_rpvDF"
345+
346+
#' Example election and demographic data from South Carolina 2020 General Elections
347+
#'
348+
"south_carolina"
349+

R/ei_iter.R

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
#' @param n_cores The number of cores to use in parallel computation. Defaulted to NULL, in which case parallel::detectCores() - 1 is used
3939
#' @param verbose A boolean indicating whether to print out status messages.
4040
#' @param plot_path A string to specify plot save location. If NULL, plot is not saved
41+
#' @param CI Numeric. Confidence interval level (default = 0.95). Specifies the
42+
#' interval width for calculation with bayestestR package.
4143
#' @param ... Additional arguments passed directly to ei::ei()
4244
#'
4345
#' @return If eiCompare_class = TRUE, an object of class eiCompare is returned.
@@ -76,6 +78,7 @@ ei_iter <- function(
7678
n_cores = NULL,
7779
verbose = FALSE,
7880
plot_path = NULL,
81+
CI = .95,
7982
...) {
8083

8184
# Preparation for parallel processing if user specifies parallelization
@@ -156,7 +159,7 @@ ei_iter <- function(
156159
# Loop through each 2x2 ei
157160
ei_results <- foreach::foreach(
158161
i = seq_len(n_iters),
159-
.inorder = FALSE,
162+
.inorder = TRUE,
160163
.packages = c("ei", "stats", "utils", "mvtnorm"),
161164
.options.snow = opts
162165
) %myinfix% {
@@ -200,11 +203,6 @@ ei_iter <- function(
200203
)
201204
})
202205
break
203-
# This was meant to enable parameterization of the ei importance sample
204-
# size, but its inclusion changes results dramatically.
205-
# utils::capture.output({
206-
# ei_out <- suppressMessages(ei_sim(ei_out, samples))
207-
# })
208206
},
209207
error = function(cond) {
210208
if (ii == n_erhos) {
@@ -393,7 +391,7 @@ ei_iter <- function(
393391
# Both CIs
394392
suppressMessages({
395393
suppressWarnings({
396-
cis <- bayestestR::ci(aggs, ci = 0.95, method = "HDI")
394+
cis <- bayestestR::ci(aggs, ci = CI, method = "HDI")
397395
})
398396
})
399397
ci_lowers <- append(ci_lowers, cis$CI_low)
@@ -436,9 +434,9 @@ ei_iter <- function(
436434
estimates <- data.frame(cbind(means, sds, ci_lowers, ci_uppers))
437435
estimates <- cbind(cands, races, estimates)
438436
colnames(estimates) <- c(
439-
"cand", "race", "mean", "sd", "ci_95_lower", "ci_95_upper"
437+
"cand", "race", "mean", "sd",
438+
"ci_lower", "ci_upper"
440439
)
441-
442440
output <- list(
443441
"type" = "iter",
444442
"estimates" = estimates,

R/ei_rxc.R

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,12 @@ ei_rxc <- function(
162162

163163
md_mcmc <- foreach::foreach(
164164
chain = seq_len(n_chains),
165-
.inorder = FALSE,
165+
.inorder = TRUE,
166166
.packages = c("ei"),
167167
.options.snow = opts
168-
) %myinfix% {
169-
# Bayes model estimation
170-
suppressWarnings(
168+
) %myinfix% {
169+
# Bayes model estimation
170+
suppressWarnings(
171171
md_out <- ei.MD.bayes(
172172
formula = formula,
173173
sample = samples,
@@ -263,14 +263,14 @@ ei_rxc <- function(
263263

264264
# Get point estimates and standard errors
265265
estimate <- mcmcse::mcse.mat(chains_pr)
266-
266+
267267
# Get standard deviation of each distribution
268268
sds <- apply(chains_pr, 2, stats::sd)
269269

270270
# The upper and lower CI estimates also have standard errors. Here these
271271
# errors are conservatively used to extend the 95% confidence bound further
272272

273-
# Set bounds according to
273+
# Set bounds according to
274274
if (eiCompare_class) {
275275
# eiCompare class object reports fixed CIs
276276
ci_lower <- 0.025
@@ -284,27 +284,27 @@ ei_rxc <- function(
284284
message(paste("Setting CI upper bound equal to", ci_upper))
285285
}
286286
}
287-
287+
288288
# Lower CI estimate
289289
lower <- mcmcse::mcse.q.mat(chains_pr, q = ci_lower)
290290
lower_est <- lower[, 1]
291291
lower_se <- lower[, 2]
292292
lower <- lower_est - lower_se
293-
293+
294294
# Upper CI estimate
295295
upper <- mcmcse::mcse.q.mat(chains_pr, q = ci_upper)
296296
upper_est <- upper[, 1]
297297
upper_se <- upper[, 2]
298298
upper <- upper_est + upper_se
299-
299+
300300
# Get race and cand cols for the final table
301301
cand_col <- rep(cand_cols, each = length(race_cols))
302302
race_col <- rep(race_cols, times = length(cand_cols))
303-
303+
304304
# Put names on chains_pr
305305
names <- paste(cand_col, race_col, sep = "_")
306306
colnames(chains_pr) <- names
307-
307+
308308
# Create, name an output table
309309
results_table <- data.frame(cbind(estimate[, 1], sds, lower, upper))
310310
results_table <- cbind(cand_col, race_col, results_table)
@@ -320,14 +320,14 @@ ei_rxc <- function(
320320
)
321321
} else {
322322
colnames(results_table) <- c(
323-
"cand", "race", "mean", "sd", "ci_95_lower", "ci_95_upper"
323+
"cand", "race", "mean", "sd", "ci_lower", "ci_upper"
324324
)
325325
}
326326

327327
if (!eiCompare_class) {
328328
# Match expected output
329329
results_table <- get_md_bayes_gen_output(results_table)
330-
330+
331331
# Return results and chains if requested
332332
if (ret_mcmc) {
333333
return(list(table = results_table, chains = chains_pr))

R/rpv_coef_plot.R

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#' @export
2+
#' @import ggplot2
3+
#' @importFrom rlang .data
4+
#'
5+
#' @author Rachel Carroll <rachelcarroll4@gmail.com>
6+
#' @author Stephen El-Khatib <stevekhatib@gmail.com>
7+
#' @author Loren Collingwood <lcollingwood@unm.edu>
8+
#'
9+
#' @title Racially Polarized Voting Analysis (RPV) Coefficient Plot
10+
#' @description Creates a coefficient plot showing of RPV results estimate ranges
11+
#' of all contests by voter race
12+
#' @param rpvDF A data.frame containing RPV results
13+
#' @param title The plot title
14+
#' @param caption The plot caption
15+
#' @param ylab Label along y axis
16+
#' @param colors Character vector of colors, one for each racial group. The order
17+
#' of colors will be respective to the order of racial groups.
18+
#' @param race_order Character vector of racial groups from the \code{voter_race} column of
19+
#' \code{rpvDF} in the order they should appear in the plot. If not specified,
20+
#' the race groups will appear in alphabetical order.
21+
#'
22+
#' @return Coefficient plot of RPV analysis as a ggplot2 object
23+
#'
24+
#' @examples
25+
#'library(eiCompare)
26+
#'data(example_rpvDF)
27+
#'
28+
#'dem_rpv_results <- example_rpvDF %>% dplyr::filter(Party == "Democratic")
29+
#'rpv_coef_plot(dem_rpv_results)
30+
#'
31+
rpv_coef_plot <- function(
32+
rpvDF = NULL,
33+
title = "Racially Polarized Voting Analysis Estimates",
34+
caption = "Data: eiCompare RPV estimates",
35+
ylab = NULL,
36+
colors = NULL,
37+
race_order = NULL
38+
) {
39+
40+
# ----------------------------- QC CHECKS -----------------------------
41+
42+
colnames(rpvDF) <- stringr::str_to_lower(colnames(rpvDF))
43+
44+
##### new code (copied from eiExpand lines 40-58)
45+
# make sure rpvDF argument is defined
46+
if(is.null(rpvDF)){stop("you must include rpvDF argument")}
47+
48+
# make sure necessary columns are included
49+
dif <- setdiff(c("party", "voter_race", "estimate", "lower_bound", "upper_bound"),
50+
colnames(rpvDF))
51+
52+
if( length(dif) > 0 ) {
53+
stop(paste("rpvDF is missing the following fields:",
54+
paste(dif, collapse = ", ")))
55+
}
56+
57+
# make sure only one party is in rpvDF
58+
if( length(unique(rpvDF$party)) > 1 ){
59+
stop("rpvDF should only contain one unique values in column Party")}
60+
##### end QC checks
61+
62+
# ---------------------- Prep Data and Plot Inputs ----------------------
63+
64+
##### Voter Race Order #####
65+
##### old code (from Updates_7_1_2024.R)
66+
# rpvDF$voter_race <- factor(rpvDF$voter_race, levels = race_order)
67+
##### new code (copied from eiExpand lines 64-69)
68+
# proper case for plot
69+
rpvDF$voter_race <- stringr::str_to_title(rpvDF$voter_race)
70+
#get factor order if not specified
71+
if( is.null(race_order) ) { race_order <- sort(unique(rpvDF$voter_race)) }
72+
#set factor
73+
rpvDF$voter_race <- factor(rpvDF$voter_race,
74+
levels = race_order)
75+
76+
##### Colors #####
77+
len_race <- length(unique(rpvDF$voter_race))
78+
##### old code (from Updates_7_1_2024.R)
79+
# if (is.null(colors)) {
80+
# if (len_race == 2) {
81+
# race_colors <- c(viridis::viridis(10)[4], viridis::viridis(10)[7])
82+
# names(race_colors) <- race_order
83+
# ggplot_color_obj <- scale_color_manual(values = race_colors)
84+
# }
85+
# else {
86+
# ggplot_color_obj <- viridis::scale_color_viridis(drop = FALSE,
87+
# discrete = TRUE, option = "turbo", alpha = 0.8)
88+
# }
89+
# }
90+
##### new code (copied from eiExpand lines 71-85)
91+
if( is.null(colors) ){
92+
if( len_race == 2 ){
93+
race_colors <- c(viridis::viridis(10)[4], viridis::viridis(10)[7])
94+
names(race_colors) <- race_order
95+
96+
ggplot_color_obj <- scale_color_manual(values = race_colors)
97+
98+
} else {
99+
ggplot_color_obj <- viridis::scale_color_viridis(drop = FALSE,
100+
discrete = TRUE,
101+
option = "turbo",
102+
alpha = .8)
103+
}
104+
} # END if( is.null(colors) )
105+
106+
##### ylab #####
107+
if( is.null(ylab) ){
108+
prty <- unique(rpvDF$party) %>% stringr::str_to_title()
109+
ylab <- paste("Percent Voting for", prty, "Candidate")
110+
}
111+
112+
##### mean percent vote for label #####
113+
mean <- rpvDF %>%
114+
dplyr::group_by(.data$voter_race) %>%
115+
dplyr::summarize(avg = mean(.data$estimate))
116+
117+
rpvDF <- dplyr::left_join(rpvDF, mean, by = "voter_race")
118+
rpvDF$panelLab <- paste0(rpvDF$voter_race, "\n(mean: ", round(rpvDF$avg,1), "%)")
119+
120+
# -------------------------- Build Plot --------------------------
121+
122+
coef_plot <- ggplot(rpvDF,
123+
aes(x = 0, y = 0:100)) +
124+
scale_y_continuous(breaks = seq(0,100, by = 10),
125+
limits = c(0, 100),
126+
labels = sprintf("%0.1f%%", seq(0,100, by = 10)),
127+
expand = c(0, 0)) +
128+
geom_hline(yintercept = 50, colour = "#000000", size = 0.75) + # Line at 0
129+
geom_pointrange(aes(y = .data$estimate,
130+
ymin = .data$lower_bound,
131+
ymax = .data$upper_bound,
132+
color = .data$voter_race),
133+
position = position_jitter(width = 0.1),
134+
size = 2,
135+
fatten = 1.5,
136+
show.legend = F) + # Ranges for each coefficient
137+
ggplot_color_obj +
138+
facet_grid(~panelLab) +
139+
labs(y = ylab,
140+
title = title,
141+
caption = caption) + # Labels
142+
theme_minimal() +
143+
theme(legend.title = element_blank(),
144+
axis.title.x = element_blank(),
145+
axis.ticks.x = element_blank(),
146+
axis.text.x = element_blank(),
147+
panel.border = element_rect(fill = NA, colour = "grey"),
148+
panel.grid.major.x = element_blank(),
149+
panel.grid.minor.x = element_blank(),
150+
panel.grid.minor.y = element_blank(),
151+
axis.text.y = element_text(size = 20, face = "bold", family = "serif"),
152+
axis.title.y = element_text(size = 24, face = "bold", family = "serif"),
153+
strip.text.x = element_text(size = 15, face = "bold", family = "serif"),
154+
#strip.text.x = element_blank(),
155+
title = element_text(size = 30, hjust = .5, face = "bold", family = "serif"),
156+
plot.caption = element_text(size = 12, face = "italic", family = "serif")
157+
)
158+
159+
# -------------------------- Return --------------------------
160+
return(coef_plot)
161+
}

0 commit comments

Comments
 (0)