From a56da753081053729065d455a0a0873f60bd427b Mon Sep 17 00:00:00 2001
From: Garrick Aden-Buie sequence variable records in what order cancers were diagnosed.
Problems with the sequence values can occur from errors at the time of manual data entry or through historical changes in coding standards for this variable.
Note that, while the data entries are fictitious, the problem is based on the real experiences of our group and others who use cancer registry systems.
example_data
-## # A tibble: 12 x 4
+example_data
+## # A tibble: 12 x 4
## id name cancerSite sequence
## <dbl> <chr> <chr> <chr>
## 1 3839 Bernie O'Reilly Prostate 0
@@ -53,10 +53,10 @@
Standard recode()
The first issue can be fixed with a standard use of recode() from dplyr.
-example_data <- example_data %>%
+example_data <- example_data %>%
mutate(sequence = recode(sequence, "99" = "1"))
example_data
-## # A tibble: 12 x 4
+## # A tibble: 12 x 4
## id name cancerSite sequence
## <dbl> <chr> <chr> <chr>
## 1 3839 Bernie O'Reilly Prostate 0
@@ -77,17 +77,17 @@ Standard recode()
recode_if()
For the second two issues, we need to condition the value updating on a second column.
-By this, we mean that we can’t use the value of sequence directly to choose which value to update – for example, we need to change the value of sequence when it equals 1, but only for id == 2702 & cancerSite == "Brain".
+By this, we mean that we can’t use the value of sequence directly to choose which value to update – for example, we need to change the value of sequence when it equals 1, but only for id == 2702 & cancerSite == "Brain".
To do this we introduce a simple function called recode_if() that provides a wrapper around if_else() and recode().
-recode_if <- function(x, condition, ...) {
+recode_if <- function(x, condition, ...) {
if_else(condition, recode(x, ...), x)
}
-Then we apply this function to change the value of sequence to 3 for the person with id == 2702 & cancerSite == "Brain".
-example_data <- example_data %>%
+Then we apply this function to change the value of sequence to 3 for the person with id == 2702 & cancerSite == "Brain".
+example_data <- example_data %>%
mutate(sequence = recode_if(sequence, id == 2702 & cancerSite == "Brain", "1" = "3"))
example_data
-## # A tibble: 12 x 4
+## # A tibble: 12 x 4
## id name cancerSite sequence
## <dbl> <chr> <chr> <chr>
## 1 3839 Bernie O'Reilly Prostate 0
@@ -103,11 +103,11 @@ recode_if()
## 11 2702 Abigale Senger-Schimmel Brain 3
## 12 3622 Regis Stracke-Bartell <NA> 0
And finally, we correct the historical uses of 0 and 60 in the sequence variable using recode_if().
-example_data <- example_data %>%
+example_data <- example_data %>%
mutate(sequence = recode_if(sequence, !is.na(cancerSite), "0" = "1", "60" = "2"))
example_data
-## # A tibble: 12 x 4
+## # A tibble: 12 x 4
## id name cancerSite sequence
## <dbl> <chr> <chr> <chr>
## 1 3839 Bernie O'Reilly Prostate 1
@@ -129,21 +129,21 @@ Comparison
recode() and recode_if() are two methods that are useful, but there are others.
In our opinion, the explict mapping of old values to new values in recode() and recode_if() makes the code clearer and easier to understand from a distance.
Here’s the full method using recode() and recode_if().
-example_data_orig %>%
+example_data_orig %>%
mutate(
sequence = recode(sequence, "99" = "1"),
sequence = recode_if(sequence, id == 2702 & cancerSite == "Brain", "1" = "3"),
sequence = recode_if(sequence, !is.na(cancerSite), "0" = "1", "60" = "2")
)
Another option is to use if_else() directly
-example_data_orig %>%
+example_data_orig %>%
mutate(
sequence = if_else(sequence == "99", "1", sequence),
sequence = if_else(id == 2702 & cancerSite == "Brain", "3", sequence),
sequence = if_else(!is.na(cancerSite) & sequence == "0", "1", sequence),
sequence = if_else(!is.na(cancerSite) & sequence == "60", "2", sequence)
)
-## # A tibble: 12 x 4
+## # A tibble: 12 x 4
## id name cancerSite sequence
## <dbl> <chr> <chr> <chr>
## 1 3839 Bernie O'Reilly Prostate 1
@@ -171,7 +171,7 @@ Comparison
sequence)
)
A third option is to use case_when(), as in
-example_data_orig %>%
+example_data_orig %>%
mutate(
sequence = case_when(
sequence == "99" ~ "1",
@@ -181,7 +181,7 @@ Comparison
TRUE ~ sequence
)
)
-## # A tibble: 12 x 4
+## # A tibble: 12 x 4
## id name cancerSite sequence
## <dbl> <chr> <chr> <chr>
## 1 3839 Bernie O'Reilly Prostate 1
diff --git a/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.Rmd b/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.Rmd
index 7c44670..69412e3 100644
--- a/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.Rmd
+++ b/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.Rmd
@@ -26,7 +26,8 @@ data_zip_file <- here::here("static/data/ie-general-referrals-by-hospital.zip")
# Warning! Everything else after this happens in the tempdir
knitr::opts_knit$set(root.dir = tempdir())
-knitr::opts_chunk$set(warning = FALSE, message = FALSE)
+knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE,
+ class.source = "code-source", class.output = "code-output")
```
```{r include=FALSE}
diff --git a/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.html b/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.html
index fdfe7ed..1b38a2b 100644
--- a/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.html
+++ b/content/blog/2018-09-20-import-a-directory-of-csv-files-at-once-using-purrr-and-readr.html
@@ -17,7 +17,7 @@
This post demonstrates how you can use two packages from the tidyverse – purrr and readr – to read a list of CSV files into a single data frame in R.
We’ll also use a newer package called fs for file system operations.
-# Install packages if you need to
+# Install packages if you need to
install.packages(c("tidyverse", "fs"))
library(tidyverse) # attaches purrr and readr
@@ -32,18 +32,18 @@ Example Data
To make the example more concrete, we’ll use a dataset provided by the Government of Ireland showing the yearly number of e-referrals per hospital department, where the data from each year reside in individual CSV files.
We’ve collected the data for you, which you can download as a zipfile here, or you can download the original CSV files from data.gov.ie.
Once we’ve extracted the zip file or downloaded the CSV files a single folder, we store the location of the unzipped folder in data_dir.
-data_dir <- "ie-general-referrals-by-hospital"
+data_dir <- "ie-general-referrals-by-hospital"
We can then list the CSV files using fs::dir_ls().
-fs::dir_ls(data_dir)
-## ie-general-referrals-by-hospital/README.txt
+fs::dir_ls(data_dir)
+## ie-general-referrals-by-hospital/README.txt
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2015.csv
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2016.csv
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2017.csv
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2018.csv
Notice that there is an additional README.txt file that we don’t want to import, so we limit our directory listing to just the CSV files, i.e. the files that end with .csv.
-csv_files <- fs::dir_ls(data_dir, regexp = "\\.csv$")
+csv_files <- fs::dir_ls(data_dir, regexp = "\\.csv$")
csv_files
-## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2015.csv
+## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2015.csv
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2016.csv
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2017.csv
## ie-general-referrals-by-hospital/general-referrals-by-hospital-department-2018.csv
@@ -54,21 +54,21 @@ Import all the Files
Start with one file
Each of the CSV files can be individual read in using readr::read_csv().
Let’s read one file as an example:
-readr::read_csv(csv_files[1])
-## # A tibble: 837 x 6
-## Month_Year Hospital_Name Hospital_ID Hospital_Department ReferralType
-## <chr> <chr> <int> <chr> <chr>
-## 1 Aug-15 AMNCH 1049 Paediatric ENT General Ref…
-## 2 Aug-15 AMNCH 1049 Paediatric Gastroe… General Ref…
-## 3 Aug-15 AMNCH 1049 Paediatric General… General Ref…
-## 4 Aug-15 Bantry General… 704 Gastroenterology General Ref…
-## 5 Aug-15 Bantry General… 704 General Medicine General Ref…
-## 6 Aug-15 Bantry General… 704 General Surgery General Ref…
-## 7 Aug-15 Bantry General… 704 Medicine for the E… General Ref…
-## 8 Aug-15 Bantry General… 704 Outreach Dermatolo… General Ref…
-## 9 Aug-15 Bantry General… 704 Outreach Orthopaed… General Ref…
-## 10 Aug-15 Bantry General… 704 Outreach Surgical General Ref…
-## # ... with 827 more rows, and 1 more variable: TotalReferrals <int>
+readr::read_csv(csv_files[1])
+## # A tibble: 837 x 6
+## Month_Year Hospital_Name Hospital_ID Hospital_Depart… ReferralType
+## <chr> <chr> <dbl> <chr> <chr>
+## 1 Aug-15 AMNCH 1049 Paediatric ENT General Ref…
+## 2 Aug-15 AMNCH 1049 Paediatric Gast… General Ref…
+## 3 Aug-15 AMNCH 1049 Paediatric Gene… General Ref…
+## 4 Aug-15 Bantry Gener… 704 Gastroenterology General Ref…
+## 5 Aug-15 Bantry Gener… 704 General Medicine General Ref…
+## 6 Aug-15 Bantry Gener… 704 General Surgery General Ref…
+## 7 Aug-15 Bantry Gener… 704 Medicine for th… General Ref…
+## 8 Aug-15 Bantry Gener… 704 Outreach Dermat… General Ref…
+## 9 Aug-15 Bantry Gener… 704 Outreach Orthop… General Ref…
+## 10 Aug-15 Bantry Gener… 704 Outreach Surgic… General Ref…
+## # … with 827 more rows, and 1 more variable: TotalReferrals <dbl>
Scale up to all files
@@ -81,43 +81,43 @@ Scale up to all files
But knowing that each list element will be a tibble (or data.frame) and that each data frame has the same columns, we can use purrr’s typed functions to return a single data frame containing each of the imported CSV files using purrr::map_dfr().
The additional _dfr() tells purrr to return a data frame (df) by row-binding each element together (r).
(It’s the same as calling map() %>% bind_rows().)
-csv_files %>%
+csv_files %>%
map_dfr(read_csv)
-## # A tibble: 12,278 x 6
-## Month_Year Hospital_Name Hospital_ID Hospital_Department ReferralType
-## <chr> <chr> <int> <chr> <chr>
-## 1 Aug-15 AMNCH 1049 Paediatric ENT General Ref…
-## 2 Aug-15 AMNCH 1049 Paediatric Gastroe… General Ref…
-## 3 Aug-15 AMNCH 1049 Paediatric General… General Ref…
-## 4 Aug-15 Bantry General… 704 Gastroenterology General Ref…
-## 5 Aug-15 Bantry General… 704 General Medicine General Ref…
-## 6 Aug-15 Bantry General… 704 General Surgery General Ref…
-## 7 Aug-15 Bantry General… 704 Medicine for the E… General Ref…
-## 8 Aug-15 Bantry General… 704 Outreach Dermatolo… General Ref…
-## 9 Aug-15 Bantry General… 704 Outreach Orthopaed… General Ref…
-## 10 Aug-15 Bantry General… 704 Outreach Surgical General Ref…
-## # ... with 12,268 more rows, and 1 more variable: TotalReferrals <int>
+## # A tibble: 12,278 x 6
+## Month_Year Hospital_Name Hospital_ID Hospital_Depart… ReferralType
+## <chr> <chr> <dbl> <chr> <chr>
+## 1 Aug-15 AMNCH 1049 Paediatric ENT General Ref…
+## 2 Aug-15 AMNCH 1049 Paediatric Gast… General Ref…
+## 3 Aug-15 AMNCH 1049 Paediatric Gene… General Ref…
+## 4 Aug-15 Bantry Gener… 704 Gastroenterology General Ref…
+## 5 Aug-15 Bantry Gener… 704 General Medicine General Ref…
+## 6 Aug-15 Bantry Gener… 704 General Surgery General Ref…
+## 7 Aug-15 Bantry Gener… 704 Medicine for th… General Ref…
+## 8 Aug-15 Bantry Gener… 704 Outreach Dermat… General Ref…
+## 9 Aug-15 Bantry Gener… 704 Outreach Orthop… General Ref…
+## 10 Aug-15 Bantry Gener… 704 Outreach Surgic… General Ref…
+## # … with 12,268 more rows, and 1 more variable: TotalReferrals <dbl>
Modify settings for all files
Notice that the Month_Year column was imported as a character instead of a date-time.
We can modify the arguments of read_csv() inside the call to map_dfr(), which sets the arguments for each CSV import.
-csv_files %>%
+csv_files %>%
map_dfr(read_csv, col_types = cols("Month_Year" = col_date(format = "%b-%y")))
-## # A tibble: 12,278 x 6
-## Month_Year Hospital_Name Hospital_ID Hospital_Department ReferralType
-## <date> <chr> <int> <chr> <chr>
-## 1 2015-08-01 AMNCH 1049 Paediatric ENT General Ref…
-## 2 2015-08-01 AMNCH 1049 Paediatric Gastroe… General Ref…
-## 3 2015-08-01 AMNCH 1049 Paediatric General… General Ref…
-## 4 2015-08-01 Bantry General… 704 Gastroenterology General Ref…
-## 5 2015-08-01 Bantry General… 704 General Medicine General Ref…
-## 6 2015-08-01 Bantry General… 704 General Surgery General Ref…
-## 7 2015-08-01 Bantry General… 704 Medicine for the E… General Ref…
-## 8 2015-08-01 Bantry General… 704 Outreach Dermatolo… General Ref…
-## 9 2015-08-01 Bantry General… 704 Outreach Orthopaed… General Ref…
-## 10 2015-08-01 Bantry General… 704 Outreach Surgical General Ref…
-## # ... with 12,268 more rows, and 1 more variable: TotalReferrals <int>
+## # A tibble: 12,278 x 6
+## Month_Year Hospital_Name Hospital_ID Hospital_Depart… ReferralType
+## <date> <chr> <dbl> <chr> <chr>
+## 1 2015-08-01 AMNCH 1049 Paediatric ENT General Ref…
+## 2 2015-08-01 AMNCH 1049 Paediatric Gast… General Ref…
+## 3 2015-08-01 AMNCH 1049 Paediatric Gene… General Ref…
+## 4 2015-08-01 Bantry Gener… 704 Gastroenterology General Ref…
+## 5 2015-08-01 Bantry Gener… 704 General Medicine General Ref…
+## 6 2015-08-01 Bantry Gener… 704 General Surgery General Ref…
+## 7 2015-08-01 Bantry Gener… 704 Medicine for th… General Ref…
+## 8 2015-08-01 Bantry Gener… 704 Outreach Dermat… General Ref…
+## 9 2015-08-01 Bantry Gener… 704 Outreach Orthop… General Ref…
+## 10 2015-08-01 Bantry Gener… 704 Outreach Surgic… General Ref…
+## # … with 12,268 more rows, and 1 more variable: TotalReferrals <dbl>
Warning messages:
1: In rbind(names(probs), probs_f) :
number of columns of result is not a multiple of vector length (arg 1)
@@ -129,48 +129,48 @@ Fix date parsing after importing
Setting read_csv options for all of the files almost works.
Unfortunately, the format of the Month_Year column changed in 2016 from Jan-15 to Jan-2016, so the Month_Year column is NA for the years 2016 and 2017.
To fix this, the easiest solution is to import as a character and use lubridate to parse the date-times, as readr’s col_date() format only allows for one format.
-library(lubridate)
+library(lubridate)
csv_files %>%
map_dfr(read_csv) %>%
mutate(Month_Year = myd(Month_Year, truncated = 1))
-## # A tibble: 12,278 x 6
-## Month_Year Hospital_Name Hospital_ID Hospital_Department ReferralType
-## <date> <chr> <int> <chr> <chr>
-## 1 2015-08-01 AMNCH 1049 Paediatric ENT General Ref…
-## 2 2015-08-01 AMNCH 1049 Paediatric Gastroe… General Ref…
-## 3 2015-08-01 AMNCH 1049 Paediatric General… General Ref…
-## 4 2015-08-01 Bantry General… 704 Gastroenterology General Ref…
-## 5 2015-08-01 Bantry General… 704 General Medicine General Ref…
-## 6 2015-08-01 Bantry General… 704 General Surgery General Ref…
-## 7 2015-08-01 Bantry General… 704 Medicine for the E… General Ref…
-## 8 2015-08-01 Bantry General… 704 Outreach Dermatolo… General Ref…
-## 9 2015-08-01 Bantry General… 704 Outreach Orthopaed… General Ref…
-## 10 2015-08-01 Bantry General… 704 Outreach Surgical General Ref…
-## # ... with 12,268 more rows, and 1 more variable: TotalReferrals <int>
+## # A tibble: 12,278 x 6
+## Month_Year Hospital_Name Hospital_ID Hospital_Depart… ReferralType
+## <date> <chr> <dbl> <chr> <chr>
+## 1 2015-08-01 AMNCH 1049 Paediatric ENT General Ref…
+## 2 2015-08-01 AMNCH 1049 Paediatric Gast… General Ref…
+## 3 2015-08-01 AMNCH 1049 Paediatric Gene… General Ref…
+## 4 2015-08-01 Bantry Gener… 704 Gastroenterology General Ref…
+## 5 2015-08-01 Bantry Gener… 704 General Medicine General Ref…
+## 6 2015-08-01 Bantry Gener… 704 General Surgery General Ref…
+## 7 2015-08-01 Bantry Gener… 704 Medicine for th… General Ref…
+## 8 2015-08-01 Bantry Gener… 704 Outreach Dermat… General Ref…
+## 9 2015-08-01 Bantry Gener… 704 Outreach Orthop… General Ref…
+## 10 2015-08-01 Bantry Gener… 704 Outreach Surgic… General Ref…
+## # … with 12,268 more rows, and 1 more variable: TotalReferrals <dbl>
Add a source indicator
Finally, we may sometimes wish to include an indicator variable that tracks the source of each row, for example to keep track of the file from which the data was collected.
To do this, we set the .id parameter of map_dfr() to the name of the indicator variable, and the source file name will be included in the final data set.
-csv_files %>%
+csv_files %>%
map_dfr(read_csv, .id = "source") %>%
mutate(Month_Year = myd(Month_Year, truncated = 1))
-## # A tibble: 12,278 x 7
-## source Month_Year Hospital_Name Hospital_ID Hospital_Departm…
-## <chr> <date> <chr> <int> <chr>
-## 1 ie-general-ref… 2015-08-01 AMNCH 1049 Paediatric ENT
-## 2 ie-general-ref… 2015-08-01 AMNCH 1049 Paediatric Gastr…
-## 3 ie-general-ref… 2015-08-01 AMNCH 1049 Paediatric Gener…
-## 4 ie-general-ref… 2015-08-01 Bantry Genera… 704 Gastroenterology
-## 5 ie-general-ref… 2015-08-01 Bantry Genera… 704 General Medicine
-## 6 ie-general-ref… 2015-08-01 Bantry Genera… 704 General Surgery
-## 7 ie-general-ref… 2015-08-01 Bantry Genera… 704 Medicine for the…
-## 8 ie-general-ref… 2015-08-01 Bantry Genera… 704 Outreach Dermato…
-## 9 ie-general-ref… 2015-08-01 Bantry Genera… 704 Outreach Orthopa…
-## 10 ie-general-ref… 2015-08-01 Bantry Genera… 704 Outreach Surgical
-## # ... with 12,268 more rows, and 2 more variables: ReferralType <chr>,
-## # TotalReferrals <int>
+## # A tibble: 12,278 x 7
+## source Month_Year Hospital_Name Hospital_ID Hospital_Depart…
+## <chr> <date> <chr> <dbl> <chr>
+## 1 ie-ge… 2015-08-01 AMNCH 1049 Paediatric ENT
+## 2 ie-ge… 2015-08-01 AMNCH 1049 Paediatric Gast…
+## 3 ie-ge… 2015-08-01 AMNCH 1049 Paediatric Gene…
+## 4 ie-ge… 2015-08-01 Bantry Gener… 704 Gastroenterology
+## 5 ie-ge… 2015-08-01 Bantry Gener… 704 General Medicine
+## 6 ie-ge… 2015-08-01 Bantry Gener… 704 General Surgery
+## 7 ie-ge… 2015-08-01 Bantry Gener… 704 Medicine for th…
+## 8 ie-ge… 2015-08-01 Bantry Gener… 704 Outreach Dermat…
+## 9 ie-ge… 2015-08-01 Bantry Gener… 704 Outreach Orthop…
+## 10 ie-ge… 2015-08-01 Bantry Gener… 704 Outreach Surgic…
+## # … with 12,268 more rows, and 2 more variables: ReferralType <chr>,
+## # TotalReferrals <dbl>
diff --git a/static/css/custom.css b/static/css/custom.css
index e8998eb..8ed2808 100644
--- a/static/css/custom.css
+++ b/static/css/custom.css
@@ -204,8 +204,18 @@ a {
}
a:hover {
- color: #7F8BBF; }
-
+ color: #D3751C;
+}
+
+#post-box a:hover {
+ background: #e7e7f3;
+ color: #161688;
+ padding: 3px;
+ margin-left: -3px;
+ margin-right: -3px;
+ border-radius: 3px;
+}
+
.button.primary {
background-color: #D3751C;
}
@@ -260,6 +270,8 @@ p > .pkg > a:hover {
margin-right: -4px;
padding-right: 4px;
background-color: #eff4ef;
+ padding-top: 1px;
+ padding-bottom: 1px;
}
h1 > code, h2 > code, h3 > code, h4 > code, h5 > code {
@@ -271,6 +283,22 @@ pre code {
overflow-x: auto;
white-space: pre;
}
+
+/*
+ Fancier looking code chunks ----
+ knitr::opts_chunk$set(class.source = "code-source", class.output = "code-output")
+ */
+.code-source {
+ padding: 0em;
+ border: 5px solid #eaeaea;
+}
+.code-output {
+ padding: 10px;
+ background: #fafafa;
+}
+.code-source + pre:not(.code-source) {
+ margin-top: -16px;
+}
#components h4 a {
color: #D3751C; }
From 8167c43b01e2af96fbb3a5a394851c5d14c0ec71 Mon Sep 17 00:00:00 2001
From: Garrick Aden-Buie
Date: Tue, 4 Jun 2019 16:18:16 -0400
Subject: [PATCH 2/3] Tweak fancy code output chunks
---
static/css/custom.css | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/static/css/custom.css b/static/css/custom.css
index 8ed2808..d8862a2 100644
--- a/static/css/custom.css
+++ b/static/css/custom.css
@@ -290,7 +290,7 @@ pre code {
*/
.code-source {
padding: 0em;
- border: 5px solid #eaeaea;
+ border: 3px solid #eaeaea;
}
.code-output {
padding: 10px;
@@ -299,6 +299,12 @@ pre code {
.code-source + pre:not(.code-source) {
margin-top: -16px;
}
+.code-error, pre.code-error code.hljs {
+ background: #f6d3d3;
+}
+.code-error {
+ border: solid 3px #efcbcb;
+}
#components h4 a {
color: #D3751C; }
From 526e7699295288f9036e759a376e2848b92fc7a6 Mon Sep 17 00:00:00 2001
From: Garrick Aden-Buie
Date: Tue, 4 Jun 2019 16:19:02 -0400
Subject: [PATCH 3/3] wip post working with missing list elements
---
...-with-missing-elements-in-list-columns.Rmd | 89 +++++++++++++
...with-missing-elements-in-list-columns.html | 117 ++++++++++++++++++
.../img/hero/annie-spratt-746955-unsplash.jpg | Bin 0 -> 2348561 bytes
3 files changed, 206 insertions(+)
create mode 100644 content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.Rmd
create mode 100644 content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.html
create mode 100644 static/img/hero/annie-spratt-746955-unsplash.jpg
diff --git a/content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.Rmd b/content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.Rmd
new file mode 100644
index 0000000..09d2a42
--- /dev/null
+++ b/content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.Rmd
@@ -0,0 +1,89 @@
+---
+title: Working with Missing Elements in List Columns
+author:
+ - Garrick Aden-Buie
+date: '2019-06-04'
+slug: working-with-missing-elements-in-list-columns
+categories:
+ - R
+tags:
+ - R
+ - Tips
+description: ''
+hero_bg: "/img/hero/annie-spratt-746955-unsplash.jpg"
+weight: 20
+---
+
+```{css, echo=FALSE}
+#hero {
+ background-position: 0 70%;
+}
+```
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+ echo = TRUE, warning = FALSE, message = FALSE,
+ class.source = "code-source",
+ class.output = "code-output",
+ class.error = "code-error"
+)
+```
+
+When working with list columns, it can be useful to mark entire elements as missing, but I’m struggling to find a consistent and easy-to-use data structure that works well with `unnest()`.
+
+Here’s a small example with a list column of tibbles, where, ideally, the 2nd element is “missing”. I’d like to `unnest()` column `y` but keep all of the rows in the original data frame. In real life, the tibbles in `y` are more complicated, but when present they all have the same number and type of columns.
+
+The first idea I tried was to store missingness in the list column as `NULL`, but `unnest()` throws an error in this case.
+
+```{r, error=TRUE, warning=FALSE, message=FALSE}
+library(tidyverse)
+(data_null <- tibble(x = 1:2, y = list(tibble(z = 1L), NULL)))
+data_null %>% unnest()
+```
+
+The second idea was to use a zero-row data frame. I was hopeful this would work because it’s easy to grab a valid example and use the `valid_ex[0, ]` trick to create the zero-row data frame with the correct number and type of columns. This now works, but we lose the row with the zero-length data frame.
+
+```{r}
+(data_zero_tibble <- tibble(x = 1:2, y = list(tibble(z = 1L), tibble())))
+data_zero_tibble %>% unnest()
+```
+
+Even trying to `.preserve` column y in the `unnest()` call drops the zero-length row.
+
+```{r}
+data_zero_tibble %>% unnest(y, .preserve = "y")
+```
+
+What does work is to explicitly use `NA` across rows with missing values.
+
+```{r}
+(data_na_int <- tibble(x = 1:2, y = list(tibble(z = 1L), tibble(z = NA_integer_))))
+data_na_int %>% unnest()
+```
+
+And the type of missing value doesn’t seem to matter.
+
+```{r}
+(data_na_chr <- tibble(x = 1:2, y = list(tibble(z = 1L), tibble(.drop = NA_character_))))
+data_na_chr %>% unnest()
+```
+
+This might be the best solution, because it's not necessary to know anything about the other list elements in advance. All that is needed is an `NA` value in the same *data shape* as the other list elements.
+
+```{r}
+(data_iris_zero <- tibble(x = 1:2, y = list(iris[1:2, ], iris[0,])))
+data_iris_zero %>% unnest()
+
+(data_iris_na <- tibble(x = 1:2, y = list(iris[1:2, ], data.frame(Sepal.Length = NA))))
+data_iris_na %>% unnest()
+```
+
+Finally, another solution is to use the zero-length data frame element and
+`full_join()` the `unnest()`ed data with the original data, minus the list column.
+
+```{r}
+full_join(
+ data_iris_zero %>% unnest(),
+ data_iris_zero %>% select(-y)
+)
+```
diff --git a/content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.html b/content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.html
new file mode 100644
index 0000000..a2db8ab
--- /dev/null
+++ b/content/blog/2019/2019-06-04-working-with-missing-elements-in-list-columns.html
@@ -0,0 +1,117 @@
+---
+title: Working with Missing Elements in List Columns
+author:
+ - Garrick Aden-Buie
+date: '2019-06-04'
+slug: working-with-missing-elements-in-list-columns
+categories:
+ - R
+tags:
+ - R
+ - Tips
+description: ''
+hero_bg: "/img/hero/annie-spratt-746955-unsplash.jpg"
+weight: 20
+---
+
+
+
+
+When working with list columns, it can be useful to mark entire elements as missing, but I’m struggling to find a consistent and easy-to-use data structure that works well with unnest().
+Here’s a small example with a list column of tibbles, where, ideally, the 2nd element is “missing”. I’d like to unnest() column y but keep all of the rows in the original data frame. In real life, the tibbles in y are more complicated, but when present they all have the same number and type of columns.
+The first idea I tried was to store missingness in the list column as NULL, but unnest() throws an error in this case.
+library(tidyverse)
+(data_null <- tibble(x = 1:2, y = list(tibble(z = 1L), NULL)))
+## # A tibble: 2 x 2
+## x y
+## <int> <list>
+## 1 1 <tibble [1 × 1]>
+## 2 2 <NULL>
+data_null %>% unnest()
+## Each column must either be a list of vectors or a list of data frames [y]
+The second idea was to use a zero-row data frame. I was hopeful this would work because it’s easy to grab a valid example and use the valid_ex[0, ] trick to create the zero-row data frame with the correct number and type of columns. This now works, but we lose the row with the zero-length data frame.
+(data_zero_tibble <- tibble(x = 1:2, y = list(tibble(z = 1L), tibble())))
+## # A tibble: 2 x 2
+## x y
+## <int> <list>
+## 1 1 <tibble [1 × 1]>
+## 2 2 <tibble [0 × 0]>
+data_zero_tibble %>% unnest()
+## # A tibble: 1 x 2
+## x z
+## <int> <int>
+## 1 1 1
+Even trying to .preserve column y in the unnest() call drops the zero-length row.
+data_zero_tibble %>% unnest(y, .preserve = "y")
+## # A tibble: 1 x 3
+## x y z
+## <int> <list> <int>
+## 1 1 <tibble [1 × 1]> 1
+What does work is to explicitly use NA across rows with missing values.
+(data_na_int <- tibble(x = 1:2, y = list(tibble(z = 1L), tibble(z = NA_integer_))))
+## # A tibble: 2 x 2
+## x y
+## <int> <list>
+## 1 1 <tibble [1 × 1]>
+## 2 2 <tibble [1 × 1]>
+data_na_int %>% unnest()
+## # A tibble: 2 x 2
+## x z
+## <int> <int>
+## 1 1 1
+## 2 2 NA
+And the type of missing value doesn’t seem to matter.
+(data_na_chr <- tibble(x = 1:2, y = list(tibble(z = 1L), tibble(.drop = NA_character_))))
+## # A tibble: 2 x 2
+## x y
+## <int> <list>
+## 1 1 <tibble [1 × 1]>
+## 2 2 <tibble [1 × 1]>
+data_na_chr %>% unnest()
+## # A tibble: 2 x 3
+## x z .drop
+## <int> <int> <chr>
+## 1 1 1 <NA>
+## 2 2 NA <NA>
+This might be the best solution, because it’s not necessary to know anything about the other list elements in advance. All that is needed is an NA value in the same data shape as the other list elements.
+(data_iris_zero <- tibble(x = 1:2, y = list(iris[1:2, ], iris[0,])))
+## # A tibble: 2 x 2
+## x y
+## <int> <list>
+## 1 1 <df[,5] [2 × 5]>
+## 2 2 <df[,5] [0 × 5]>
+data_iris_zero %>% unnest()
+## # A tibble: 2 x 6
+## x Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+## <int> <dbl> <dbl> <dbl> <dbl> <fct>
+## 1 1 5.1 3.5 1.4 0.2 setosa
+## 2 1 4.9 3 1.4 0.2 setosa
+(data_iris_na <- tibble(x = 1:2, y = list(iris[1:2, ], data.frame(Sepal.Length = NA))))
+## # A tibble: 2 x 2
+## x y
+## <int> <list>
+## 1 1 <df[,5] [2 × 5]>
+## 2 2 <df[,1] [1 × 1]>
+data_iris_na %>% unnest()
+## # A tibble: 3 x 6
+## x Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+## <int> <dbl> <dbl> <dbl> <dbl> <fct>
+## 1 1 5.1 3.5 1.4 0.2 setosa
+## 2 1 4.9 3 1.4 0.2 setosa
+## 3 2 NA NA NA NA <NA>
+Finally, another solution is to use the zero-length data frame element and
+full_join() the unnest()ed data with the original data, minus the list column.
+full_join(
+ data_iris_zero %>% unnest(),
+ data_iris_zero %>% select(-y)
+)
+## # A tibble: 3 x 6
+## x Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+## <int> <dbl> <dbl> <dbl> <dbl> <fct>
+## 1 1 5.1 3.5 1.4 0.2 setosa
+## 2 1 4.9 3 1.4 0.2 setosa
+## 3 2 NA NA NA NA <NA>
diff --git a/static/img/hero/annie-spratt-746955-unsplash.jpg b/static/img/hero/annie-spratt-746955-unsplash.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..97179a709158b45a31cabe84e4f0f8bd0c83585c
GIT binary patch
literal 2348561
zcmbrl3s_TE_BXsw0tp}xk<Ta0yAY+=7G<#Di@8=DXd5Cj47@CPj4_x@wT?%jL&g5=nQJu&bC0N(L?
zM2C-{Rs-<<;k=`QxLv`2{8L&mwjZE?C%^z20B;L(vy)O|UkC6=h}j*S3vXJH|8vX^
z1Ncb*n6||8gM$V^16JhBf60IN=RNsa_YWU{_pFRL`@s7LR^%U{{9*pFcc6^k1LafsqIU|Q
zJOt$p?;U$z1m$@sK6)T{_mTIr-#%Ofz`y3a
z@-DDuMYq9F$*^1EBpR00FoEb8dqQ0Ne=xzA^mQeAcemOIBW9cEq-A1qB6LMehi=
zt|;{1$N%RI|Gx8oFZ|c@wyr$y-?|Ha?VW>fAA2t^ctxqgBkvtKb~HFQ`)%Pn!CU^{
zM*Kg1<9`k7zsB(g!8-@vIr`3F_^3Cbm5B~#!sR}EK$IssayVFY_&kUnG(`4zc`SdnLl6NN^3qwpvqY7=T3ih<&w5>cfA26edG3{o5}5-+mzdqyN^4;{blzE
z_XPJpx@Wo&tfo`wU|)MPE0)JPnbiPVoU|50n>`<#tdT2
z7zY-M4aB~TWnz=DZ)5YYr?8)4|BCIx8nF+t^B$`_$R0a9Vm;D44tpH;_|)SIkEc#YW-RqE7nb$e5OI|%*cf4j+
zty;ByRrsouRiahWRp(ZHwd%&Id#e__*LZLC-tC?4eau_#t@G~o{?&WN$H!-r58Efr
zC(lRj)9mwu&t0E+Uw_}Nz6rhud?mi0`+nnV^!**@iQ9XFqBKYu@(U$Wo(eieRS`1SccUV~k;X-&+U18YvKQLp)7joBaV
zAL75;U+7=%ukr8ke~8E6L-7gtL->#JE%<)?)C+4~puh0u3xzLy{zB&q<7?4tH?2)r
z``+5hwSQYXyw>?5@kP#y@4WcYi)6Vn4l*7LU0gC#5iI$@iXEN
z#HXaSBsS>~NkO_wvXcGD(PR->LGB_y34S4%6Z~HAx!@mzr`8eH#jiWI?)lE-=#KC2R5TNzp^=N^XHolFQZ<5^<~k^
z4KEM=!Tk^6e>nVy3x62d;=P5lC4bA8TkdULyLHdj<6GOe{=RL)wzO^4+w|L(w!gCd
zz3rOqqqH?NF6}t&TiUZ7n|BCze7<8S%qJ{1OcK@+Hcfw-{tjJ9zx9g$D@m`Me&zZr
zi?8l{_2{dYUbXJruyfze`kf;TJcG}uVB83I58o9o3I9HPA>!4DyokR=Oh#^rd^hro
z$j4EkQSU@)qRh>;vp3wuM9G9OC?y^ZTyt
zyN>RT7S%%m^WhTV(!MSkIjtzGIlENmAK-#?s)h3#Q5s?
zkpyCbFrhhNio26r!u>JPJ25q}F7f`J&3lgS`8Ela#7(M6y2IPZJIrfO2FVG@HOas6
zL-{%U?@}-+{FKjA9tmiI4+Q#D|J1#yU!=~yzU%eM*GJ!=ym9P}>u=)TOn1)hE01jttrj%=a@gA{qw{0SJF?V5C0|fFNJ^U%LvMN
zFQaoWZtvTB|Gv+C-&^~>*ynmX`RxmDJN75+uirl<+%2pX+77S}C=OWPVZL+b9m_%H
z!7~RRXEHNuGObyxtWUF^igt_YMAL`j4>cZ|dpG%A-Mh>0z4cz}`yTJ_d;iMeHHY6l
z{NoYQk-{T`*)M0G${x>&$T^!cbu{s4Q!bMGm)wp#JWQs3IYv2l^4Pun=={&~=L_B}
z_yi-33CTlgob+Ou
zSJ}H|H;->We&)En{H^kDPY_Q?Pnb`}o&55Y@2R7wMovebZv4>gL(zx*ABBDN*+)yV
z1G1k!-v05YAG<1q6+eAK`{djw%kqQr{>oP?l~tIk!&Sr8tm=zr{LYBam}`=1zEK1#
zWD5J)y=VJArGMI3>s5QKcKqC)a~+>;_^j$PS6x=!KR(~}d22nP{-gT2h64>oC0lu^
zk=Xcgqf;eP-8vt4{+kP%E_|xSsPom2G;eD3T86f{DX8h=rbXRh-S|bp#UGn@HaCAk
z_@eSl)R)J;wEi{yuLCWwwRBwCa_M~Q+SZT10$&~b>dDvpz8?Nt;@__So$>ceZ5!L_
zFZ*AXeS>^c^vzUzR{MCzpE?GwaIakdHuBrc-);R)_x-x>Kks~@v+}Cf)e~2jyTo0N
z?(FWT*D|k}ukXD+`okZ882T~g$Nrwgo*TWfy+7zV`mP(y8{hXu^j-Oh@l*THum1ea
zFR%P^`Q|G(FZaLN|BYd%p<^I?;M>8d!K+4=@tWy1Q}0m1(9i$i{bO+Wjp17(=_BK}
z4%~YD>wCXW-_F1Nd{lbJ{f_Ln)xUjumvHyO7;jkOLu`SX*9wrtz;r>7?TChC8u{N6Unp8RF%k5dn)k4`T?
ztC$I#(b?(t>khu-?(F-s&*xo#pqJmzPJu
z34lVO(C~0`L%XjWSh#^F9-&wHiYdzdfd}%
z?TZ0{K^r%PQmC8hue`dG5gxJowV2qr_ylh1>u
ze&Xb*(;t2$J5!@L`)TdD&+0CyHQFZK#pW;m_V>2S-?VpJ`S$t`Klb$MZ}k0SG!6Y@
zc;wcvx9{CIKX~}a^4R)h`q_-#F*`TE;6woWU(@;bjQ(presDf0xCCz4mH8m3f|beo
zxw)^SVb*X`uy5!1uisJTf!|eq{_AU=Az^~47xo_=^jf=tesAOS%9Q>&qyM`J9seKA
z=-(6i_k5Ob18+0}7YywOSYYu0K}c$tS9$v#XI|4_8FsZ?vfIYS#6=P0L;Y%98b4WQ
z;RQ@oFS-MI_FH8pLVoj0PjV_2!;;YFf$(cb^9yDDxD>XZl6J#dFAyqOdKs;}d)A;0
zk3}AoP>>g+>WdGg*0wXhRSuPp)Hnsw?ImHR7VvUuU)39}02=Q+
z8_zAXowxWnB&4;=_{l9IhO*{O|F11w^m7h9Z|J}h{xhrnCYlf)qP#Unzf+Qhbp1eh
z)cBq1!5T3iMSbwBRH-;Gwp&MrtFq%VI!i=xx$-ch^uoc|ORyNieq#@x^8HC88@2p3gb?s82xww(z0kDu)<0ZxqigOv^Y*N`U{!vso>AtU}=TG$(n3P}g-)<=z{Y+*`E<
zI2}#WxOp#+@KEyUG?3b7iL$E+^*FsXPN-}+x#?*|?o+H7WL(jgUbT1FplbJ5ZQ~_p
zTvgQ0o
zJX*2
zW0HrxlWDBnQ!`RFTo-4tva!*s+-|#e13{w9dNVX17x1V|8OND4;XNaz=w)!J_@FdC
zN=UMQ!H9OHo<1Dj&!oIaIro4(FsF`Q25HP4k;by>fjoyS?Sx*?tfG{|+zWPHm`qls~j?-so2@PNw
zr8H&ZCzJ_$KQjvu=gq`Y1yU96V10rvyQXi~ZVG?kdj$?d$;2hMu)+=G2Z9}Tr$x4h
zKw@5P^L*Khp(kE(iH`0FCsIG})ZgP7m{WyLK1JFaQOw6O45q8O38t^DM&&Qt!!1p?
zJ$(Kh2Wc6!gr*-AJyFXe?a+gEFqq*g%tg03eY)vcxVcKgWfqcCI8pxk`2loJrFJVe
zDzou=)iG;j>!I{wd-x3%s2&Lo)Z;sIoL;+cS;)f*gbn)iA$JvKOhsOAM7~D2;BdXW
z9q*kf+vB&$)^zthaHg~~SuAZ9P}}L}sYFl}%;y5+21rpAeVgS`@MV;BfS;
zGn~h34#*=a4wk)^DlhEQ&zu+MP3&@57mRaJkB*-VA>J@ZwA%sRN!CPtI5E%
zrw_QP>!+AAMVddW#fEAj7aKMQg3s)FWL*%YnVS6@LEzf|$2G}PqEP3(^lP?rG`
zD_;6c1pglAXG+CBb89OdNQGA;`MA(WPa=X+3^OGA>6$Tyb)*iY
zcDr&pp{^trFjw2>E?y%Vm=t8G;fkv#xSwNg#Uo|&dCcbA3a3{?cVwl7>tUETF;_1X*TA8(`uj%DtpHf5K<=$2R~`Z+=mT(MK}h6>Z>e0?yN%oJ>cAUzas&1WF$(w(95bJn^H#-oayvIQefWpfUoPequ!4zY`D`VWj1mAYqI
zjMV#>bAN3pL9&@0k=q|Jdy_D8jSlXO&A6-)4!zkH*GgYIUO#~PG|FPtb3JD#hHI=Q
z-LS$wp09FaV&YlGdJN6^T!1z;wsvA$LmCZQ)S-Lx#_7f<6zrI_F8OEX7eybwNO+#2
z)Z3IlrpP0fK}~$D@H?4Z;n3gFa2;N7N6q(^p3+5b-8HXP{o}*8nVKbSc1oMdU
z_?ZH_Z@A^zB%9#d^-~1^)17jx#3o
zyW@g$ObaQ_#jIN!0^RuG%m`e6g*em3*=t~y96Irjlf>j!ts9Q_tJXjsFe0umiaz$P
zuLhBog_UY!dvta+Y8mpb^m^0lLSl5e
zv0mS7TLvRvRQc$Wf2wsWFl&Ra=@%Z=cGxEbzz2=}CTr9=fD35>fvi1C3LUB8>z)7&
z@e~EgaWyBGF4^nF%fRuNVaML$qx>fATI@oe1{-eWF(Yv!bnw1q6yI2;d%>o^SC_@;
z4z;p~-Y-KHq0XEgHs$>^FYN*Uj
z(nDtiKRlcG#2frM%8>2O@Lk`NcRf;Fwy_XQ+g3vu;VP_yi-}R7VVTyJz;8d?z_2%JvEHU@7*u?23p4^)2
zT51FJ%YYYYG>^4EjLjLfa$jm4dH@rqWzg7LRX~uO1opXEm;@!GVXoBvOeNXPiM}IF
z8R>e+MhGxsSB|s}D|~-#5;jE+pucnG#F9t`e
z=ZjO?pV_#`GKd}mx~nBDTEdw2rEevhneqB|Gutbf{4>3NJcnm@{2sG2WMCP50>F4F
z1FyTdl-CjceX4i>G3b(q8>){}J86dbfnq+Yyq6VaY}5vm?%o+3eOx?%s%$M)KxH_Z
z|5!Yyv9VW;T;3Y#%hCor7jrqxWEK~&NMkJl4h_RxZmh*BC0-z-(3$^~>Jwlye5JpP
zt*&N!gvt03lFgAM5pWJ5W78^0yl_9nprSo2OM+^pK$9XxM^rTPjzwX8?ut-EUMX6?c5y8MCajAXszbenZ0i0BUf=
zHK+1hNP#7TcqSoHey1cUxIYWwEVT(_9M?qzEagq^wZyx_9ndy_%Hiqn$=DH#qBw_@
zjcQy5Z(!ROY#e>s_QFR}6fR>iTuTtzHLSFB7>i?z%%gSrarA~_sCjGh5nUSCEoW#g
zw+1WUlO_K^m}akv$L@W|>&0<-B^01fOC7Uu97`E(jVA$xT${GLpV}>vi?D}tt!&SV
zVo5~7WjD!5;%Q!?%1s;UfU%dS=op`_1L?SAy)8+foW{oyN+|mIIP
z3w^f=RxKSGVuw1wK*-k#7X|>WXA|q}x`{C>Hwgrc^Ci(#TDKlm4|A|PdFE*?y}@a^
zh)^bPS3NJ|xhT+3lD28rS?31;Vv}LhFyez%h?VX2lCG2V{W0CK2}jc-DZb3=(Du?C
zu8S^;i^ldWi(BZS(*4<9*se~rEk06EQHlq`sS9Os_t>8E`DVNyjB47O+tL8Xt{$n&
zj!Mm$V0(lc@NH6uRuAuy$3zlEtRM=)SW?`lYKl`8c`7#|yC*WhDInbuG%j_5oq@B^
zboWSV;W1mE&0t~c`vo_OAcCG6oz}8`)2gR-gC`tVQ
zP2TXF&vB@NK>C3^J+s*}Du)}bxEli^>8X;l(Ncesonq0Fm%ha=gD<4s15_M0W%M!t
zd1{i~D|O*H9~rLX*&AqyY>&uXj=2uFC`_1hyF@WyaI6L3g|`(1p+lo`n;e!g!<~Ww
zL`mt{O1<)gf@Fn0Q{zcA{jk)327e$0I)Wquq*1Yl(t}`#)`I6oW4_8g$|$KfmL)Fb
zvVG`JQ$<+_HlwXgjUQzD+SRZM!&Y-6`KVcY6ZHE}&{i41Ftki6Hhnq{`jJ
zYTY5gq^v9^H4Dj|OsKZTY|5efR*g^nKbOXT$RsZh03nhR2V5S60847TRRFl2|_0gO7E&eTq+mH{+3S{&DX
zQx+oe;-=g0!!mC#kIJO}pxwE^M=9G5tJ^PH6WE@y$c2hLSj*;e@F{R~)QR{w9yNKm
z0#;(=pQ?g9)G*@7sksvg)|(jVntE&}-PisUnupGsP?u_~{#*;TvCoW2QDj>L1vHZb8+W
zWB|AxYslPGr5+iYxL9j+)gmEmETa>pdKB_zUy3D@=zDQM`M#tN9_J%*oX
zF_v+Nj5!rbMp2;}8q=VI;z8HRU^QL{^H4zwmjaEIazXlHOQ1%JhSuQ4Jr;TyY{Uy@
zzjy?Q7boJ(v69UR>24~)piJ{uRO-n9yN>^xba#^0MTu_PxzD?t*AuMPQa<`pYS(JC
z>YR9sxd?ywP`!bWT{cs>%d
zC0K>3$h!|Osbm?~&q%$}xPmN%T9VbX5l~5Oxp9YfWFf`;Xazc9o{z{fhIDo{9SkNq
zP0eZwAijV#z@KF`+r^+;tUW(r*
zxL*aKk--G$iU6e4rA5LZdUKsNx~-JJM+Ig4CSp>dyAiF)T$*x7vKd6?tAZ%&rRc{*9zJ0oN+NM*;XN&?mh3Vgb&mxG!Cu5`za#XHErsl^rycW)VMnti@Cftjmq3vggzU
zKEkHABv_q%WB`6x8V!c)Rf|+n*V(OEh)STmGguaf2UqZew`;#v!$hYv?}SrOz91ka
zfyj1f!O8|vT3LIJ;(LBFK3~O=AzBv|Yh`10WI@j;xW~o@;`fOM<%-M|^Hd$Iyk1rZ
zII-f?uZ>l3=zLV&p697odUkG-;+|0gw5Yl)n0;HR;uN+Ql`3)v1VQT8)_9J7P_%Rf
zP}3FTJec;dJwzqcHZMT^_5SDF7(l3_u_t)r189z_nv2C}yRSsI6N6l+;?H2s$7mWr
z!A01dKAi-~d?c}o$_k>HVBS}4jTdFR2ar#Ko%qSLJu~c^V5=x%03#Cf(MgP;6nj$*
z+l!UPqpYumJIhmQ+Y&3{2rL~xo
zvif*sZweupznen!2+M|m5KNE{syjNSs}kY<|gLsDG*KIL{Lj0Ibh
z$io$UK%11MBqkYn6ojL$AFSkhsL@FNE#X8}p6D8;uC22f&t++~s?-^yNPjce!bsiF>nhAZlpP#x6D
zJ};!nqTNcn2?FM{<0i>j$|T$Kw!+)gj5|z0Ivyz;wm43L4DhKeuN)O0h$5YWC54a<
z>JKW61u6)|QW!U(=0fPeSfsV9-w#j$3!T)4Gai?~jQ}xL4l53pu|3>LZDQkosdr9d
zk6Ug$jt&c>ggTlG2P+(7)tuJQiMQYeHN!HncaKp}#|0F%7VaDzW~2b-E-O(0_fwTDJ%#O&yny#ljK`$`>UPl(
zAN%2%SPX`mT-ucq8#a&~7d>S3jVLg~9DeU!{bi~-D%X}$$=1ViwF>OW)wf;-!Ay(<
z$30qBV`{~J&P%v4UZ5^TETh4E^v8|>i#4IV-f#5sI)_C^AKqXtx7C3n;ztvsRg`X9
zKj*&U!0d$GmK0iZ&91^8s2K~sFTJ}A$|jQZDF%nuU?R6*%0HSlT=u$a87PZyF_qV)
z4oro+9*3R_5E3fiP8h3^FLme+5CArf7ngHwoKOFqN}7+p+jcZ}DDzUd7JZyyyJl}1
zuNc_+HZFkaED4L-^DLJ~C^HS8!{o@k2=Qjt=@!OnwsU&VJ|2#A#i2t?CR39SpgySF
zo6-}E4n~#C^hA>PSh`y1Rmqa69yE#39UAbMy2w06iwdH5vIh`u8TnhpvX2RADZ&Kp${|@n^ncxMSAV}dJ+$(
zMSBa$BVXZNYjDJG#U|QN&csuZxJ=JvZ3a0h`u9u;0kBd>luNKu-9{>BIL8N2W#x0H
zn9q&Aw+zfAtd6HR9f%&OhT!ZuyTLS#Ki3}*4rGg9y;Lbq&kLTrFZK48i|zW$!NIUT
z4KCMPI7DU_O!#|aB|&hT%*w(GwkMO`(A;k%jz59KgMdZ-v@syo3
z4imE@M^4c*lL-Vq-+PXjy!TMX_GHqDL48QQAjd?mB~n0?5L>ZWs2v@-C0Kh6OS&^%
z!(o06Yao?dfT3#_mB-TC5)81G;ob`9&ov(3|8jPOvXPtqyg-j19JSu9tbyTR$yP;T~WiZi4_)z-UmVe
zH^*e_rVrflCim9gDXVbw;laKBLJNbWIA`roK0l|LQFc3v`5xxVW$+iUx$y(BiN}-N
z&=TgJKm`3M7i|hzsCgVdC(7CO*9XXkicb>xug5yV4>R)>pc(gqjGj
zp#!o#z9$?}2kW=fU^2WWr_7Eup0bUOJkta)yP}6axuJ3ocRP@IRTQp2l-gfNq<;Tk
z{Q1xgbg=nq#2nsRIhKaIlzf%ZRN*}%00GCb?@dLdw)=*gM@Hzw&QC-8adQ198ij$NW+3#b%HiXcZ~t%7!aW{*EwLbB?L<`j_Y2
zu!IMd*A@PHP?1^Zfe!hmcqTqR^pfN(A3PQxa}F+uQ%}PkwY>fC9l_!5fWGLfLmdI*
zOC?{&hbft?v>m|ea2oGmCDH0J`rQfFw{S~jXsrV^%GkpDa=y3STwg-DW?-epCbtrM
z06C-HE1aDHz2!2!K6AnGNMU~*HeySx*5i#|fdH4->o2O(!XCQ3UdYl$68ze4pYwPt
zz&Xtebmg+K*vkGY@0C$7?324@tEL#RodKmSH
zq0|REZ9Z>(9sMFBip*gXS+$o$zebTPtu2g7%gAkMkl0){5z8QEp{-9vKRYI_soc_^
z(Z;la;eC(lJIqQ^$Bo74HlU3M=E_6ZWIbt!+8bL673YH0hDsQq*WF7(mo19^Tw~Sa
zuSDfR9*@82($=s$sl1Xa0UHMOx@C}|C{Os#i@kk&m>wp4_)fOv?(w)5PH;Hd#;bB~ZW4Px
zAJ2(>Do>bG?Vnp}^F1yNN8^26C7TZ5|_c*w5lG_9B!%!Sccd|uz1ceEJYl6u~f7t!*D$7nzj?T9UOQIN1D@<%#!<-@q>hHvX=jWfei
zZ^u&JIK%06q=VV~D^!#h$Wl?YcKiWY1A9LZoQ$dz^ToOSxe0el4?>QOkguov4!erfE&U_Gv?Tuyi
zi;AFuPywt6x?x#$q9g|G6xz7DyA{Hy0T7$aD`8qo7_%zU=}K5%anl0D5xR$Lbb@n=Yyy~?nz
z?D9&*u~63|sV~fiN7~IiVpjujsL)fYSxumR?@yGo9~{OgtkW3IhBhYdlp#$|CRq+y{V)
zeE3(o7@}#+LNyE&5lm`AFY5+`K(vdZF36h1QggeVk3P5Z?n`~c>P?mQ@zmTK>Zm%v
z>Xqi)B()(Xqtxr@Gc36Q-;f3LGuC)qJDuPhR7NaPkRrSGx8cu~N@#{9Ik22_meQri
zZs1i8RsJI>(wIRFo9Nk^6+(pRr^g}%-P*EoTd4vFez%NXCJ&eQvk*2yd`!Af%gjx>
z2^cwdmhh+B$tT^41^f2NfCO!`Cb76|iQzH>VPkOfM)LV6LF!M>7qbve9e5%V!=O>k
zMkei#A$*I;GAuwyM?Q4NZnyECM2e@`9y(&z=G49@oHo#Eu*x`hrTAGj#Ee_nsvO5t
z3PP*wua9ddDH@wqm6vF2BoK2pi7x-%DR7DhP<4n6VtTOf1C*v2+uiy?gF{0HW}EW2
ziw?8A?XJEq9m3_-3c1~v?KDEzW{D0mhf*4~6uLhoc1jyRgb;0Sq#%>j2Gr0#ND;y-E7MgaS*G?t@Z);#3x7#@wlRaQdRAXWKripJJViUn$^iu{54W1?*O?FO}7>2^xr=o~a|Bg=3F1C$_sF*h+Cx%ykqHq76{D
zxaIZ70J2#|iw%=SK<5Y*_5@f>w`-YH+9b>XRA?>5khVwIp@rMJz7u_0;d@oV2kw;c
ziHUezC#lOQ#XY=TTOrsl%|Z(2G#AR*tKx)rgC&e2aM0g%+?m*jUy=_kTzTe
z8GjAk%#DDtUdEcfVL^`FBIwQflAL9`}r;eO>z&pJBjP(#ip5aKLK
zMPt8$4^3PH4$38)Jt1C5jZB4<6=XEAbam%Wyyp1;aNl}62|^od6lD_$iDeM3FT073
zR*2hz(*EWkYzNv_ufsN7MN+rc$XQ67
z%`oj;Y2%PZrc`RlS`a+RrDmxh5Ku2b$BK;us7zT#r!~w7QQn|5cG%roQytjLrBb0;
zo}p6kT_-`XjRS$VtYC;Yb0HT2(aGw>YY^xI_iM>#x242GQb%Z4UyeRkEan3WN}9us
z>wIfFf`Kbvo@7mmly+$YHwY?Q);-cjf+vTW)a$aBIy`8x)GQ4O;flg065nk2@wZds
zkoz-qhBD57dAtCD@I1C+Fj4W+S2>9>fok}_UxKWlWh1|RwYOjizrUQbj=h3GLvsZ9
z4t0pJob4Hd?}93YBv%?YmF*4${3O7Tw>6v$Sbs+PA|5>v2i5SR{bsN;#W}bFW|l)#
zGO@A0N(R#Nv^}8R|Fy!+DTmvT|Twm`|h^^~$Ukd%N6NhIL%17{~%WTS5<0
zlR&VxN#2IL1dGH(8AT;ZhZK-VQ$Mhdty-jpYi|OOGyG|d3L^G3MmZIdx*CTzdWet6
z?We5}Hj^&BJ}$xzSHZ778EY9@1O3E(DOyPtVXcYKLsSqDuLH?5b!kWJD?ne9jMb+x
z90F>Yv`@wvX6b_~N-QQh!V^JWO4GA=&@H
z>zC6I`Q~BDhsyED6%3h2Vf}QGGprC01Q5vC2TyT5wRCVB$E3i5L<_KZNXUfK%e(O4
zcD6MZ_7Bq7fT7W$Z&w9@)cBk@E`$eZH%SVljs|^)WLGLwe2xlZykC{V_BG%ooA*p7
zcGJO$l=8%K?n*z3+E^hTD8z%D1da-7DlNrn+z%rRQi#7xVXm+o*L3Ju2%75h)CA)I
z`Y4UPNSWY_54?HJ#$Kf6-rVB$A&a{mM%OqC8w)d+Hla1%2W<5Td{k4nmkYn}=DsNg
z3!Q8-VS;CihYY}!%8#cB9zbWQg5YC+F&bf(NdYL?ZfKh+Jjl-ZCB#xf@ZeM}{yx%F
z&qwZ_Y0z5|%GsC*Xo_XB0xj4fD|O!q29OwvT?6Cz`dYezk22qd++nE#!pJ8Cf@ki*
zFml}tZ9&)h0OHAw3}0#+5R}^zAqUb1I`F4k$gP(tur*;5HRmP;e@g1ll4`Wq)SUo8
zhf!QXNxf5K$-T9(UO81ID?rsiV^ADa?5|paQ5r8a%3Xpw@DA0M2x-Vw-_&es=%;$Z
z2?^Rl6*42LHBI~`)lx5AT1#%PpsR`_DsAhhto{$Xd0{o)LYu$;$ht7
za5*|b4n&YzNOqyI+`bZ%F)g9t1+tR~6vOqbdkG-mYzjn&GdN51rs6i&Uz<
zI*y^jm@18xy>d@Th>TOQ+zfXR3KR6-k1GaHLfM|@f^oqBx=`D#%7cjXIeZYs3?cGq
zwjDB2GQe}E!XUBj5MU55s;AYEmpN|0p!=H
zF`?#I=7I|INKv5Gs@z-*^+HNVicz{+mgc^x1h!sFRbk4w?%NLezyWV{4~AP8BwEo~8jI{0eUSntt%-F^NOhoz
z%;q>_4atsXW9L(3SYrcZgp%4aZ`ZDuuAYUQ^bV?r3dlUQF37+C6YhoKvl)(yf>O*t
zV3DLc(IG8pvx=UY326?+TO4#@dJ}=
zJhOoHlN1<2Lv&)0GpkQ8iP=8u`r1U#rWtrcfBpUd0j?P);@GTq9M`miq7P~Oz!@Y+
z4iC|SP2o{w8z<_REAG%PNz@{h{kSMRz;a%NZfy?;(QyUQS2UqzN{l3eQ?A66Ttpr$
z?Y$)WF8;BnRuCa%8amw`CZ}f}kAw;6tpXRL*|YC4i)6o6Z^qh&&c*Y31
z{K`zVo>{hdNKw
zfv$Q!Gx~~lA42Bt9;=ib^29E9fx}_)$jlHeQ=VHD-Byow?#t6Mi27u%XVvFln`WAj
z0rOb&kRyr!%Z;JI@0#Mi$y$?7F4ym=t~(MnvkZoBZVP;zVAEgTnS++=*Oy_KmAtq^
z`A;pa=Sf4oL