From 054e7b3dcdff418d49ad4d147ffb256545622493 Mon Sep 17 00:00:00 2001 From: Vegard Lysne Date: Wed, 1 Oct 2025 14:06:39 +0200 Subject: [PATCH 1/9] added extra_geo argument --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/norgeo.R | 16 +++++++++++----- R/reshape.R | 2 +- man/geo_map.Rd | 10 +++++++++- 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 148cb6af..65ada96f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: orgdata Title: Aggregating Original Data -Version: 1.5.6 +Version: 1.5.7 Authors@R: c(person(given = "Vegard", family = "Lysne", diff --git a/NEWS.md b/NEWS.md index 817cf8af..a56ed788 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# orgdata 1.5.7 +- Added argument extra_geo to geo_map and geo_map_multi, to be able to generate tblGeo without levekaar/okonomisk. This allow for manuall merging if needed. Corresponding to same argument in norgeo::cast_geo. +- Small syntax fix in reshape + # orgdata 1.5.6 - Instead of removing attributes before saving as .parquet, the data is converted to an arrow_table - Fixed problem where levekaar geographical codes > the maximum value for what can be represented as integer32 was coerced to NA. Levekaar is now kept as numeric. diff --git a/R/norgeo.R b/R/norgeo.R index d6ed24f7..eca59219 100644 --- a/R/norgeo.R +++ b/R/norgeo.R @@ -6,6 +6,7 @@ #' the table if it already exists #' @param append Append the data to an existing table in the `orgdata.geo` #' @param table Table name to be created in the database. Default is `tblGeo` +#' @param extra_geo option to add `levekaar` and/or `okonomisk` to tblGeo #' @importFrom norgeo cast_geo #' @family geo codes functions #' @examples @@ -14,7 +15,10 @@ #' geo_map(2021, append = TRUE) #' } #' @export -geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo") { +geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo", extra_geo = NULL) { + if (!is.null(extra_geo) && !all(extra_geo %in% c("grunnkrets", "kommune", "fylke", "bydel", "levekaar", "okonomisk"))) { + stop("extra_geo må være NULL eller kun inneholde 'levekaar' og/eller 'okonomisk'") + } is_null(year) is_write_msg(msg = "fetch") ## break msg before showing message from cast_geo @@ -30,7 +34,7 @@ geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo" geo <- listenv::listenv() } - DT <- norgeo::cast_geo(year = year) + DT <- norgeo::cast_geo(year = year, extra_geo = extra_geo) DT <- is_grunnkrets_00(DT) DT <- is_kommune_99(DT) geo$tblvalue <- DT[, "batch" := is_batch("date")] @@ -65,11 +69,13 @@ geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo" #' @param write Write table to the `orgdata.geo` database. It will overwrite #' the table if it already exists #' @param table Table name to be created in the database. Default is `tblGeo` +#' @param extra_geo option to add `levekaar` and/or `okonomisk` to tblGeo #' @export geo_map_multi <- function(from = NULL, to = NULL, write = FALSE, - table = "tblGeo") { + table = "tblGeo", + extra_geo = NULL) { if (write) { geoFile <- is_path_db(getOption("orgdata.geo"), check = TRUE) geo <- KHelse$new(geoFile) @@ -82,8 +88,8 @@ geo_map_multi <- function(from = NULL, for (year in from:to) { message(paste0("Processing year: ", year)) - dt <- geo_map(year, append = FALSE, write = FALSE) - DT <- data.table::rbindlist(list(DT, dt)) + dt <- geo_map(year, append = FALSE, write = FALSE, extra_geo = extra_geo) + DT <- data.table::rbindlist(list(DT, dt), use.names = TRUE, fill = TRUE) } geo$tblvalue <- DT[, "batch" := is_batch("date")] diff --git a/R/reshape.R b/R/reshape.R index ed4e472a..bc9e1a2c 100644 --- a/R/reshape.R +++ b/R/reshape.R @@ -68,7 +68,7 @@ do_reshape <- function(dt = NULL, respec = NULL){ col <- is_separate(varCols[i], sep = ",") listCols[[i]] <- col } - dt <- data.table::melt(dt, id.vars = idCols, measure.vars = listCols) + dt <- data.table::melt(dt, id.vars = idCols, measure.vars = unlist(listCols)) } else { dt <- data.table::melt(dt, id.vars = idCols, measure.vars = varCols) } diff --git a/man/geo_map.Rd b/man/geo_map.Rd index 5f6cb703..8c3c8301 100644 --- a/man/geo_map.Rd +++ b/man/geo_map.Rd @@ -4,7 +4,13 @@ \alias{geo_map} \title{Granularity of Geographical Codes} \usage{ -geo_map(year = NULL, write = FALSE, append = FALSE, table = "tblGeo") +geo_map( + year = NULL, + write = FALSE, + append = FALSE, + table = "tblGeo", + extra_geo = NULL +) } \arguments{ \item{year}{Year for the valid geographical codes} @@ -15,6 +21,8 @@ the table if it already exists} \item{append}{Append the data to an existing table in the \code{orgdata.geo}} \item{table}{Table name to be created in the database. Default is \code{tblGeo}} + +\item{extra_geo}{option to add \code{levekaar} and/or \code{okonomisk} to tblGeo} } \description{ Create a database granularity of geographical codes to aggregate From f89f05838b8db736c9dffd445b83f1a1d363ce41 Mon Sep 17 00:00:00 2001 From: Vegard Lysne Date: Wed, 1 Oct 2025 15:45:29 +0200 Subject: [PATCH 2/9] fix encoding of file when geo_merge --- R/norgeo.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/norgeo.R b/R/norgeo.R index eca59219..85fbf591 100644 --- a/R/norgeo.R +++ b/R/norgeo.R @@ -257,7 +257,8 @@ geo_merge <- function(id.table = NULL, DT <- geo$db_read(table.name) DT[, batch := as.Date(batch)] } - dt <- read_file(file, encoding = "UTF-8", colClasses = "character") + encoding <- ifelse(grepl(".csv$", file), getOption("orgdata.encoding.csv"), getOption("orgdata.encoding.access")) + dt <- read_file(file, encoding = encoding, colClasses = "character") if(geo.col == geo.level){ setnames(dt, geo.col, paste0(geo.col, "_new")) From e611b9d310a077456cc5ff34c2074effdbc21629 Mon Sep 17 00:00:00 2001 From: Vegard Lysne Date: Wed, 1 Oct 2025 15:45:47 +0200 Subject: [PATCH 3/9] remove csv output of make_file --- R/save-file.R | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/R/save-file.R b/R/save-file.R index cef8eb27..1855238e 100644 --- a/R/save-file.R +++ b/R/save-file.R @@ -43,7 +43,7 @@ save_file <- function(dt = NULL, is_null(name) file <- is_file_csv(group = name, path = path, date = date, fgSpec = fgSpec, action = "save") - data.table::fwrite(dt, file = file, sep = sep, ...) + # data.table::fwrite(dt, file = file, sep = sep, ...) parquetname <- gsub(".csv", ".parquet", file) do_save_parquet(dt = dt, filename = parquetname) } @@ -109,6 +109,48 @@ is_file_csv <- function(group = NULL, return(fileOut) } +is_file_parquet <- function(group = NULL, + path = NULL, + date = FALSE, + verbose = NULL, + fgSpec = NULL, + action = c("save", "read")){ + + if (is.null(verbose)) verbose <- getOption("orgdata.verbose") + + if (date){ + batch <- is_batch("time") + fileName <- paste0(group, "_", batch, ".parquet") + } else { + fileName <- paste0(group, ".parquet") + } + + if (is.null(path)){ + fpath <- is_save_path(group = group, fgSpec = fgSpec, action = action) + fileOut <- file.path(fpath, fileName) + } else { + fileOut <- file.path(path, fileName) + if (!fs::dir_exists(path)) { + is_stop(msg = "Folder not found!", var = path) + } + } + + msg <- switch(action, + save = "Save file:", + read = "Read file:", + "File:") + + fileOut <- gsub("\\\\", "/", fileOut) + + if (action == "read"){ + withr::local_options(list(orgdata.verbose = FALSE)) + } + + is_verbose(fileOut, msg = msg) + + return(fileOut) +} + is_save_path <- function(group = NULL, fgSpec = NULL, ...){ if (is.null(fgSpec)){ From 315a8d98dcf23742f7a5dbc1eba7ac601f593e22 Mon Sep 17 00:00:00 2001 From: Vegard Lysne Date: Wed, 1 Oct 2025 15:46:52 +0200 Subject: [PATCH 4/9] deprecate csv output of make_file --- R/save-file.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/save-file.R b/R/save-file.R index 1855238e..aee01711 100644 --- a/R/save-file.R +++ b/R/save-file.R @@ -42,11 +42,10 @@ save_file <- function(dt = NULL, is_null(dt) is_null(name) - file <- is_file_csv(group = name, path = path, date = date, fgSpec = fgSpec, action = "save") - # data.table::fwrite(dt, file = file, sep = sep, ...) + file <- is_file_parquet(group = name, path = path, date = date, fgSpec = fgSpec, action = "save") parquetname <- gsub(".csv", ".parquet", file) do_save_parquet(dt = dt, filename = parquetname) -} +} #' @title do_save_parquet #' @description From 8df321f0443a42acccb0105f2548587091a8244d Mon Sep 17 00:00:00 2001 From: Vegard Lysne Date: Fri, 10 Oct 2025 09:24:06 +0200 Subject: [PATCH 5/9] update news --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a56ed788..9f019ac2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # orgdata 1.5.7 -- Added argument extra_geo to geo_map and geo_map_multi, to be able to generate tblGeo without levekaar/okonomisk. This allow for manuall merging if needed. Corresponding to same argument in norgeo::cast_geo. +- Deprecated csv output from `make_file`/`lag_fil` +- Added `is_file_parquet` to give message when reading/saving parquet. Copied from `is_file_csv`, returns file path. +- Added argument extra_geo to geo_map and geo_map_multi, to be able to generate tblGeo without levekaar/okonomisk. This allow for manual merging if needed. Corresponding to same argument in norgeo::cast_geo. - Small syntax fix in reshape # orgdata 1.5.6 From 8aaf80f3d9b847fc1ccb9c21e8d2c8e46055f12a Mon Sep 17 00:00:00 2001 From: Vegard Date: Fri, 10 Oct 2025 11:36:04 +0200 Subject: [PATCH 6/9] Added reshape type cols to provide only columns to reshape long --- R/reshape.R | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/R/reshape.R b/R/reshape.R index bc9e1a2c..f9d7a377 100644 --- a/R/reshape.R +++ b/R/reshape.R @@ -68,7 +68,7 @@ do_reshape <- function(dt = NULL, respec = NULL){ col <- is_separate(varCols[i], sep = ",") listCols[[i]] <- col } - dt <- data.table::melt(dt, id.vars = idCols, measure.vars = unlist(listCols)) + dt <- data.table::melt(dt, id.vars = idCols, measure.vars = listCols) } else { dt <- data.table::melt(dt, id.vars = idCols, measure.vars = varCols) } @@ -110,8 +110,9 @@ get_reshape_id_val <- function(dt = NULL, group = NULL, con = NULL, spec = NULL) reshVars <- switch(resh, all = is_reshape_var_all(dtnames = dtNames, reshapeid = reshapeID), - list = is_reshape_var_list(spec), - not = is_reshape_var_other(dtnames = dtNames, reshapeid = reshapeID, spec)) + list = is_reshape_var_list(spec = spec), + cols = is_reshape_var_cols(dtnames = dtNames, spec = spec), + not = is_reshape_var_not(dtnames = dtNames, reshapeid = reshapeID, spec = spec)) list(id = reshapeID, var = reshVars, type = resh) } @@ -187,6 +188,8 @@ is_reshape_input <- function(input){ out <- "list" } else if (grepl("^-", input)){ out <- "not" + } else if (grepl("^\\(?\\S", input)){ + out <- "cols" } else { out <- "error" } @@ -206,10 +209,19 @@ is_reshape_var_list <- function(spec){ trimws(v4) } -is_reshape_var_other <- function(dtnames, reshapeid, spec){ +is_reshape_var_not <- function(dtnames, reshapeid, spec){ input <- spec$RESHAPE_VAL vars <- gsub("^-\\((.*)\\)", "\\1", input) vars <- is_separate(vars, sep = ",") vars <- c(vars, reshapeid) setdiff(dtnames, vars) } + +is_reshape_var_cols <- function(dtnames, spec){ + input <- spec$RESHAPE_VAL + vars <- gsub("^\\(?([^()]+?)\\)?$", "\\1", input) + vars <- is_separate(vars, sep = ",") + vars <- trimws(vars) + if(!all(vars %in% dtnames)) is_stop("RESHAPE_VAL contains columns not in data:", input) + vars +} \ No newline at end of file From 4e5b257b86b3ed4a5fb11ee3cabf6bfb67a14cbb Mon Sep 17 00:00:00 2001 From: Vegard Date: Fri, 10 Oct 2025 11:48:12 +0200 Subject: [PATCH 7/9] update documentation --- DESCRIPTION | 2 +- NEWS.md | 3 ++- man/geo_map_multi.Rd | 10 +++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 65ada96f..751dc067 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ BugReports: https://github.com/helseprofil/orgdata/issues Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: R (>= 4.1.0) Imports: diff --git a/NEWS.md b/NEWS.md index 9f019ac2..7ffaa6ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ -# orgdata 1.5.7 +# orgdata 1.5.7 (2025-10-10) - Deprecated csv output from `make_file`/`lag_fil` - Added `is_file_parquet` to give message when reading/saving parquet. Copied from `is_file_csv`, returns file path. - Added argument extra_geo to geo_map and geo_map_multi, to be able to generate tblGeo without levekaar/okonomisk. This allow for manual merging if needed. Corresponding to same argument in norgeo::cast_geo. - Small syntax fix in reshape +- For reshape long: `RESHAPE_VAL` can now be provided as a vector of columns to reshape. This can reduce file size drastically when all columns are not needed. # orgdata 1.5.6 - Instead of removing attributes before saving as .parquet, the data is converted to an arrow_table diff --git a/man/geo_map_multi.Rd b/man/geo_map_multi.Rd index 07f4bd7c..52c1b025 100644 --- a/man/geo_map_multi.Rd +++ b/man/geo_map_multi.Rd @@ -4,7 +4,13 @@ \alias{geo_map_multi} \title{Granularity of Geographical Codes (multi-year)} \usage{ -geo_map_multi(from = NULL, to = NULL, write = FALSE, table = "tblGeo") +geo_map_multi( + from = NULL, + to = NULL, + write = FALSE, + table = "tblGeo", + extra_geo = NULL +) } \arguments{ \item{from}{starting year} @@ -15,6 +21,8 @@ geo_map_multi(from = NULL, to = NULL, write = FALSE, table = "tblGeo") the table if it already exists} \item{table}{Table name to be created in the database. Default is \code{tblGeo}} + +\item{extra_geo}{option to add \code{levekaar} and/or \code{okonomisk} to tblGeo} } \description{ A wrapper around \code{\link[=geo_map]{geo_map()}} to generate a database granularity From 19bc0bae5011b559ee0f8578e6137460c6755d2d Mon Sep 17 00:00:00 2001 From: Vegard Date: Fri, 10 Oct 2025 11:49:25 +0200 Subject: [PATCH 8/9] update data.table min version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 751dc067..1b6e150e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,9 +26,9 @@ RoxygenNote: 7.3.3 Depends: R (>= 4.1.0) Imports: - data.table (>= 1.15.0), + data.table (>= 1.17.0), DBI (>= 1.1.3), - norgeo (>= 2.4.6), + norgeo (>= 2.4.7), odbc (>= 1.3.4), R6 (>= 2.5.1), readxl (>= 1.4.1), From 463443928c7c9d0b90f62335967324b72d5124ff Mon Sep 17 00:00:00 2001 From: Vegard Date: Fri, 10 Oct 2025 11:52:34 +0200 Subject: [PATCH 9/9] update news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 7ffaa6ce..ecce1ee6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ - Small syntax fix in reshape - For reshape long: `RESHAPE_VAL` can now be provided as a vector of columns to reshape. This can reduce file size drastically when all columns are not needed. -# orgdata 1.5.6 +# orgdata 1.5.6 (2025-08-26) - Instead of removing attributes before saving as .parquet, the data is converted to an arrow_table - Fixed problem where levekaar geographical codes > the maximum value for what can be represented as integer32 was coerced to NA. Levekaar is now kept as numeric.