Skip to content
Merged

1.5.7 #341

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: orgdata
Title: Aggregating Original Data
Version: 1.5.6
Version: 1.5.7
Authors@R:
c(person(given = "Vegard",
family = "Lysne",
Expand All @@ -22,13 +22,13 @@ BugReports: https://github.com/helseprofil/orgdata/issues
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
Depends:
R (>= 4.1.0)
Imports:
data.table (>= 1.15.0),
data.table (>= 1.17.0),
DBI (>= 1.1.3),
norgeo (>= 2.4.6),
norgeo (>= 2.4.7),
odbc (>= 1.3.4),
R6 (>= 2.5.1),
readxl (>= 1.4.1),
Expand Down
9 changes: 8 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# orgdata 1.5.6
# orgdata 1.5.7 (2025-10-10)
- Deprecated csv output from `make_file`/`lag_fil`
- Added `is_file_parquet` to give message when reading/saving parquet. Copied from `is_file_csv`, returns file path.
- Added argument `extra_geo` to `geo_map` and `geo_map_multi`, to be able to generate tblGeo without levekaar/okonomisk. This allows for manual merging if needed. Corresponds to the same argument in `norgeo::cast_geo`.
- Small syntax fix in reshape
- For reshape long: `RESHAPE_VAL` can now be provided as a vector of columns to reshape. This can reduce file size drastically when all columns are not needed.

# orgdata 1.5.6 (2025-08-26)
- Instead of removing attributes before saving as .parquet, the data is converted to an arrow_table
- Fixed problem where levekaar geographical codes > the maximum value for what can be represented as integer32 was coerced to NA. Levekaar is now kept as numeric.

Expand Down
19 changes: 13 additions & 6 deletions R/norgeo.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#' the table if it already exists
#' @param append Append the data to an existing table in the `orgdata.geo`
#' @param table Table name to be created in the database. Default is `tblGeo`
#' @param extra_geo option to add `levekaar` and/or `okonomisk` to tblGeo
#' @importFrom norgeo cast_geo
#' @family geo codes functions
#' @examples
Expand All @@ -14,7 +15,10 @@
#' geo_map(2021, append = TRUE)
#' }
#' @export
geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo") {
geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo", extra_geo = NULL) {
if (!is.null(extra_geo) && !all(extra_geo %in% c("grunnkrets", "kommune", "fylke", "bydel", "levekaar", "okonomisk"))) {
stop("extra_geo må være NULL eller kun inneholde 'levekaar' og/eller 'okonomisk'")
}
is_null(year)
is_write_msg(msg = "fetch")
## break msg before showing message from cast_geo
Expand All @@ -30,7 +34,7 @@ geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo"
geo <- listenv::listenv()
}

DT <- norgeo::cast_geo(year = year)
DT <- norgeo::cast_geo(year = year, extra_geo = extra_geo)
DT <- is_grunnkrets_00(DT)
DT <- is_kommune_99(DT)
geo$tblvalue <- DT[, "batch" := is_batch("date")]
Expand Down Expand Up @@ -65,11 +69,13 @@ geo_map <- function(year = NULL, write = FALSE, append = FALSE, table = "tblGeo"
#' @param write Write table to the `orgdata.geo` database. It will overwrite
#' the table if it already exists
#' @param table Table name to be created in the database. Default is `tblGeo`
#' @param extra_geo option to add `levekaar` and/or `okonomisk` to tblGeo
#' @export
geo_map_multi <- function(from = NULL,
to = NULL,
write = FALSE,
table = "tblGeo") {
table = "tblGeo",
extra_geo = NULL) {
if (write) {
geoFile <- is_path_db(getOption("orgdata.geo"), check = TRUE)
geo <- KHelse$new(geoFile)
Expand All @@ -82,8 +88,8 @@ geo_map_multi <- function(from = NULL,

for (year in from:to) {
message(paste0("Processing year: ", year))
dt <- geo_map(year, append = FALSE, write = FALSE)
DT <- data.table::rbindlist(list(DT, dt))
dt <- geo_map(year, append = FALSE, write = FALSE, extra_geo = extra_geo)
DT <- data.table::rbindlist(list(DT, dt), use.names = TRUE, fill = TRUE)
}

geo$tblvalue <- DT[, "batch" := is_batch("date")]
Expand Down Expand Up @@ -251,7 +257,8 @@ geo_merge <- function(id.table = NULL,
DT <- geo$db_read(table.name)
DT[, batch := as.Date(batch)]
}
dt <- read_file(file, encoding = "UTF-8", colClasses = "character")
encoding <- ifelse(grepl(".csv$", file), getOption("orgdata.encoding.csv"), getOption("orgdata.encoding.access"))
dt <- read_file(file, encoding = encoding, colClasses = "character")

if(geo.col == geo.level){
setnames(dt, geo.col, paste0(geo.col, "_new"))
Expand Down
18 changes: 15 additions & 3 deletions R/reshape.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,9 @@ get_reshape_id_val <- function(dt = NULL, group = NULL, con = NULL, spec = NULL)

reshVars <- switch(resh,
all = is_reshape_var_all(dtnames = dtNames, reshapeid = reshapeID),
list = is_reshape_var_list(spec),
not = is_reshape_var_other(dtnames = dtNames, reshapeid = reshapeID, spec))
list = is_reshape_var_list(spec = spec),
cols = is_reshape_var_cols(dtnames = dtNames, spec = spec),
not = is_reshape_var_not(dtnames = dtNames, reshapeid = reshapeID, spec = spec))

list(id = reshapeID, var = reshVars, type = resh)
}
Expand Down Expand Up @@ -187,6 +188,8 @@ is_reshape_input <- function(input){
out <- "list"
} else if (grepl("^-", input)){
out <- "not"
} else if (grepl("^\\(?\\S", input)){
out <- "cols"
} else {
out <- "error"
}
Expand All @@ -206,10 +209,19 @@ is_reshape_var_list <- function(spec){
trimws(v4)
}

is_reshape_var_other <- function(dtnames, reshapeid, spec){
is_reshape_var_not <- function(dtnames, reshapeid, spec){
  ## Columns to reshape when RESHAPE_VAL is written as an exclusion,
  ## i.e. "-(a, b)": everything in `dtnames` except the listed names
  ## and the entries of `reshapeid`.
  ## Strip the leading "-(" and the closing ")" to get the bare list
  listed <- gsub("^-\\((.*)\\)", "\\1", spec$RESHAPE_VAL)
  excluded <- c(is_separate(listed, sep = ","), reshapeid)
  setdiff(dtnames, excluded)
}

is_reshape_var_cols <- function(dtnames, spec){
  ## Columns to reshape when RESHAPE_VAL lists them explicitly, with or
  ## without one surrounding pair of parentheses: "a, b" or "(a, b)".
  ## Stops if any listed column is absent from `dtnames`.
  rawVal <- spec$RESHAPE_VAL
  ## Remove the optional opening and closing parenthesis
  bare <- gsub("^\\(?([^()]+?)\\)?$", "\\1", rawVal)
  cols <- trimws(is_separate(bare, sep = ","))
  if (any(!(cols %in% dtnames))) {
    is_stop("RESHAPE_VAL contains columns not in data:", rawVal)
  }
  cols
}
47 changes: 44 additions & 3 deletions R/save-file.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ save_file <- function(dt = NULL,
is_null(dt)
is_null(name)

file <- is_file_csv(group = name, path = path, date = date, fgSpec = fgSpec, action = "save")
data.table::fwrite(dt, file = file, sep = sep, ...)
file <- is_file_parquet(group = name, path = path, date = date, fgSpec = fgSpec, action = "save")
parquetname <- gsub(".csv", ".parquet", file)
do_save_parquet(dt = dt, filename = parquetname)
}
}

#' @title do_save_parquet
#' @description
Expand Down Expand Up @@ -109,6 +108,48 @@ is_file_csv <- function(group = NULL,
return(fileOut)
}

## Build the full path of a .parquet file for `group`, report it via
## is_verbose() and return it.
##
## group   Base name of the file (without extension).
## path    Folder to place the file in. When NULL the folder is obtained
##         from is_save_path(); otherwise the folder must already exist.
## date    When TRUE, the value of is_batch("time") is appended to the
##         file name as "<group>_<batch>.parquet".
## verbose Defaults to option `orgdata.verbose` when NULL.
##         NOTE(review): the resolved value is not passed on to
##         is_verbose() in this function - confirm whether that is intended.
## fgSpec  Passed on to is_save_path() when `path` is NULL.
## action  "save" or "read"; selects the message prefix. Any other single
##         value gives the generic prefix "File:".
##
## Returns the file path as a string with forward slashes.
is_file_parquet <- function(group = NULL,
                            path = NULL,
                            date = FALSE,
                            verbose = NULL,
                            fgSpec = NULL,
                            action = c("save", "read")){

  ## When `action` is not supplied it is the whole choice vector
  ## c("save", "read"). switch() needs a length-one EXPR and
  ## `if (action == "read")` needs a length-one condition, so reduce it
  ## to its first element ("save" for the default).
  action <- action[1L]

  if (is.null(verbose)) verbose <- getOption("orgdata.verbose")

  if (date){
    batch <- is_batch("time")
    fileName <- paste0(group, "_", batch, ".parquet")
  } else {
    fileName <- paste0(group, ".parquet")
  }

  if (is.null(path)){
    fpath <- is_save_path(group = group, fgSpec = fgSpec, action = action)
    fileOut <- file.path(fpath, fileName)
  } else {
    fileOut <- file.path(path, fileName)
    if (!fs::dir_exists(path)) {
      is_stop(msg = "Folder not found!", var = path)
    }
  }

  msg <- switch(action,
                save = "Save file:",
                read = "Read file:",
                "File:")

  ## Normalise Windows-style separators
  fileOut <- gsub("\\\\", "/", fileOut)

  if (action == "read"){
    ## Option is restored automatically when this function exits
    withr::local_options(list(orgdata.verbose = FALSE))
  }

  is_verbose(fileOut, msg = msg)

  return(fileOut)
}

is_save_path <- function(group = NULL, fgSpec = NULL, ...){

if (is.null(fgSpec)){
Expand Down
10 changes: 9 additions & 1 deletion man/geo_map.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion man/geo_map_multi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading