From 49ca1aa2950be861e5d3af1772033ac6a015006b Mon Sep 17 00:00:00 2001
From: almac2022
Date: Sun, 11 Jan 2026 07:16:36 -0800
Subject: [PATCH 1/4] - add [ngr_fs_type_write()] for type-preserving flat file writes

Closes [#27](https://github.com/NewGraphEnvironment/ngr/issues/27)

Co-Authored-By: Claude Opus 4.5
---
 DESCRIPTION                   |  1 +
 NAMESPACE                     |  7 +++
 R/ngr_fs_type_write.R         | 73 +++++++++++++++++++++++++++++++++++
 man/ngr_fs_copy_if_missing.Rd |  4 ++-
 man/ngr_fs_id_missing.Rd      |  4 ++-
 man/ngr_fs_type_write.Rd      | 64 ++++++++++++++++++++++++++++++
 6 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 R/ngr_fs_type_write.R
 create mode 100644 man/ngr_fs_type_write.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index a4f5f09..3e254d0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,6 +24,7 @@ Config/testthat/edition: 3
 URL: https://github.com/NewGraphEnvironment/ngr, https://newgraphenvironment.github.io/ngr/
 BugReports: https://github.com/NewGraphEnvironment/ngr/issues
 Imports:
+    arrow,
     chk,
     cli,
     curl,
diff --git a/NAMESPACE b/NAMESPACE
index ceac108..83dcc91 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,8 @@ export(ngr_dbqs_ltree)
 export(ngr_dbqs_tbl_quote)
 export(ngr_fs_copy_if_missing)
 export(ngr_fs_id_missing)
+export(ngr_fs_type_read)
+export(ngr_fs_type_write)
 export(ngr_git_issue)
 export(ngr_git_issue_details)
 export(ngr_hyd_q_daily)
@@ -44,6 +46,11 @@ export(ngr_tidy_type)
 export(ngr_xl_map_colnames)
 export(ngr_xl_map_formulas)
 export(ngr_xl_read_formulas)
+importFrom(arrow,read_csv_arrow)
+importFrom(arrow,read_parquet)
+importFrom(arrow,schema)
+importFrom(arrow,write_csv_arrow)
+importFrom(arrow,write_parquet)
 importFrom(chk,abort_chk)
 importFrom(chk,chk_character)
 importFrom(chk,chk_data)
diff --git a/R/ngr_fs_type_write.R b/R/ngr_fs_type_write.R
new file mode 100644
index 0000000..f869053
--- /dev/null
+++ b/R/ngr_fs_type_write.R
@@ -0,0 +1,73 @@
+#' Write data to a flat file with type schema preservation
+#'
+#' Writes a data frame to a flat file format (CSV by default) and stores the
+#' column type schema in a companion parquet file. This enables type-preserving
+#' round-trips for formats that don't natively preserve types, while keeping
+#' data in human-readable flat files suitable for GitHub collaboration.
+#'
+#' @param x A [data.frame] or [tibble] to write.
+#' @param path Character. Path to write the file to. Must end with the
+#'   extension given by `format`, otherwise an error is raised and nothing is
+#'   written.
+#' @param format Character. File extension to replace when creating the schema file.
+#'   Default is "csv".
+#'
+#' @return Invisibly returns the file path.
+#' @family fs
+#' @family serialization
+#' @seealso [ngr_fs_type_read()] for reading files with preserved types
+#' @export
+#' @importFrom arrow write_csv_arrow write_parquet
+#' @importFrom chk chk_data chk_string
+#' @importFrom fs dir_create path_dir
+#'
+#' @examples
+#' \dontrun{
+#' # Create example data with various types
+#' df <- data.frame(
+#'   int_col = 1:3L,
+#'   dbl_col = c(1.1, 2.2, 3.3),
+#'   chr_col = c("a", "b", "c"),
+#'   date_col = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03")),
+#'   lgl_col = c(TRUE, FALSE, TRUE)
+#' )
+#'
+#' # Write to temporary file
+#' path <- tempfile(fileext = ".csv")
+#' ngr_fs_type_write(df, path)
+#'
+#' # Schema file is created alongside
+#' schema_path <- sub("\\.csv$", "_schema.parquet", path)
+#' file.exists(schema_path)
+#'
+#' # Read back with types preserved
+#' df2 <- ngr_fs_type_read(path)
+#' str(df2)
+#' }
+ngr_fs_type_write <- function(x, path, format = "csv") {
+  chk::chk_data(x)
+  chk::chk_string(path)
+  chk::chk_string(format)
+
+  # Regex for a trailing `.<format>` extension; matched case-insensitively.
+  pattern <- paste0("\\.", format, "$")
+
+  # Without that extension `sub()` below would return `path` unchanged and the
+  # schema write would then overwrite the CSV that was just written, so fail
+  # before anything touches the filesystem.
+  if (!grepl(pattern, path, ignore.case = TRUE)) {
+    stop(
+      "`path` must end with the extension '.", format, "' so the ",
+      "companion schema file does not overwrite the data file.",
+      call. = FALSE
+    )
+  }
+  schema_path <- sub(pattern, "_schema.parquet", path, ignore.case = TRUE)
+
+  fs::dir_create(fs::path_dir(path))
+  arrow::write_csv_arrow(x, path)
+  # A zero-row slice carries the column names and types but none of the data.
+  arrow::write_parquet(x[0, , drop = FALSE], schema_path)
+
+  invisible(path)
+}
diff --git a/man/ngr_fs_copy_if_missing.Rd b/man/ngr_fs_copy_if_missing.Rd
index 385331b..1c380b6 100644
--- a/man/ngr_fs_copy_if_missing.Rd
+++ b/man/ngr_fs_copy_if_missing.Rd
@@ -27,6 +27,8 @@ fs::file_exists(fs::path(dir_out, "a", "test.txt"))
 }
 \seealso{
 Other fs:
-\code{\link{ngr_fs_id_missing}()}
+\code{\link{ngr_fs_id_missing}()},
+\code{\link{ngr_fs_type_read}()},
+\code{\link{ngr_fs_type_write}()}
 }
 \concept{fs}
diff --git a/man/ngr_fs_id_missing.Rd b/man/ngr_fs_id_missing.Rd
index b92d755..cca08e4 100644
--- a/man/ngr_fs_id_missing.Rd
+++ b/man/ngr_fs_id_missing.Rd
@@ -25,6 +25,8 @@ Returns paths from dir_in that do not exist under the same relative path in dir_
 }
 \seealso{
 Other fs:
-\code{\link{ngr_fs_copy_if_missing}()}
+\code{\link{ngr_fs_copy_if_missing}()},
+\code{\link{ngr_fs_type_read}()},
+\code{\link{ngr_fs_type_write}()}
 }
 \concept{fs}
diff --git a/man/ngr_fs_type_write.Rd b/man/ngr_fs_type_write.Rd
new file mode 100644
index 0000000..b90716a
--- /dev/null
+++ b/man/ngr_fs_type_write.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ngr_fs_type_write.R
+\name{ngr_fs_type_write}
+\alias{ngr_fs_type_write}
+\title{Write data to a flat file with type schema preservation}
+\usage{
+ngr_fs_type_write(x, path, format = "csv")
+}
+\arguments{
+\item{x}{A \link{data.frame} or \link[tibble:tibble]{tibble::tibble} to write.}
+
+\item{path}{Character. Path to write the file to. Must end with the
+extension given by \code{format}, otherwise an error is raised and nothing is
+written.}
+
+\item{format}{Character. File extension to replace when creating the schema file.
+Default is "csv".}
+}
+\value{
+Invisibly returns the file path.
+}
+\description{
+Writes a data frame to a flat file format (CSV by default) and stores the
+column type schema in a companion parquet file. This enables type-preserving
+round-trips for formats that don't natively preserve types, while keeping
+data in human-readable flat files suitable for GitHub collaboration.
+}
+\examples{
+\dontrun{
+# Create example data with various types
+df <- data.frame(
+  int_col = 1:3L,
+  dbl_col = c(1.1, 2.2, 3.3),
+  chr_col = c("a", "b", "c"),
+  date_col = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03")),
+  lgl_col = c(TRUE, FALSE, TRUE)
+)
+
+# Write to temporary file
+path <- tempfile(fileext = ".csv")
+ngr_fs_type_write(df, path)
+
+# Schema file is created alongside
+schema_path <- sub("\\\\.csv$", "_schema.parquet", path)
+file.exists(schema_path)
+
+# Read back with types preserved
+df2 <- ngr_fs_type_read(path)
+str(df2)
+}
+}
+\seealso{
+\code{\link[=ngr_fs_type_read]{ngr_fs_type_read()}} for reading files with preserved types
+
+Other fs:
+\code{\link{ngr_fs_copy_if_missing}()},
+\code{\link{ngr_fs_id_missing}()},
+\code{\link{ngr_fs_type_read}()}
+
+Other serialization:
+\code{\link{ngr_fs_type_read}()}
+}
+\concept{fs}
+\concept{serialization}

From ff01a95cc44520a2440bc77fb631a6cb495a07bf Mon Sep 17 00:00:00 2001
From: almac2022
Date: Sun, 11 Jan 2026 07:17:13 -0800
Subject: [PATCH 2/4] - add [ngr_fs_type_read()] for type-preserving flat file reads

Closes [#28](https://github.com/NewGraphEnvironment/ngr/issues/28)

Co-Authored-By: Claude Opus 4.5
---
 R/ngr_fs_type_read.R    | 69 ++++++++++++++++++++++++++++++++++++++++
 man/ngr_fs_type_read.Rd | 60 +++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 R/ngr_fs_type_read.R
 create mode 100644 man/ngr_fs_type_read.Rd

diff --git a/R/ngr_fs_type_read.R b/R/ngr_fs_type_read.R
new file mode 100644
index 0000000..3c970db
--- /dev/null
+++ b/R/ngr_fs_type_read.R
@@ -0,0 +1,69 @@
+#' Read data from a flat file with type schema preservation
+#'
+#' Reads a data frame from a flat file format (CSV by default) using a companion
+#' parquet schema file to restore column types. This enables type-preserving
+#' round-trips for formats that don't natively preserve types.
+#'
+#' @param path Character. Path to the file to read.
+#'   A companion schema file with suffix `_schema.parquet` must exist.
+#' @param format Character. File extension to replace when finding the schema file.
+#'   Default is "csv".
+#'
+#' @return A [tibble] with column types restored from the schema file.
+#' @family fs
+#' @family serialization
+#' @seealso [ngr_fs_type_write()] for writing files with type preservation
+#' @export
+#' @importFrom arrow read_csv_arrow read_parquet schema
+#' @importFrom chk chk_file chk_string
+#'
+#' @examples
+#' \dontrun{
+#' # Create example data with various types
+#' df <- data.frame(
+#'   int_col = 1:3L,
+#'   dbl_col = c(1.1, 2.2, 3.3),
+#'   chr_col = c("a", "b", "c"),
+#'   date_col = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03")),
+#'   lgl_col = c(TRUE, FALSE, TRUE)
+#' )
+#'
+#' # Write to temporary file
+#' path <- tempfile(fileext = ".csv")
+#' ngr_fs_type_write(df, path)
+#'
+#' # Read back with types preserved
+#' df2 <- ngr_fs_type_read(path)
+#' str(df2)
+#'
+#' # Compare types
+#' sapply(df, class)
+#' sapply(df2, class)
+#' }
+ngr_fs_type_read <- function(path, format = "csv") {
+  chk::chk_string(path)
+  chk::chk_file(path)
+  chk::chk_string(format)
+
+  # Regex for a trailing `.<format>` extension; matched case-insensitively.
+  pattern <- paste0("\\.", format, "$")
+
+  # Without that extension `sub()` below would return `path` unchanged, the
+  # existence check would pass on the data file itself, and the parquet reader
+  # would be handed the CSV. Report the actual problem instead.
+  if (!grepl(pattern, path, ignore.case = TRUE)) {
+    stop(
+      "`path` must end with the extension '.", format, "' so the ",
+      "companion schema file can be located.",
+      call. = FALSE
+    )
+  }
+  schema_path <- sub(pattern, "_schema.parquet", path, ignore.case = TRUE)
+  chk::chk_file(schema_path)
+
+  # Only the column names and types of the schema file are used. Because they
+  # are passed as an explicit schema, the CSV header row is skipped rather than
+  # parsed as data.
+  schema <- arrow::schema(arrow::read_parquet(schema_path))
+  arrow::read_csv_arrow(path, schema = schema, skip = 1)
+}
diff --git a/man/ngr_fs_type_read.Rd b/man/ngr_fs_type_read.Rd
new file mode 100644
index 0000000..d65d9e1
--- /dev/null
+++ b/man/ngr_fs_type_read.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ngr_fs_type_read.R
+\name{ngr_fs_type_read}
+\alias{ngr_fs_type_read}
+\title{Read data from a flat file with type schema preservation}
+\usage{
+ngr_fs_type_read(path, format = "csv")
+}
+\arguments{
+\item{path}{Character. Path to the file to read.
+A companion schema file with suffix \verb{_schema.parquet} must exist.}
+
+\item{format}{Character. File extension to replace when finding the schema file.
+Default is "csv".}
+}
+\value{
+A \link[tibble:tibble]{tibble::tibble} with column types restored from the schema file.
+}
+\description{
+Reads a data frame from a flat file format (CSV by default) using a companion
+parquet schema file to restore column types. This enables type-preserving
+round-trips for formats that don't natively preserve types.
+}
+\examples{
+\dontrun{
+# Create example data with various types
+df <- data.frame(
+  int_col = 1:3L,
+  dbl_col = c(1.1, 2.2, 3.3),
+  chr_col = c("a", "b", "c"),
+  date_col = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03")),
+  lgl_col = c(TRUE, FALSE, TRUE)
+)
+
+# Write to temporary file
+path <- tempfile(fileext = ".csv")
+ngr_fs_type_write(df, path)
+
+# Read back with types preserved
+df2 <- ngr_fs_type_read(path)
+str(df2)
+
+# Compare types
+sapply(df, class)
+sapply(df2, class)
+}
+}
+\seealso{
+\code{\link[=ngr_fs_type_write]{ngr_fs_type_write()}} for writing files with type preservation
+
+Other fs:
+\code{\link{ngr_fs_copy_if_missing}()},
+\code{\link{ngr_fs_id_missing}()},
+\code{\link{ngr_fs_type_write}()}
+
+Other serialization:
+\code{\link{ngr_fs_type_write}()}
+}
+\concept{fs}
+\concept{serialization}

From 65061e44cac7da980a7fd830fabf8960785719f5 Mon Sep 17 00:00:00 2001
From: almac2022
Date: Sun, 11 Jan 2026 07:17:31 -0800
Subject: [PATCH 3/4] add tests for type-preserving serialization functions

Co-Authored-By: Claude Opus 4.5
---
 tests/testthat/test-ngr_fs_type.R | 121 ++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 tests/testthat/test-ngr_fs_type.R

diff --git a/tests/testthat/test-ngr_fs_type.R b/tests/testthat/test-ngr_fs_type.R
new file mode 100644
index 0000000..62ac640
--- /dev/null
+++ b/tests/testthat/test-ngr_fs_type.R
@@ -0,0 +1,121 @@
+test_that("ngr_fs_type_write and ngr_fs_type_read round-trip preserves basic types", {
+  original <- data.frame(
+    int_col = 1:3L,
+    dbl_col = c(1.1, 2.2, 3.3),
+    chr_col = c("a", "b", "c"),
+    lgl_col = c(TRUE, FALSE, TRUE),
+    stringsAsFactors = FALSE
+  )
+  csv_path <- tempfile(fileext = ".csv")
+  ngr_fs_type_write(original, csv_path)
+
+  # The data file and its companion schema file must both be on disk
+  expect_true(file.exists(csv_path))
+  expect_true(file.exists(sub("\\.csv$", "_schema.parquet", csv_path)))
+
+  restored <- ngr_fs_type_read(csv_path)
+
+  # Storage types survive the round trip ...
+  expect_type(restored[["int_col"]], "integer")
+  expect_type(restored[["dbl_col"]], "double")
+  expect_type(restored[["chr_col"]], "character")
+  expect_type(restored[["lgl_col"]], "logical")
+
+  # ... and so do the values
+  for (col in names(original)) {
+    expect_equal(restored[[col]], original[[col]])
+  }
+})
+
+test_that("ngr_fs_type preserves types for columns with all NA values", {
+  # Key case: every column has a declared type but holds nothing except NA
+  all_na <- data.frame(
+    int_col = rep(NA_integer_, 3),
+    dbl_col = rep(NA_real_, 3),
+    chr_col = rep(NA_character_, 3),
+    lgl_col = rep(NA, 3),
+    stringsAsFactors = FALSE
+  )
+  csv_path <- tempfile(fileext = ".csv")
+  ngr_fs_type_write(all_na, csv_path)
+  restored <- ngr_fs_type_read(csv_path)
+
+  # The NA-only columns must still come back with their original types
+  expect_type(restored[["int_col"]], "integer")
+  expect_type(restored[["dbl_col"]], "double")
+  expect_type(restored[["chr_col"]], "character")
+  expect_type(restored[["lgl_col"]], "logical")
+})
+
+test_that("ngr_fs_type preserves types for columns with mixed values and NAs", {
+  with_gaps <- data.frame(
+    int_col = c(1L, NA_integer_, 3L),
+    dbl_col = c(1.1, NA_real_, 3.3),
+    chr_col = c("a", NA_character_, "c"),
+    lgl_col = c(TRUE, NA, FALSE),
+    stringsAsFactors = FALSE
+  )
+  csv_path <- tempfile(fileext = ".csv")
+  ngr_fs_type_write(with_gaps, csv_path)
+  restored <- ngr_fs_type_read(csv_path)
+
+  expect_type(restored[["int_col"]], "integer")
+  expect_type(restored[["dbl_col"]], "double")
+  expect_type(restored[["chr_col"]], "character")
+  expect_type(restored[["lgl_col"]], "logical")
+
+  # The NA stays in the middle row of every column
+  for (col in names(with_gaps)) {
+    expect_true(is.na(restored[[col]][2]))
+  }
+
+  # The surrounding non-NA values are unchanged
+  kept <- c(1, 3)
+  expect_equal(restored[["int_col"]][kept], c(1L, 3L))
+  expect_equal(restored[["dbl_col"]][kept], c(1.1, 3.3))
+  expect_equal(restored[["chr_col"]][kept], c("a", "c"))
+  expect_equal(restored[["lgl_col"]][kept], c(TRUE, FALSE))
+})
+
+test_that("ngr_fs_type preserves Date columns", {
+  dates <- data.frame(
+    date_col = as.Date(c("2024-01-01", "2024-01-02", "2024-01-03")),
+    stringsAsFactors = FALSE
+  )
+  csv_path <- tempfile(fileext = ".csv")
+  ngr_fs_type_write(dates, csv_path)
+  restored <- ngr_fs_type_read(csv_path)
+
+  expect_s3_class(restored[["date_col"]], "Date")
+  expect_equal(restored[["date_col"]], dates[["date_col"]])
+})
+
+test_that("ngr_fs_type preserves Date columns with all NAs", {
+  na_dates <- data.frame(
+    date_col = as.Date(c(NA, NA, NA)),
+    stringsAsFactors = FALSE
+  )
+  csv_path <- tempfile(fileext = ".csv")
+  ngr_fs_type_write(na_dates, csv_path)
+  restored <- ngr_fs_type_read(csv_path)
+
+  expect_s3_class(restored[["date_col"]], "Date")
+  expect_true(all(is.na(restored[["date_col"]])))
+})
+
+test_that("ngr_fs_type creates directory if it doesn't exist", {
+  # tempfile() only reserves a name, so neither parent directory exists yet
+  csv_path <- file.path(tempfile(), "subdir", "test.csv")
+  ngr_fs_type_write(data.frame(x = 1:3), csv_path)
+
+  expect_true(file.exists(csv_path))
+  expect_true(file.exists(sub("\\.csv$", "_schema.parquet", csv_path)))
+})
+
+test_that("ngr_fs_type_read errors when schema file missing", {
+  # A CSV written by hand has no companion `_schema.parquet` file
+  csv_path <- tempfile(fileext = ".csv")
+  writeLines("a,b,c\n1,2,3", csv_path)
+
+  expect_error(ngr_fs_type_read(csv_path))
+})

From e5da71c5b5770815b5b74f4a261ba16fd7c8a273 Mon Sep 17 00:00:00 2001
From: almac2022
Date: Sun, 11 Jan 2026 07:17:44 -0800
Subject: [PATCH 4/4] update CLAUDE.md with development best practices

Co-Authored-By: Claude Opus 4.5
---
 CLAUDE.md | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md index 2591a4a..fae5de8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,11 +6,13 @@ R package with utility functions for dynamic reporting, spatial analysis, hydrol ```r devtools::document() -devtools::test() -devtools::check() +devtools::test() # Use this for development - faster +devtools::check(vignettes = FALSE) # Only when needed; always skip vignettes devtools::install() ``` -Build documentation and run checks before committing. +- Prefer `devtools::test()` during development - it's faster +- Only run `devtools::check()` when preparing for release or CI +- Always skip vignettes during checks (`vignettes = FALSE`) ## Commit Style (fledge) @@ -72,3 +74,20 @@ All exported functions use prefix `ngr_` followed by category: - Keep Imports alphabetized - Don't duplicate packages in both Imports and Suggests - Add vignette-only packages to Suggests (e.g., mapview, rstac) + +## GitHub CLI (gh) Idiosyncrasies + +- **Use backticks for function names in issue titles** - makes them pop: + ```bash + gh issue create --title "Add \`ngr_fs_type_write()\` for feature" # correct + ``` +- **No `gh milestone` command** - use `gh api` to create/manage milestones: + ```bash + gh api repos/NewGraphEnvironment/ngr/milestones -X POST -f title="Milestone title" -f description="Description" + ``` +- **`--milestone` flag needs title, not number** - use the milestone name: + ```bash + gh issue create --title "Issue" --milestone "Type-preserving flat file operations" # correct + gh issue create --title "Issue" --milestone 6 # won't work + ``` +- **Flag CLI limitations to user** - if you encounter unexpected CLI behavior, inform the user so they're aware of the limitation