diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index f553284c..428913be 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -93,7 +93,9 @@ jobs: shell: Rscript {0} - name: Check - run: rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, error_on = 'warning', check_dir = 'check') + run: | + cat(paste0('options(bcdata.cache_path = "', file.path(Sys.getenv("GITHUB_WORKSPACE"), "bcdata_cache"), '")\n'), file = "~/.Rprofile", append = TRUE) + rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, error_on = 'warning', check_dir = 'check') shell: Rscript {0} - name: Upload check results diff --git a/DESCRIPTION b/DESCRIPTION index 6e69abb7..9a3e7f78 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,9 @@ Imports: sf (>= 0.7), tidyselect (>= 0.2.5), utils, - xml2 + xml2, + memoise (>= 1.1.0), + rappdirs (>= 0.3.1) Suggests: covr, ggplot2, diff --git a/NAMESPACE b/NAMESPACE index 88d18d9f..433a5c66 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,7 +45,10 @@ export(TOUCHES) export(WITHIN) export(as_tibble) export(bcdc_browse) +export(bcdc_cache_path) +export(bcdc_cache_timeout) export(bcdc_describe_feature) +export(bcdc_forget) export(bcdc_get_data) export(bcdc_get_record) export(bcdc_list) @@ -65,6 +68,7 @@ exportClasses(wfsConnection) exportMethods(dbQuoteIdentifier) exportMethods(dbQuoteString) import(DBI) +import(memoise) import(methods) importFrom(cli,cat_bullet) importFrom(cli,cat_line) diff --git a/R/bcdc_options.R b/R/bcdc_options.R index 8132efc0..896cb0d2 100644 --- a/R/bcdc_options.R +++ b/R/bcdc_options.R @@ -10,24 +10,49 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -#' Retrieve options used in bcdata, their value if set and the default value. +#' Retrieve options used in bcdata, their value if set and +#' the default value. 
#' -#' This function retrieves bcdata specific options that can be set. These options can be set -#' using `option({name of the option} = {value of the option})`. The default options are purposefully -#' set conservatively to hopefully ensure successful requests. Resetting these options may result in -#' failed calls to the data catalogue. Options in R are reset every time R is re-started. See examples for -#' addition ways to restore your initial state. +#' This function retrieves bcdata specific options that +#' can be set. These options can be set using +#' `options({name of the option} = {value of the option})`. +#' The default options are purposefully set conservatively +#' to hopefully ensure successful requests. Resetting +#' these options may result in failed calls to the data +#' catalogue. Options in R are reset every time R is +#' re-started. See examples for additional ways to restore +#' your initial state. #' -#' `bcdata.max_geom_pred_size` is the maximum size of an object used for a geometric operation. Objects -#' that are bigger than this value will have a bounding box drawn and apply the geometric operation -#' on that simpler polygon. Users can try to increase the maximum geometric predicate size and see -#' if the bcdata catalogue accepts their request. +#' `bcdata.max_geom_pred_size` is the maximum size of an +#' object used for a geometric operation. Objects that are +#' bigger than this value will have a bounding box drawn +#' and apply the geometric operation on that simpler +#' polygon. Users can try to increase the maximum +#' geometric predicate size and see if the bcdata +#' catalogue accepts their request. #' -#' `bcdata.chunk_limit` is an option useful when dealing with very large data sets. When requesting large objects -#' from the catalogue, the request is broken up into smaller chunks which are then recombined after they've -#' been downloaded. 
bcdata does this all for you but using this option you can set the size of the chunk -#' requested. On faster internet connections, a bigger chunk limit could be useful while on slower connections, -#' it is advisable to lower the chunk limit. Chunks must be less than 10000. +#' `bcdata.chunk_limit` is an option useful when dealing +#' with very large data sets. When requesting large +#' objects from the catalogue, the request is broken up +#' into smaller chunks which are then recombined after +#' they've been downloaded. bcdata does this all for you +#' but using this option you can set the size of the chunk +#' requested. On faster internet connections, a bigger +#' chunk limit could be useful while on slower +#' connections, it is advisable to lower the chunk limit. +#' Chunks must be less than 10000. +#' +#' `bcdata.cache_path` is the location on your computer +#' where results from web requests are cached. The default +#' is set by [rappdirs::user_cache_dir()] via +#' [bcdc_cache_path()]. This option can only be set before +#' the package is loaded (e.g., by setting it in your +#' .Rprofile file). +#' +#' `bcdata.cache_timeout` is the time, in seconds, that +#' the cache is maintained. Default is 3600 seconds (one +#' hour). This option can only be set before the package +#' is loaded (e.g., by setting it in your .Rprofile file). 
#' #' @examples #' \donttest{ @@ -64,8 +89,10 @@ bcdc_options <- function() { dplyr::tribble( ~ option, ~ value, ~default, - "bcdata.max_geom_pred_size", null_to_na(getOption("bcdata.max_geom_pred_size")), 5E5, - "bcdata.chunk_limit",null_to_na(getOption("bcdata.chunk_limit")), 1000 + "bcdata.max_geom_pred_size", null_to_na(getOption("bcdata.max_geom_pred_size")), as.character(5E5), + "bcdata.chunk_limit",null_to_na(getOption("bcdata.chunk_limit")), as.character(1000), + "bcdata.cache_path",null_to_na(getOption("bcdata.cache_path")), rappdirs::user_cache_dir("bcdata"), + "bcdata.cache_timeout",null_to_na(getOption("bcdata.cache_timeout")), as.character(3600) ) } diff --git a/R/utils-classes.R b/R/utils-classes.R index e20348d2..07fda156 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -325,32 +325,7 @@ mutate.bcdc_promise <- function(.data, ...){ mutate({dots}) "), call. = FALSE) } - -#' Force collection of Web Service request from B.C. Data Catalogue -#' -#' After tuning a query, `collect()` is used to actually bring the data into memory. -#' This will retrieve an sf object into R. The `as_tibble()` function can be used -#' interchangeably with `collect` which matches `dbplyr` behaviour. 
-#' -#' @param x object of class bcdc_promise -#' @inheritParams collect -#' @rdname collect-methods -#' @export -#' -#' @examples -#' \donttest{ -#' try( -#' bcdc_query_geodata("bc-airports") %>% -#' collect() -#' ) -#' -#' try( -#' bcdc_query_geodata("bc-airports") %>% -#' as_tibble() -#' ) -#' } -#' -collect.bcdc_promise <- function(x, ...){ +collect_bcdc_promise_ <- function(x, ...){ check_chunk_limit() x$query_list$CQL_FILTER <- finalize_cql(x$query_list$CQL_FILTER) @@ -406,11 +381,94 @@ collect.bcdc_promise <- function(x, ...){ txt <- cc$parse("UTF-8") - as.bcdc_sf(bcdc_read_sf(txt), query_list = query_list, url = url, + ret <- as.bcdc_sf(bcdc_read_sf(txt), query_list = query_list, url = url, full_url = full_url) + if (getOption("bcdata.cache_verbose", FALSE)) { + message("caching for ", bcdc_cache_timeout(), + " seconds at ", bcdc_cache_path()) + } + + ret + } +#' Retrieve Default Cache timeout +#' +#' Retrieves the length of time that a cache of [collect()]ed +#' web resources is kept. Default is 1 hour (3600 seconds). +#' +#' @export +bcdc_cache_timeout <- function() { + getOption("bcdata.cache_timeout", 3600) +} + +#' Retrieve Default Cache Path +#' +#' Retrieves the default path used to cache the result of web requests. Makes +#' use of the \code{rappdirs} package to use cache folders +#' defined by each operating system +#' +#' @export +bcdc_cache_path <- function() { + getOption("bcdata.cache_path", rappdirs::user_cache_dir("bcdata")) +} + +#' Force collection of Web Service request from B.C. Data +#' Catalogue +#' +#' After tuning a query, `collect()` is used to actually +#' bring the data into memory. This will retrieve an sf +#' object into R. The `as_tibble()` function can be used +#' interchangeably with `collect` which matches `dbplyr` +#' behaviour. +#' +#' The result of `collect()`-ing a query will be cached to +#' avoid repeatedly requesting the same data from the +#' server. 
The duration of the caching can be customized +#' by setting the option `bcdata.cache_timeout` to a +#' different value (in seconds). The default is one hour +#' (3600 seconds). +#' +#' The cache can be cleared by running [bcdc_forget()]. +#' Note this will clear the cache for all `collect()` +#' calls in the previous time frame specified in the +#' `bcdata.cache_timeout` option. +#' +#' @param x object of class bcdc_promise +#' @import memoise +#' @inheritParams collect +#' @rdname collect-methods +#' @export +#' +#' @examples +#' \donttest{ +#' try( +#' bcdc_query_geodata("bc-airports") %>% +#' collect() +#' ) +#' +#' try( +#' bcdc_query_geodata("bc-airports") %>% +#' as_tibble() +#' ) +#' } +#' +collect.bcdc_promise <- memoise( + collect_bcdc_promise_, + ~ timeout(bcdc_cache_timeout()), # 1 hour + cache = cache_filesystem(bcdc_cache_path()) +) + +#' Forget (clear) the cache of objects returned by +#' [collect()] +#' +#' @return `TRUE` if the cache existed previously and was +#' successfully cleared, otherwise `FALSE`. +#' @export +bcdc_forget <- function() { + memoise::forget(collect.bcdc_promise) +} #' @inheritParams collect.bcdc_promise #' @rdname collect-methods diff --git a/man/bcdc_cache_path.Rd b/man/bcdc_cache_path.Rd new file mode 100644 index 00000000..70a22b96 --- /dev/null +++ b/man/bcdc_cache_path.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-classes.R +\name{bcdc_cache_path} +\alias{bcdc_cache_path} +\title{Retrieve Default Cache Path} +\usage{ +bcdc_cache_path() +} +\description{ +Retrieves the default path used to cache the result of web requests. 
Makes +use of the \code{rappdirs} package to use cache folders +defined by each operating system +} diff --git a/man/bcdc_cache_timeout.Rd b/man/bcdc_cache_timeout.Rd new file mode 100644 index 00000000..cdad94b7 --- /dev/null +++ b/man/bcdc_cache_timeout.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-classes.R +\name{bcdc_cache_timeout} +\alias{bcdc_cache_timeout} +\title{Retrieve Default Cache timeout} +\usage{ +bcdc_cache_timeout() +} +\description{ +Retrieves the length of time that a cache of \code{\link[=collect]{collect()}}ed +web resources is kept. Default is 1 hour (3600 seconds). +} diff --git a/man/bcdc_forget.Rd b/man/bcdc_forget.Rd new file mode 100644 index 00000000..6277a04a --- /dev/null +++ b/man/bcdc_forget.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-classes.R +\name{bcdc_forget} +\alias{bcdc_forget} +\title{Forget (clear) the cache of objects returned by +\code{\link[=collect]{collect()}}} +\usage{ +bcdc_forget() +} +\value{ +\code{TRUE} if the cache existed previously and was +successfully cleared, otherwise \code{FALSE}. +} +\description{ +Forget (clear) the cache of objects returned by +\code{\link[=collect]{collect()}} +} diff --git a/man/bcdc_options.Rd b/man/bcdc_options.Rd index 6e9b2ba1..ef37850c 100644 --- a/man/bcdc_options.Rd +++ b/man/bcdc_options.Rd @@ -2,28 +2,53 @@ % Please edit documentation in R/bcdc_options.R \name{bcdc_options} \alias{bcdc_options} -\title{Retrieve options used in bcdata, their value if set and the default value.} +\title{Retrieve options used in bcdata, their value if set and +the default value.} \usage{ bcdc_options() } \description{ -This function retrieves bcdata specific options that can be set. These options can be set -using \verb{option(\{name of the option\} = \{value of the option\})}. The default options are purposefully -set conservatively to hopefully ensure successful requests. 
Resetting these options may result in -failed calls to the data catalogue. Options in R are reset every time R is re-started. See examples for -addition ways to restore your initial state. +This function retrieves bcdata specific options that +can be set. These options can be set using +\verb{options(\{name of the option\} = \{value of the option\})}. +The default options are purposefully set conservatively +to hopefully ensure successful requests. Resetting +these options may result in failed calls to the data +catalogue. Options in R are reset every time R is +re-started. See examples for additional ways to restore +your initial state. } \details{ -\code{bcdata.max_geom_pred_size} is the maximum size of an object used for a geometric operation. Objects -that are bigger than this value will have a bounding box drawn and apply the geometric operation -on that simpler polygon. Users can try to increase the maximum geometric predicate size and see -if the bcdata catalogue accepts their request. +\code{bcdata.max_geom_pred_size} is the maximum size of an +object used for a geometric operation. Objects that are +bigger than this value will have a bounding box drawn +and apply the geometric operation on that simpler +polygon. Users can try to increase the maximum +geometric predicate size and see if the bcdata +catalogue accepts their request. -\code{bcdata.chunk_limit} is an option useful when dealing with very large data sets. When requesting large objects -from the catalogue, the request is broken up into smaller chunks which are then recombined after they've -been downloaded. bcdata does this all for you but using this option you can set the size of the chunk -requested. On faster internet connections, a bigger chunk limit could be useful while on slower connections, -it is advisable to lower the chunk limit. Chunks must be less than 10000. +\code{bcdata.chunk_limit} is an option useful when dealing +with very large data sets. 
When requesting large +objects from the catalogue, the request is broken up +into smaller chunks which are then recombined after +they've been downloaded. bcdata does this all for you +but using this option you can set the size of the chunk +requested. On faster internet connections, a bigger +chunk limit could be useful while on slower +connections, it is advisable to lower the chunk limit. +Chunks must be less than 10000. + +\code{bcdata.cache_path} is the location on your computer +where results from web requests are cached. The default +is set by \code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} via +\code{\link[=bcdc_cache_path]{bcdc_cache_path()}}. This option can only be set before +the package is loaded (e.g., by setting it in your +.Rprofile file). + +\code{bcdata.cache_timeout} is the time, in seconds, that +the cache is maintained. Default is 3600 seconds (one +hour). This option can only be set before the package +is loaded (e.g., by setting it in your .Rprofile file). } \examples{ \donttest{ diff --git a/man/collect-methods.Rd b/man/collect-methods.Rd index 0bc8fbb0..c6884ac1 100644 --- a/man/collect-methods.Rd +++ b/man/collect-methods.Rd @@ -18,12 +18,27 @@ \description{ See \code{tibble::\link[tibble]{as_tibble}} for details. -After tuning a query, \code{collect()} is used to actually bring the data into memory. -This will retrieve an sf object into R. The \code{as_tibble()} function can be used -interchangeably with \code{collect} which matches \code{dbplyr} behaviour. +After tuning a query, \code{collect()} is used to actually +bring the data into memory. This will retrieve an sf +object into R. The \code{as_tibble()} function can be used +interchangeably with \code{collect} which matches \code{dbplyr} +behaviour. See \code{dplyr::\link[dplyr:compute]{collect}} for details. } +\details{ +The result of \code{collect()}-ing a query will be cached to +avoid repeatedly requesting the same data from the +server. 
The duration of the caching can be customized +by setting the option \code{bcdata.cache_timeout} to a +different value (in seconds). The default is one hour +(3600 seconds). + +The cache can be cleared by running \code{\link[=bcdc_forget]{bcdc_forget()}}. +Note this will clear the cache for all \code{collect()} +calls in the previous time frame specified in the +\code{bcdata.cache_timeout} option. +} \examples{ \donttest{ try(