From 23727c301e0b609d6ff88580bf1babf269daed3a Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 19:58:27 +0200 Subject: [PATCH 01/50] add an Spde class --- DESCRIPTION | 2 ++ NAMESPACE | 2 ++ R/spde.R | 18 ++++++++++++++++++ R/spde_class.R | 35 +++++++++++++++++++++++++++++++++++ man/Spde-class.Rd | 21 +++++++++++++++++++++ man/spde.Rd | 20 ++++++++++++++++++++ 6 files changed, 98 insertions(+) create mode 100644 R/spde.R create mode 100644 R/spde_class.R create mode 100644 man/Spde-class.Rd create mode 100644 man/spde.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 173d8bfe..92a12599 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -89,6 +89,7 @@ Collate: 'fit_model_n2k_manifest.R' 'n2k_model_imputed_class.R' 'fit_model_n2k_model_imputed.R' + 'spde_class.R' 'fit_model_s3_object.R' 'get_analysis_date.R' 'get_analysis_version.R' @@ -146,6 +147,7 @@ Collate: 'select_observed_range.R' 'session_package.R' 'sha1.R' + 'spde.R' 'status.R' 'store_manifest.R' 'store_manifest_yaml.R' diff --git a/NAMESPACE b/NAMESPACE index 6d3c9271..376fe343 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,9 @@ export(select_factor_count_strictly_positive) export(select_factor_threshold) export(select_observed_range) export(sha1) +export(spde) export(union) +exportClasses(Spde) exportClasses(n2kAggregate) exportClasses(n2kAnalysisMetadata) exportClasses(n2kAnalysisVersion) diff --git a/R/spde.R b/R/spde.R new file mode 100644 index 00000000..ba14d864 --- /dev/null +++ b/R/spde.R @@ -0,0 +1,18 @@ +#' Create an `Spde` object +#' @param coordinates a `data.frame` of coordinates use to define the mesh. +#' @param range a numeric vector of length 2. +#' Will be used as the `prior.range` argument of [INLA::inla.spde2.pcmatern]. +#' @param Sigma a numeric vector of length 2. +#' Will be used as the `prior.sigma` argument of [INLA::inla.spde2.pcmatern]. +#' @export +#' @importFrom assertthat assert_that noNA +#' @importFrom methods new +spde <- function(coordinates, range, sigma) { + assert_that( + inherits(coordinates, "data.frame"), is.numeric(range), is.numeric(sigma), + noNA(coordinates), noNA(range), noNA(sigma), ncol(coordinates) == 2, + length(range) == 2, length(sigma) == 2, all(range > 0), all(sigma > 0), + range[2] < 1, sigma[2] < 1 + ) + new("Spde", Coordinates = coordinates, Range = range, Sigma = sigma) +} diff --git a/R/spde_class.R b/R/spde_class.R new file mode 100644 index 00000000..3e2c1067 --- /dev/null +++ b/R/spde_class.R @@ -0,0 +1,35 @@ +#' The `spde` class +#' +#' It holds the coordinates, range and sigma parameters for the SPDE model. +#' @slot Coordinates a `data.frame` with the coordinates used for the mesh. +#' @slot Range a numeric vector of length 2. +#' Will be used as the `prior.range` argument of [INLA::inla.spde2.pcmatern]. +#' @slot Sigma a numeric vector of length 2. +#' Will be used as the `prior.sigma` argument of [INLA::inla.spde2.pcmatern]. +#' @name Spde-class +#' @rdname Spde-class +#' @exportClass Spde +#' @aliases Spde-class +#' @importFrom methods setClass +#' @docType class +setClass( + "Spde", + representation = representation( + Coordinates = "data.frame", Range = "numeric", Sigma = "numeric" + ) +) + +#' @importFrom assertthat assert_that noNA +#' @importFrom methods setValidity +setValidity( + "Spde", + function(object) { + assert_that( + noNA(object@Coordinates), noNA(object@Range), noNA(object@Sigma), + ncol(object@Coordinates) == 2, length(object@Range) == 2, + length(object@Sigma) == 2, all(object@Range > 0), all(object@Sigma > 0), + object@Range[2] < 1, object@Sigma[2] < 1 + ) + return(TRUE) + } +) diff --git a/man/Spde-class.Rd b/man/Spde-class.Rd new file mode 100644 index 00000000..88cafaa7 --- /dev/null +++ b/man/Spde-class.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spde_class.R +\docType{class} +\name{Spde-class} +\alias{Spde-class} +\title{The \code{spde} class} +\description{ +It holds the coordinates, range and sigma parameters for the SPDE model. +} +\section{Slots}{ + +\describe{ +\item{\code{Coordinates}}{a \code{data.frame} with the coordinates used for the mesh.} + +\item{\code{Range}}{a numeric vector of length 2. +Will be used as the \code{prior.range} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} + +\item{\code{Sigma}}{a numeric vector of length 2. +Will be used as the \code{prior.sigma} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} +}} + diff --git a/man/spde.Rd b/man/spde.Rd new file mode 100644 index 00000000..04f525dd --- /dev/null +++ b/man/spde.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spde.R +\name{spde} +\alias{spde} +\title{Create an \code{Spde} object} +\usage{ +spde(coordinates, range, sigma) +} +\arguments{ +\item{coordinates}{a \code{data.frame} of coordinates use to define the mesh.} + +\item{range}{a numeric vector of length 2. +Will be used as the \code{prior.range} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} + +\item{Sigma}{a numeric vector of length 2. +Will be used as the \code{prior.sigma} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} +} +\description{ +Create an \code{Spde} object +} From 0937ab9a46b2c16657e79982a136eb913211cc74 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 20:04:33 +0200 Subject: [PATCH 02/50] add spec2mesh() --- DESCRIPTION | 1 + NAMESPACE | 1 + R/spde2mesh.R | 39 +++++++++++++++++++++++++++++++++++++++ man/spde2mesh.Rd | 21 +++++++++++++++++++++ 4 files changed, 62 insertions(+) create mode 100644 R/spde2mesh.R create mode 100644 man/spde2mesh.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 92a12599..1f7349da 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -148,6 +148,7 @@ Collate: 'session_package.R' 'sha1.R' 'spde.R' + 'spde2mesh.R' 'status.R' 'store_manifest.R' 'store_manifest_yaml.R' diff --git a/NAMESPACE b/NAMESPACE index 376fe343..62cde52c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,6 +67,7 @@ exportMethods(read_result) exportMethods(result_estimate) exportMethods(result_metadata) exportMethods(session_package) +exportMethods(spde2mesh) exportMethods(status) exportMethods(store_manifest) exportMethods(store_manifest_yaml) diff --git a/R/spde2mesh.R b/R/spde2mesh.R new file mode 100644 index 00000000..91f4136e --- /dev/null +++ b/R/spde2mesh.R @@ -0,0 +1,39 @@ +#' Convert an `Spde` object to a mesh object +#' @param object The `Spde` object +#' @name spde2mesh +#' @rdname spde2mesh +#' @exportMethod spde2mesh +#' @docType methods +#' @importFrom methods setGeneric +setGeneric( + name = "spde2mesh", + def = function(object) { + standardGeneric("spde2mesh") # nocov + } +) + +#' @rdname spde2mesh +#' @importFrom methods setMethod new +#' @include spde_class.R +setMethod( + f = "spde2mesh", + signature = signature(object = "Spde"), + definition = function(object) { + stopifnot( + "fmesher package required but not installed." = + requireNamespace("fmesher", quietly = TRUE), + "sf package required but not installed." = + requireNamespace("sf", quietly = TRUE) + ) + max_dist <- object@Range[1] + object@Coordinates |> + sf::st_as_sf(coords = colnames(object@Coordinates)) |> + sf::st_buffer(dist = max_dist) |> + sf::st_union() |> + sf::st_simplify(dTolerance = max_dist / 10) -> region + fmesher::fm_mesh_2d_inla( + boundary = region, max.edge = c(max_dist / 3, max_dist * 2), + cutoff = max_dist / 10 + ) + } +) diff --git a/man/spde2mesh.Rd b/man/spde2mesh.Rd new file mode 100644 index 00000000..a4c35b3b --- /dev/null +++ b/man/spde2mesh.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spde2matern.R, R/spde2mesh.R +\docType{methods} +\name{spde2matern,Spde-method} +\alias{spde2matern,Spde-method} +\alias{spde2mesh} +\alias{spde2mesh,Spde-method} +\title{Convert an \code{Spde} object to a mesh object} +\usage{ +\S4method{spde2matern}{Spde}(object) + +spde2mesh(object) + +\S4method{spde2mesh}{Spde}(object) +} +\arguments{ +\item{object}{The \code{Spde} object} +} +\description{ +Convert an \code{Spde} object to a mesh object +} From 7cba059915e28e72266135c2fa26b1f17ccb39d5 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 20:05:15 +0200 Subject: [PATCH 03/50] add spec2matern() --- DESCRIPTION | 1 + NAMESPACE | 1 + R/spde2matern.R | 31 +++++++++++++++++++++++++++++++ man/spde2matern.Rd | 15 +++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 R/spde2matern.R create mode 100644 man/spde2matern.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 1f7349da..61225db2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -148,6 +148,7 @@ Collate: 'session_package.R' 'sha1.R' 'spde.R' + 'spde2matern.R' 'spde2mesh.R' 'status.R' 'store_manifest.R' diff --git a/NAMESPACE b/NAMESPACE index 62cde52c..475963f9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,6 +67,7 @@ exportMethods(read_result) exportMethods(result_estimate) exportMethods(result_metadata) exportMethods(session_package) +exportMethods(spde2matern) exportMethods(spde2mesh) exportMethods(status) exportMethods(store_manifest) diff --git a/R/spde2matern.R b/R/spde2matern.R new file mode 100644 index 00000000..af8b3fc2 --- /dev/null +++ b/R/spde2matern.R @@ -0,0 +1,31 @@ +#' Convert an `Spde` object to a matern object +#' @param object The `Spde` object +#' @name spde2matern +#' @rdname spde2matern +#' @exportMethod spde2matern +#' @docType methods +#' @importFrom methods setGeneric +setGeneric( + name = "spde2matern", + def = function(object) { + standardGeneric("spde2matern") # nocov + } +) + +#' @rdname spde2mesh +#' @importFrom methods setMethod new +#' @include spde_class.R +setMethod( + f = "spde2matern", + signature = signature(object = "Spde"), + definition = function(object) { + stopifnot( + "INLA package required but not installed." = + requireNamespace("INLA", quietly = TRUE) + ) + INLA::inla.spde2.pcmatern( + mesh = spde2mesh(object), prior.range = object@Range, + prior.sigma = object@Sigma + ) + } +) diff --git a/man/spde2matern.Rd b/man/spde2matern.Rd new file mode 100644 index 00000000..c5abe3d3 --- /dev/null +++ b/man/spde2matern.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spde2matern.R +\docType{methods} +\name{spde2matern} +\alias{spde2matern} +\title{Convert an \code{Spde} object to a matern object} +\usage{ +spde2matern(object) +} +\arguments{ +\item{object}{The \code{Spde} object} +} +\description{ +Convert an \code{Spde} object to a matern object +} From 3c5d458c292172370d0f6b8e40059758785ee97b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 20:06:03 +0200 Subject: [PATCH 04/50] add the n2kSpde class --- DESCRIPTION | 5 +- NAMESPACE | 1 + R/n2k_spde.R | 219 +++++++++++++++++++++++++++++++++++++++++++ R/n2k_spde_class.R | 26 +++++ man/n2kSpde-class.Rd | 15 +++ man/n2k_spde.Rd | 122 ++++++++++++++++++++++++ 6 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 R/n2k_spde.R create mode 100644 R/n2k_spde_class.R create mode 100644 man/n2kSpde-class.Rd create mode 100644 man/n2k_spde.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 61225db2..8fa45edc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,7 +57,7 @@ Config/checklist/keywords: analysis, reproducible research, natura 2000, Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Collate: 'n2k_parameter_class.R' 'n2k_anomaly_class.R' @@ -90,6 +90,7 @@ Collate: 'n2k_model_imputed_class.R' 'fit_model_n2k_model_imputed.R' 'spde_class.R' + 'fit_model_n2k_spde.R' 'fit_model_s3_object.R' 'get_analysis_date.R' 'get_analysis_version.R' @@ -121,6 +122,7 @@ Collate: 'get_species_group_id.R' 'get_status_fingerprint.R' 'inla_inverse.R' + 'make_a.R' 'manifest_yaml_to_bash.R' 'mark_obsolete_dataset.R' 'n2k_aggregated.R' @@ -132,6 +134,7 @@ Collate: 'n2k_inla_comparison.R' 'n2k_manifest.R' 'n2k_model_imputed.R' + 'n2k_spde.R' 'parent_status.R' 'read_manifest.R' 'read_model.R' diff --git a/NAMESPACE b/NAMESPACE index 475963f9..600f2177 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ exportClasses(n2kModel) exportClasses(n2kModelImputed) exportClasses(n2kParameter) exportClasses(n2kResult) +exportClasses(n2kSpde) exportMethods("parent_status<-") exportMethods("status<-") exportMethods(combine) diff --git a/R/n2k_spde.R b/R/n2k_spde.R new file mode 100644 index 00000000..71843903 --- /dev/null +++ b/R/n2k_spde.R @@ -0,0 +1,219 @@ +#' Create an `n2kSpde` object +#' @param data a `data.frame` with the data to analyse +#' @param model_fit The fitted model +#' @param ... other arguments. See below +#' @name n2k_spde +#' @rdname n2k_spde +#' @exportMethod n2k_spde +#' @docType methods +#' @importFrom methods setGeneric +setGeneric( + name = "n2k_spde", + def = function( + data, ..., model_fit + ) { + standardGeneric("n2k_spde") # nocov + } +) + +#' @description A new `n2kSpde` model is created when `data` is a `data.frame`. +#' @rdname n2k_spde +#' @aliases n2k_spde,n2kSpde-methods +#' @importFrom methods setMethod new +#' @importFrom assertthat assert_that is.count is.string is.time +#' @importFrom digest sha1 +#' @importFrom stats as.formula +#' @importFrom utils sessionInfo +#' @include n2k_spde_class.R +#' @inheritParams n2k_inla_comparison +#' @inheritParams n2k_inla +#' @inheritParams multimput::impute +setMethod( + f = "n2k_spde", + signature = signature(data = "data.frame"), + definition = function( + data, status = "new", result_datasource_id, scheme_id, family = "poisson", + formula, species_group_id, location_group_id, model_type, spde, + first_imported_year, last_imported_year, duration, last_analysed_year, + analysis_date, lin_comb = NULL, minimum = "", imputation_size, + parent = character(0), seed, replicate_name = list(), control = list(), + parent_status = "converged", parent_statusfingerprint, extra, ..., model_fit + ) { + assert_that(is.string(status)) + assert_that(is.string(minimum)) + if (missing(seed)) { + seed <- sample(.Machine$integer.max, 1) + } + assert_that(is.count(seed)) + seed <- as.integer(seed) + if (missing(imputation_size)) { + imputation_size <- 0L + } else { + assert_that(is.count(imputation_size)) + imputation_size <- as.integer(imputation_size) + } + assert_that( + is.string(result_datasource_id), is.string(scheme_id), + is.string(species_group_id), is.string(location_group_id), + is.string(model_type), is.string(formula), is.count(first_imported_year), + is.count(last_imported_year) + ) + first_imported_year <- as.integer(first_imported_year) + last_imported_year <- as.integer(last_imported_year) + if (missing(duration)) { + duration <- last_imported_year - first_imported_year + 1L + } else { + assert_that(is.count(duration)) + duration <- as.integer(duration) + } + if (missing(last_analysed_year)) { + last_analysed_year <- last_imported_year + } + assert_that(is.count(last_analysed_year)) + last_analysed_year <- as.integer(last_analysed_year) + assert_that(is.time(analysis_date)) + assert_that( + is.null(lin_comb) || inherits(lin_comb, "list") || + (inherits(lin_comb, "matrix") && length(dim(lin_comb) == 2)), + msg = "lin_comb must be either a list or a matrix" + ) + assert_that(is.list(replicate_name)) + assert_that( + length(replicate_name) == 0 || !is.null(names(replicate_name)), + msg = "replicate_name must have names" + ) + assert_that(is.character(family), length(family) >= 1) + assert_that(is.list(control)) + control$control.compute$dic <- ifelse( + is.null(control$control.compute$dic), TRUE, control$control.compute$dic + ) + control$control.compute$waic <- ifelse( + is.null(control$control.compute$waic), TRUE, control$control.compute$waic + ) + control$control.compute$cpo <- ifelse( + is.null(control$control.compute$cpo), TRUE, control$control.compute$cpo + ) + control$control.compute$config <- ifelse( + is.null(control$control.compute$config), TRUE, + control$control.compute$config + ) + control$control.predictor$compute <- ifelse( + is.null(control$control.predictor$compute), TRUE, + control$control.predictor$compute + ) + if (is.null(control$control.predictor$link)) { + control$control.predictor$link <- 1 + } + control$control.fixed$prec.intercept <- ifelse( + is.null(control$control.fixed$prec.intercept), + 1, control$control.fixed$prec.intercept + ) + if (missing(extra)) { + extra <- data[0, ] + } + + file_fingerprint <- sha1( + list( + data, result_datasource_id, scheme_id, species_group_id, + location_group_id, family, model_type, formula, first_imported_year, + last_imported_year, duration, last_analysed_year, + format(analysis_date, tz = "UTC"), seed, parent, replicate_name, + lin_comb, imputation_size, minimum, control, extra + ) + ) + + if (length(parent) == 0) { + analysis_relation <- data.frame( + analysis = character(0), parent_analysis = character(0), + parentstatus_fingerprint = character(0), parent_status = character(0), + stringsAsFactors = FALSE + ) + } else { + assert_that(is.string(parent)) + assert_that(is.string(parent_status)) + if (missing(parent_statusfingerprint)) { + parent_statusfingerprint <- sha1(parent_status) + } else { + assert_that(is.string(parent_statusfingerprint)) + } + analysis_relation <- data.frame( + analysis = file_fingerprint, parent_analysis = parent, + parentstatus_fingerprint = parent_statusfingerprint, + parent_status = parent_status, stringsAsFactors = FALSE + ) + } + version <- get_analysis_version(sessionInfo()) + status_fingerprint <- sha1( + list( + file_fingerprint, status, NULL, version@AnalysisVersion$fingerprint, + version@AnalysisVersion, version@RPackage, + version@AnalysisVersionRPackage, analysis_relation, NULL + ), + digits = 6L + ) + + new( + "n2kSpde", + AnalysisVersion = version@AnalysisVersion, RPackage = version@RPackage, + AnalysisVersionRPackage = version@AnalysisVersionRPackage, + AnalysisMetadata = data.frame( + result_datasource_id = result_datasource_id, scheme_id = scheme_id, + species_group_id = species_group_id, + location_group_id = location_group_id, model_type = model_type, + formula = formula, first_imported_year = first_imported_year, + last_imported_year = last_imported_year, duration = duration, + last_analysed_year = last_analysed_year, analysis_date = analysis_date, + seed = seed, status = status, + analysis_version = version@AnalysisVersion$fingerprint, + file_fingerprint = file_fingerprint, + status_fingerprint = status_fingerprint, stringsAsFactors = FALSE + ), + AnalysisFormula = list(as.formula(formula)), LinearCombination = lin_comb, + AnalysisRelation = analysis_relation, Data = data, Model = NULL, + ReplicateName = replicate_name, Family = family, Control = control, + ImputationSize = imputation_size, Minimum = minimum, RawImputed = NULL, + Extra = extra, Spde = spde + ) + } +) + +#' @description In case `data` is an `n2kSpde` object, then only the model and +#' status are updated. +#' All other slots are unaffected. +#' @rdname n2k_spde +#' @aliases n2k_spde,n2kSpde-methods +#' @importFrom methods setMethod validObject new +#' @importFrom digest sha1 +#' @importFrom utils sessionInfo +#' @include n2k_spde_class.R +#' @param raw_imputed the optional `rawImputed` object +setMethod( + f = "n2k_spde", + signature = signature(data = "n2kSpde", model_fit = "inla"), + definition = function( + data, status, raw_imputed = NULL, ..., model_fit + ) { + assert_that(is.string(status)) + data@Model <- model_fit + data@AnalysisMetadata$status <- status + version <- get_analysis_version(sessionInfo()) + new_version <- union(data, version) + data@AnalysisVersion <- new_version$Union@AnalysisVersion + data@RPackage <- new_version$Union@RPackage + data@AnalysisVersionRPackage <- new_version$Union@AnalysisVersionRPackage + data@AnalysisMetadata$analysis_version <- new_version$Unionfingerprint + data@RawImputed <- raw_imputed + data@AnalysisMetadata$status_fingerprint <- sha1( + list( + data@AnalysisMetadata$file_fingerprint, data@AnalysisMetadata$status, + data@Model, data@AnalysisMetadata$analysis_version, + data@AnalysisVersion, data@RPackage, data@AnalysisVersionRPackage, + data@AnalysisRelation, data@RawImputed + ), + digits = 6L + ) + + validObject(data) + return(data) + } +) diff --git a/R/n2k_spde_class.R b/R/n2k_spde_class.R new file mode 100644 index 00000000..10b8d6e8 --- /dev/null +++ b/R/n2k_spde_class.R @@ -0,0 +1,26 @@ +#' The `n2kSpde` class +#' +#' It hold analysis data based on an INLA model with SPDE. +#' @slot Spde A `list` containing the information for the SPDE. +#' @name n2kSpde-class +#' @rdname n2kSpde-class +#' @exportClass n2kSpde +#' @aliases n2kSpde-class +#' @importFrom methods setClass +#' @docType class +#' @include n2k_inla_class.R +#' @include spde_class.R +setClass( + "n2kSpde", + representation = representation(Spde = "Spde"), + contains = "n2kInla" +) + +#' @importFrom methods setValidity +#' @importFrom digest sha1 +setValidity( + "n2kSpde", + function(object) { + return(TRUE) + } +) diff --git a/man/n2kSpde-class.Rd b/man/n2kSpde-class.Rd new file mode 100644 index 00000000..d9fa67d6 --- /dev/null +++ b/man/n2kSpde-class.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/n2k_spde_class.R +\docType{class} +\name{n2kSpde-class} +\alias{n2kSpde-class} +\title{The \code{n2kSpde} class} +\description{ +It hold analysis data based on an INLA model with SPDE. +} +\section{Slots}{ + +\describe{ +\item{\code{Spde}}{A \code{list} containing the information for the SPDE.} +}} + diff --git a/man/n2k_spde.Rd b/man/n2k_spde.Rd new file mode 100644 index 00000000..d177394a --- /dev/null +++ b/man/n2k_spde.Rd @@ -0,0 +1,122 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/n2k_spde.R +\docType{methods} +\name{n2k_spde} +\alias{n2k_spde} +\alias{n2k_spde,data.frame,ANY-method} +\alias{n2k_spde,n2kSpde-methods} +\alias{n2k_spde,n2kSpde,inla-method} +\title{Create an \code{n2kSpde} object} +\usage{ +n2k_spde(data, ..., model_fit) + +\S4method{n2k_spde}{data.frame,ANY}( + data, + status = "new", + result_datasource_id, + scheme_id, + family = "poisson", + formula, + species_group_id, + location_group_id, + model_type, + spde, + first_imported_year, + last_imported_year, + duration, + last_analysed_year, + analysis_date, + lin_comb = NULL, + minimum = "", + imputation_size, + parent = character(0), + seed, + replicate_name = list(), + control = list(), + parent_status = "converged", + parent_statusfingerprint, + extra, + ..., + model_fit +) + +\S4method{n2k_spde}{n2kSpde,inla}(data, status, raw_imputed = NULL, ..., model_fit) +} +\arguments{ +\item{data}{a \code{data.frame} with the data to analyse} + +\item{...}{other arguments. See below} + +\item{model_fit}{The fitted model} + +\item{status}{A single character indicating the status of the model. +Defaults to \code{"waiting"}.} + +\item{result_datasource_id}{A string identifying the data source.} + +\item{scheme_id}{A single integer holding the id of the scheme.} + +\item{family}{the family to use in the INLA model.} + +\item{formula}{A single character identifying the comparison.} + +\item{species_group_id}{A string identifying the species group.} + +\item{location_group_id}{A string identifying the location group.} + +\item{model_type}{The type of the models. +Must start with \code{"inla comparison:"}.} + +\item{first_imported_year}{Oldest year considered in the data.} + +\item{last_imported_year}{Most recent year considered in the data.} + +\item{duration}{The width of the moving window. +Defaults to the \code{last_imported_year - first_imported_year + 1}.} + +\item{last_analysed_year}{Most recent year in the window. +Defaults to \code{last_imported_year}.} + +\item{analysis_date}{A \code{POSIXct} date indicating the date that the dataset +was imported.} + +\item{lin_comb}{A model matrix to calculate linear combinations.} + +\item{minimum}{The name of the variable which holds the minimum counts. +Only relevant in case of multiple imputation.} + +\item{imputation_size}{The required number of imputations defaults to 0.} + +\item{parent}{The file fingerprint of the optional parent analysis.} + +\item{seed}{A single integer used as a seed for all calculations. +A random seed will be inserted when missing.} + +\item{replicate_name}{A list with the names of replicates. +Defaults to an empty list. +Used in case of \code{f(X, ..., replicate = Z)}. +Should be a named list like e.g. \code{list(X = c("a", "b", "c"))}.} + +\item{control}{A named list passed to \code{\link[INLA:inla]{INLA::inla()}} when fitting +the model.} + +\item{parent_status}{A \code{data.frame} with columns +\code{parent_analysis} (the file fingerprint of the parent), +\code{parentstatus_fingerprint} (the status fingerprint of the parent), +and \code{parent_status} (the status of the parent).} + +\item{parent_statusfingerprint}{The status fingerprint of the parent +analysis.} + +\item{extra}{a \code{data.frame} with extra observations not used in the model. +They will be added in subsequent analyses.} + +\item{raw_imputed}{the optional \code{rawImputed} object} +} +\description{ +A new \code{n2kSpde} model is created when \code{data} is a \code{data.frame}. + +In case \code{data} is an \code{n2kSpde} object, then only the model and +status are updated. +All other slots are unaffected. +} From 705ee809d482aa9d3aa8bde9f58d96585350c861 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 20:06:26 +0200 Subject: [PATCH 05/50] add make_a() --- NAMESPACE | 1 + R/make_a.R | 34 ++++++++++++++++++++++++++++++++++ man/make_a.Rd | 18 ++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 R/make_a.R create mode 100644 man/make_a.Rd diff --git a/NAMESPACE b/NAMESPACE index 600f2177..4f94bda8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,7 @@ exportMethods(get_scheme_id) exportMethods(get_seed) exportMethods(get_species_group_id) exportMethods(get_status_fingerprint) +exportMethods(make_a) exportMethods(manifest_yaml_to_bash) exportMethods(n2k_aggregate) exportMethods(n2k_composite) diff --git a/R/make_a.R b/R/make_a.R new file mode 100644 index 00000000..b5a9af86 --- /dev/null +++ b/R/make_a.R @@ -0,0 +1,34 @@ +#' Convert an `Spde` object to a A object +#' @param object The `Spde` object +#' @name make_a +#' @rdname make_a +#' @exportMethod make_a +#' @docType methods +#' @importFrom methods setGeneric +setGeneric( + name = "make_a", + def = function(object, data) { + standardGeneric("make_a") # nocov + } +) + +#' @rdname make_a +#' @importFrom methods setMethod new +#' @include spde_class.R +setMethod( + f = "make_a", + signature = signature(object = "Spde"), + definition = function(object, data) { + assert_that( + inherits(data, "data.frame"), + all(colnames(object@Coordinates) %in% colnames(data)) + ) + stopifnot( + "INLA package required but not installed." = + requireNamespace("INLA", quietly = TRUE) + ) + data[colnames(object@Coordinates)] |> + as.matrix() |> + INLA::inla.spde.make.A(mesh = spde2mesh(object)) + } +) diff --git a/man/make_a.Rd b/man/make_a.Rd new file mode 100644 index 00000000..ab64631c --- /dev/null +++ b/man/make_a.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make_a.R +\docType{methods} +\name{make_a} +\alias{make_a} +\alias{make_a,Spde-method} +\title{Convert an \code{Spde} object to a A object} +\usage{ +make_a(object, data) + +\S4method{make_a}{Spde}(object, data) +} +\arguments{ +\item{object}{The \code{Spde} object} +} +\description{ +Convert an \code{Spde} object to a A object +} From 63a6909efd591bb2594aa55eb9d89945a06f4b1a Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 20:06:54 +0200 Subject: [PATCH 06/50] add a method for fit_model() on an n2kSpde --- DESCRIPTION | 1 + NAMESPACE | 1 + R/fit_model_n2k_spde.R | 159 +++++++++++++++++++++++++++++++++++++++++ man/fit_model.Rd | 14 +++- 4 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 R/fit_model_n2k_spde.R diff --git a/DESCRIPTION b/DESCRIPTION index 8fa45edc..4a57b3f6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,6 +90,7 @@ Collate: 'n2k_model_imputed_class.R' 'fit_model_n2k_model_imputed.R' 'spde_class.R' + 'n2k_spde_class.R' 'fit_model_n2k_spde.R' 'fit_model_s3_object.R' 'get_analysis_date.R' diff --git a/NAMESPACE b/NAMESPACE index 4f94bda8..3340f8bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,6 +62,7 @@ exportMethods(n2k_inla) exportMethods(n2k_inla_comparison) exportMethods(n2k_manifest) exportMethods(n2k_model_imputed) +exportMethods(n2k_spde) exportMethods(parent_status) exportMethods(read_manifest) exportMethods(read_model) diff --git a/R/fit_model_n2k_spde.R b/R/fit_model_n2k_spde.R new file mode 100644 index 00000000..0084cffb --- /dev/null +++ b/R/fit_model_n2k_spde.R @@ -0,0 +1,159 @@ +#' @rdname fit_model +#' @importFrom methods setMethod new +#' @importFrom assertthat assert_that is.number +#' @importMethodsFrom multimput impute +#' @include n2k_spde_class.R +#' @param timeout the optional number of second until the model will time out +#' @inheritParams multimput::impute +setMethod( + f = "fit_model", + signature = signature(x = "n2kSpde"), + definition = function( + x, status = "new", ..., timeout = NULL, seed = get_seed(x), + num_threads = NULL, parallel_configs = TRUE + ) { + assert_that( + requireNamespace("INLA", quietly = TRUE), + msg = "INLA package required but not installed." + ) + validObject(x) + assert_that(is.character(status), length(status) >= 1) + + # don't fit model when status doesn't match + if (!(status(x) %in% status)) { + return(x) + } + + set.seed(seed) + + data <- get_data(x) + matern <- spde2matern(x@Spde) + index <- INLA::inla.spde.make.index("matern_spde", n.spde = matern$n.spde) + x@AnalysisMetadata$formula |> + c("f(matern_spde, model = matern)") |> + paste(collapse = " + ") |> + as.formula() -> model_formula + fm <- terms(model_formula) + response <- all.vars(fm)[attr(fm, "response")] + stack_observed <- INLA::inla.stack( + data = data[!is.na(data[[response]]), response, drop = FALSE], + A = list(1, make_a(object = x@Spde, data[!is.na(data[[response]]),])), + tag = "observed", + effects = list( + data[ + !is.na(data[[response]]), colnames(data) != response, drop = FALSE + ], + index + ) + ) + INLA::inla.stack( + data = data[is.na(data[[response]]), response, drop = FALSE], + A = list(1, make_a(object = x@Spde, data[is.na(data[[response]]),])), + tag = "observed", + effects = list( + data[is.na(data[[response]]), colnames(data) != response, drop = FALSE], + index + ) + ) |> + INLA::inla.stack(stack_observed) -> stack_total + # prepare linear combinations + lc <- model2lincomb(x@LinearCombination) + # prepare inla() arguments + control <- x@Control + control$formula <- model_formula + control$family <- x@Family + # fit model + if (mean(is.na(data[[response]])) < 0.10) { + # directly fit model when less than 10% missing data + control$data <- INLA::inla.stack.data(stack_total, spde = spde) + control$control.predictor = list( + A = INLA::inla.stack.A(stack_total), link = 1 + ) + control$lincomb <- lc + model <- try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) + } else { + # first fit model without missing data + control$data <- INLA::inla.stack.data(stack_observed, spde = spde) + control$control.predictor = list(A = INLA::inla.stack.A(stack_observed)) + m0 <- try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) + if (inherits(m0, "try-error")) { + status(x) <- ifelse( + grepl("time limit", m0), "time-out", "error" + ) + return(x) + } + # then refit with missing data + control$data <- INLA::inla.stack.data(stack_total, spde = spde) + control$control.predictor = list( + A = INLA::inla.stack.A(stack_total), link = 1 + ) + control$lincomb <- lc + control$control.update <- list(result = m0) + model <- try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) + } + + # handle error in model fit + if (inherits(model, "try-error")) { + status(x) <- ifelse( + grepl("time limit", model), "time-out", "error" + ) + return(x) + } + # return fitted model when no imputation is required + if (x@ImputationSize == 0) { + return(n2k_spde(data = x, model_fit = model, status = "converged")) + } + + imputed <- try(impute( + model = model, n_imp = x@ImputationSize, minimum = x@Minimum, + seed = seed, num_threads = num_threads, extra = x@Extra, + parallel_configs = parallel_configs + )) + if (inherits(imputed, "try-error")) { + return(n2k_spde(data = x, model_fit = model, status = "error")) + } + # return fitted model with imputations + return(n2k_inla( + data = x, model_fit = model, status = "converged", raw_imputed = imputed + )) + } +) + +model2lincomb <- function(lincomb) { + if (is.null(lincomb)) { + return(NULL) + } + if (inherits(lincomb, "matrix")) { + lincomb |> + as.data.frame() |> + as.list() %>% + INLA::inla.make.lincombs() |> + setNames(rownames(lincomb)) -> lc + return(lc) + } + lc <- INLA::inla.make.lincombs(lincomb) + if (is.matrix(lincomb[[1]])) { + names(lc) <- rownames(lincomb[[1]]) + } else { + names(lc) <- names(lincomb[[1]]) + } + return(lc) +} diff --git a/man/fit_model.Rd b/man/fit_model.Rd index beb447d6..31782e39 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -3,7 +3,8 @@ % R/fit_model_n2k_aggregate.R, R/fit_model_n2k_composite.R, % R/fit_model_n2k_hurdle_imputed.R, R/fit_model_n2k_inla.R, % R/fit_model_n2k_inla_comparison.R, R/fit_model_n2k_manifest.R, -% R/fit_model_n2k_model_imputed.R, R/fit_model_s3_object.R +% R/fit_model_n2k_model_imputed.R, R/fit_model_n2k_spde.R, +% R/fit_model_s3_object.R \docType{methods} \name{fit_model} \alias{fit_model} @@ -15,6 +16,7 @@ \alias{fit_model,n2kInlaComparison-method} \alias{fit_model,n2kManifest-method} \alias{fit_model,n2kModelImputed-method} +\alias{fit_model,n2kSpde-method} \alias{fit_model,s3_object-method} \title{Fit an \code{n2kModel} object} \usage{ @@ -61,6 +63,16 @@ fit_model(x, ...) \S4method{fit_model}{n2kModelImputed}(x, ...) +\S4method{fit_model}{n2kSpde}( + x, + status = "new", + ..., + timeout = NULL, + seed = get_seed(x), + num_threads = NULL, + parallel_configs = TRUE +) + \S4method{fit_model}{s3_object}(x, status = c("new", "waiting"), ...) } \arguments{ From eea97118cc6a902eeaa02c773f064410f19fe14f Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 10 Sep 2024 20:29:03 +0200 Subject: [PATCH 07/50] bump package version --- .zenodo.json | 8 ++++---- CITATION.cff | 2 +- DESCRIPTION | 2 +- LICENSE.md | 2 +- NEWS.md | 4 ++++ inst/CITATION | 4 ++-- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index b1088cec..1d1a09aa 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,9 +1,9 @@ { "title": "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring", - "version": "0.3.2", + "version": "0.4.0", "license": "GPL-3.0", "upload_type": "software", - "description": "

All generic functions and classes for the analysis for the ‘Natura\n2000’ monitoring. The classes contain all required data and definitions\nto fit the model without the need to access other sources. Potentially\nthey might need access to one or more parent objects. An aggregation\nobject might for example need the result of an imputation object. The\nactual definition of the analysis, using these generic function and\nclasses, is defined in dedictated analysis R packages for every\nmonitoring scheme. For example ‘abvanalysis’ and\n‘watervogelanalysis’.<\/p>", + "description": "

All generic functions and classes for the analysis for the ‘Natura 2000’ monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. For example ‘abvanalysis’ and ‘watervogelanalysis’.<\/p>", "keywords": [ "analysis, reproducible research, natura 2000, monitoring" ], @@ -14,11 +14,11 @@ "name": "Onkelinx, Thierry", "affiliation": "Research Institute for Nature and Forest (INBO)", "orcid": "0000-0001-8804-4216", - "type": "ContactPerson" + "type": "contactperson" }, { "name": "Research Institute for Nature and Forest (INBO)", - "type": "RightsHolder" + "type": "rightsholder" } ], "creators": [ diff --git a/CITATION.cff b/CITATION.cff index dcd4a619..175d3594 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -27,4 +27,4 @@ abstract: "All generic functions and classes for the analysis for the 'Natura 20 identifiers: - type: doi value: 10.5281/zenodo.3576047 -version: 0.3.2 +version: 0.4.0 diff --git a/DESCRIPTION b/DESCRIPTION index 4a57b3f6..8b18f2d5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: n2kanalysis Title: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring -Version: 0.3.2 +Version: 0.4.0 Authors@R: c( person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")), diff --git a/LICENSE.md b/LICENSE.md index 2fb2e74d..379c1b2d 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -### GNU GENERAL PUBLIC LICENSE +# GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 diff --git a/NEWS.md b/NEWS.md index 028e2164..e139687a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# `n2kanalysis` 0.4.0 + +* Handle INLA models with an SPDE element. + # `n2kanalysis` 0.3.2 * Make `fit_model()` more efficient. diff --git a/inst/CITATION b/inst/CITATION index f0100d0f..a165738f 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,12 +2,12 @@ citHeader("To cite `n2kanalysis` in publications please use:") # begin checklist entry bibentry( bibtype = "Manual", - title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.3.2", + title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0", author = c( author = c(person(given = "Thierry", family = "Onkelinx"))), year = 2024, url = "https://github.com/inbo/n2kanalysis/", abstract = "All generic functions and classes for the analysis for the 'Natura 2000' monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. For example 'abvanalysis' and 'watervogelanalysis'.", - textVersion = "Onkelinx, Thierry (2024) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.3.2. https://github.com/inbo/n2kanalysis/", + textVersion = "Onkelinx, Thierry (2024) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0. https://github.com/inbo/n2kanalysis/", keywords = "analysis, reproducible research, natura 2000, monitoring", doi = "10.5281/zenodo.3576047", ) From e03ab117e0a01c4e906c253ed35ac070e675d8bd Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 11 Sep 2024 17:37:48 +0200 Subject: [PATCH 08/50] fix checklist issues --- R/fit_model_n2k_spde.R | 10 +++++----- R/make_a.R | 2 ++ R/n2k_spde.R | 1 + R/spde.R | 8 +++++--- R/spde2matern.R | 2 +- R/spde_class.R | 6 ++++-- man/Spde-class.Rd | 6 ++++-- man/make_a.Rd | 3 +++ man/n2k_spde.Rd | 2 ++ man/spde.Rd | 8 +++++--- man/spde2matern.Rd | 4 ++-- 11 files changed, 34 insertions(+), 18 deletions(-) diff --git a/R/fit_model_n2k_spde.R b/R/fit_model_n2k_spde.R index 0084cffb..3ab101dd 100644 --- a/R/fit_model_n2k_spde.R +++ b/R/fit_model_n2k_spde.R @@ -37,7 +37,7 @@ setMethod( response <- all.vars(fm)[attr(fm, "response")] stack_observed <- INLA::inla.stack( data = data[!is.na(data[[response]]), response, drop = FALSE], - A = list(1, make_a(object = x@Spde, data[!is.na(data[[response]]),])), + A = list(1, make_a(object = x@Spde, data[!is.na(data[[response]]), ])), tag = "observed", effects = list( data[ @@ -48,7 +48,7 @@ setMethod( ) INLA::inla.stack( data = data[is.na(data[[response]]), response, drop = FALSE], - A = list(1, make_a(object = x@Spde, data[is.na(data[[response]]),])), + A = list(1, make_a(object = x@Spde, data[is.na(data[[response]]), ])), tag = "observed", effects = list( data[is.na(data[[response]]), colnames(data) != response, drop = FALSE], @@ -66,7 +66,7 @@ setMethod( if (mean(is.na(data[[response]])) < 0.10) { # directly fit model when less than 10% missing data control$data <- INLA::inla.stack.data(stack_total, spde = spde) - control$control.predictor = list( + control$control.predictor <- list( A = INLA::inla.stack.A(stack_total), link = 1 ) control$lincomb <- lc @@ -80,7 +80,7 @@ setMethod( } else { # first fit model without missing data control$data <- INLA::inla.stack.data(stack_observed, spde = spde) - control$control.predictor = list(A = INLA::inla.stack.A(stack_observed)) + control$control.predictor <- list(A = INLA::inla.stack.A(stack_observed)) m0 <- try({ if (!is.null(timeout)) { assert_that(is.number(timeout), timeout > 0) @@ -96,7 +96,7 @@ setMethod( } # then refit with missing data control$data <- INLA::inla.stack.data(stack_total, spde = spde) - control$control.predictor = list( + control$control.predictor <- list( A = INLA::inla.stack.A(stack_total), link = 1 ) control$lincomb <- lc diff --git a/R/make_a.R b/R/make_a.R index b5a9af86..b0216e1c 100644 --- a/R/make_a.R +++ b/R/make_a.R @@ -1,5 +1,7 @@ #' Convert an `Spde` object to a A object #' @param object The `Spde` object +#' @param data The `data.frame` to combine with the `Spde` object to create the +#' `A` matrix. #' @name make_a #' @rdname make_a #' @exportMethod make_a diff --git a/R/n2k_spde.R b/R/n2k_spde.R index 71843903..17099ca8 100644 --- a/R/n2k_spde.R +++ b/R/n2k_spde.R @@ -17,6 +17,7 @@ setGeneric( ) #' @description A new `n2kSpde` model is created when `data` is a `data.frame`. +#' @param spde the `Spde` object. See [spde]. #' @rdname n2k_spde #' @aliases n2k_spde,n2kSpde-methods #' @importFrom methods setMethod new diff --git a/R/spde.R b/R/spde.R index ba14d864..05fc3dd5 100644 --- a/R/spde.R +++ b/R/spde.R @@ -1,9 +1,11 @@ #' Create an `Spde` object #' @param coordinates a `data.frame` of coordinates use to define the mesh. #' @param range a numeric vector of length 2. -#' Will be used as the `prior.range` argument of [INLA::inla.spde2.pcmatern]. -#' @param Sigma a numeric vector of length 2. -#' Will be used as the `prior.sigma` argument of [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.range` argument of +#' [INLA::inla.spde2.pcmatern]. +#' @param sigma a numeric vector of length 2. +#' Will be used as the `prior.sigma` argument of +#' [INLA::inla.spde2.pcmatern]. #' @export #' @importFrom assertthat assert_that noNA #' @importFrom methods new diff --git a/R/spde2matern.R b/R/spde2matern.R index af8b3fc2..181e15ce 100644 --- a/R/spde2matern.R +++ b/R/spde2matern.R @@ -1,4 +1,4 @@ -#' Convert an `Spde` object to a matern object +#' Convert an `Spde` object to a Matern object #' @param object The `Spde` object #' @name spde2matern #' @rdname spde2matern diff --git a/R/spde_class.R b/R/spde_class.R index 3e2c1067..47fc2a1d 100644 --- a/R/spde_class.R +++ b/R/spde_class.R @@ -3,9 +3,11 @@ #' It holds the coordinates, range and sigma parameters for the SPDE model. #' @slot Coordinates a `data.frame` with the coordinates used for the mesh. #' @slot Range a numeric vector of length 2. -#' Will be used as the `prior.range` argument of [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.range` argument of +#' [INLA::inla.spde2.pcmatern]. #' @slot Sigma a numeric vector of length 2. -#' Will be used as the `prior.sigma` argument of [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.sigma` argument of +#' [INLA::inla.spde2.pcmatern]. #' @name Spde-class #' @rdname Spde-class #' @exportClass Spde diff --git a/man/Spde-class.Rd b/man/Spde-class.Rd index 88cafaa7..a8b314e3 100644 --- a/man/Spde-class.Rd +++ b/man/Spde-class.Rd @@ -13,9 +13,11 @@ It holds the coordinates, range and sigma parameters for the SPDE model. \item{\code{Coordinates}}{a \code{data.frame} with the coordinates used for the mesh.} \item{\code{Range}}{a numeric vector of length 2. -Will be used as the \code{prior.range} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} +Will be used as the \code{prior.range} argument of +\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} \item{\code{Sigma}}{a numeric vector of length 2. -Will be used as the \code{prior.sigma} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} +Will be used as the \code{prior.sigma} argument of +\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} }} diff --git a/man/make_a.Rd b/man/make_a.Rd index ab64631c..f761343b 100644 --- a/man/make_a.Rd +++ b/man/make_a.Rd @@ -12,6 +12,9 @@ make_a(object, data) } \arguments{ \item{object}{The \code{Spde} object} + +\item{data}{The \code{data.frame} to combine with the \code{Spde} object to create the +\code{A} matrix.} } \description{ Convert an \code{Spde} object to a A object diff --git a/man/n2k_spde.Rd b/man/n2k_spde.Rd index d177394a..abe9a5fe 100644 --- a/man/n2k_spde.Rd +++ b/man/n2k_spde.Rd @@ -67,6 +67,8 @@ Defaults to \code{"waiting"}.} \item{model_type}{The type of the models. Must start with \code{"inla comparison:"}.} +\item{spde}{the \code{Spde} object. See \link{spde}.} + \item{first_imported_year}{Oldest year considered in the data.} \item{last_imported_year}{Most recent year considered in the data.} diff --git a/man/spde.Rd b/man/spde.Rd index 04f525dd..ae19ff48 100644 --- a/man/spde.Rd +++ b/man/spde.Rd @@ -10,10 +10,12 @@ spde(coordinates, range, sigma) \item{coordinates}{a \code{data.frame} of coordinates use to define the mesh.} \item{range}{a numeric vector of length 2. -Will be used as the \code{prior.range} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} +Will be used as the \code{prior.range} argument of +\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} -\item{Sigma}{a numeric vector of length 2. -Will be used as the \code{prior.sigma} argument of \link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}.} +\item{sigma}{a numeric vector of length 2. +Will be used as the \code{prior.sigma} argument of +\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} } \description{ Create an \code{Spde} object diff --git a/man/spde2matern.Rd b/man/spde2matern.Rd index c5abe3d3..9c1e588d 100644 --- a/man/spde2matern.Rd +++ b/man/spde2matern.Rd @@ -3,7 +3,7 @@ \docType{methods} \name{spde2matern} \alias{spde2matern} -\title{Convert an \code{Spde} object to a matern object} +\title{Convert an \code{Spde} object to a Matern object} \usage{ spde2matern(object) } @@ -11,5 +11,5 @@ spde2matern(object) \item{object}{The \code{Spde} object} } \description{ -Convert an \code{Spde} object to a matern object +Convert an \code{Spde} object to a Matern object } From 0c5e4557254522f9bef54be01212615830f3531c Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 11 Sep 2024 17:39:12 +0200 Subject: [PATCH 09/50] update checklist infrastructure --- .Rbuildignore | 1 + .github/workflows/check_on_branch.yml | 6 +-- .github/workflows/check_on_different_r_os.yml | 1 + .github/workflows/check_on_main.yml | 6 +-- .github/workflows/release.yml | 27 ++++-------- .gitignore | 2 +- _pkgdown.yml | 44 ++++++++++--------- pkgdown/extra.css | 17 ++++--- 8 files changed, 49 insertions(+), 55 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 78ea75d2..38dc18f6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,5 +14,6 @@ ^data-raw$ ^docs$ ^man-roxygen$ +^organisation.yml$ ^pkgdown$ ^wercker\.yml$ diff --git a/.github/workflows/check_on_branch.yml b/.github/workflows/check_on_branch.yml index 80bf4d8f..a013b023 100644 --- a/.github/workflows/check_on_branch.yml +++ b/.github/workflows/check_on_branch.yml @@ -14,11 +14,7 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - N2KBUCKET: ${{ secrets.N2KBUCKET }} permissions: contents: read steps: - - uses: inbo/actions/check_pkg@checklist-0.3.6 + - uses: inbo/actions/check_pkg@checklist-0.4.1 diff --git a/.github/workflows/check_on_different_r_os.yml b/.github/workflows/check_on_different_r_os.yml index 2ac75562..76f95eb2 100644 --- a/.github/workflows/check_on_different_r_os.yml +++ b/.github/workflows/check_on_different_r_os.yml @@ -21,6 +21,7 @@ jobs: matrix: config: - {os: macOS-latest, r: 'release'} + - {os: windows-latest, r: 'release'} - {os: ubuntu-22.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} - {os: ubuntu-22.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} diff --git a/.github/workflows/check_on_main.yml b/.github/workflows/check_on_main.yml index 57e2a642..65ca9607 100644 --- a/.github/workflows/check_on_main.yml +++ b/.github/workflows/check_on_main.yml @@ -15,9 +15,5 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - N2KBUCKET: ${{ secrets.N2KBUCKET }} steps: - - uses: inbo/actions/check_pkg@checklist-0.3.6 + - uses: inbo/actions/check_pkg@checklist-0.4.1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8146b6a0..9b10b9f6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,32 +10,21 @@ on: - completed jobs: - prepare: + publish: runs-on: ubuntu-latest - outputs: - tag: ${{ steps.gettag.outputs.tag }} - body: ${{ steps.gettag.outputs.body }} + permissions: + contents: write steps: - uses: actions/checkout@v3 - name: Get tag run: | git fetch --tags --force TAG=$(git tag --contains $(git rev-parse HEAD)) - echo "tag=$TAG" >> "$GITHUB_OUTPUT" TAG_BODY=$(git tag --contains $(git rev-parse HEAD) --format='%(contents)') - TAG_BODY="${TAG_BODY//'%'/'%25'}" - TAG_BODY="${TAG_BODY//$'\n'/'%0A'}" - TAG_BODY="${TAG_BODY//$'\r'/'%0D'}" - echo "body=$TAG_BODY" >> "$GITHUB_OUTPUT" - id: gettag - publish: - runs-on: ubuntu-latest - permissions: - contents: write - needs: prepare - steps: + echo "tag=$TAG" >> $GITHUB_ENV + echo "$TAG_BODY" > body.md - uses: ncipollo/release-action@v1 with: - name: Release ${{needs.prepare.outputs.tag}} - tag: ${{needs.prepare.outputs.tag}} - body: ${{needs.prepare.outputs.body}} + name: Release ${{ env.tag }} + tag: ${{ env.tag }} + bodyFile: body.md diff --git a/.gitignore b/.gitignore index 84dba30a..8de6414e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,7 @@ .Ruserdata .httr-oauth docs +inst/doc libs output renv/library -inst/doc diff --git a/_pkgdown.yml b/_pkgdown.yml index b98f97fe..e7005f15 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,27 +1,31 @@ +template: + bootstrap: 5 + light-switch: false navbar: title: ~ type: default - left: - - text: NEWS - href: news/index.html - - text: Tutorials - href: articles/index.html - menu: - - text: Using n2kanalysis to analyse monitoring data - href: articles/workflow.html - - text: Functions - href: reference/index.html - - text: Contributing - href: CONTRIBUTING.html - right: - - icon: "fa fa-github" - href: https://github.com/inbo/n2kanalysis - - icon: "fa fa-twitter" - href: https://twitter.com/INBOVlaanderen - - icon: "fa fa-facebook" - href: https://www.facebook.com/pg/INBOVlaanderen + structure: + left: [intro, reference, news, tutorials, contributing] + right: [search, github] + components: + news: + text: Changelog + href: news/index.html + contributing: + text: Contributing + menu: + - text: Contributing + href: CONTRIBUTING.html + - text: Code of conduct + href: CODE_OF_CONDUCT.html + tutorials: + text: Tutorials + href: articles/index.html + menu: + - text: Using n2kanalysis to analyse monitoring data + href: articles/workflow.html authors: Research Institute for Nature and Forest (INBO): href: "https://www.vlaanderen.be/inbo/en-gb" - html: "" + html: "logo of the Research Institute for Nature and Forest (INBO)" diff --git a/pkgdown/extra.css b/pkgdown/extra.css index 00938dd1..dfdfc0b2 100644 --- a/pkgdown/extra.css +++ b/pkgdown/extra.css @@ -18,13 +18,20 @@ a:hover { .navbar, .label-default, -.navbar-default .navbar-nav>.active>a, .navbar-default .navbar-nav>.active>a:hover, .navbar-default .navbar-nav>.active>a:focus { - background-color: #356196; +.navbar-default .navbar-nav>.active>a, .navbar-default .navbar-nav>.active>a:hover, .navbar-default .navbar-nav>.active>a:focus, +#toc>.nav a.nav-link.active { + background-color: #356196 !important; } -.navbar-default .navbar-link, -.navbar-default .navbar-nav>li>a { - color: #ffffff; +#toc>.nav a.nav-link { + background-color: #729BB7 !important; +} + +.navbar, +.navbar-brand, +.nav-link, +.nav-text.text-muted { + color: #ffffff !important; } .nav-pills li.active>a, .nav-pills li>a:hover { From 163023605383b10a464656dd7d8ab4a3143b3e93 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 11 Sep 2024 18:00:56 +0200 Subject: [PATCH 10/50] define missing suggested package --- .github/workflows/check_on_different_r_os.yml | 2 +- DESCRIPTION | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check_on_different_r_os.yml b/.github/workflows/check_on_different_r_os.yml index 76f95eb2..bb243621 100644 --- a/.github/workflows/check_on_different_r_os.yml +++ b/.github/workflows/check_on_different_r_os.yml @@ -41,7 +41,7 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - extra-repositories: https://inla.r-inla-download.org/R/testing + extra-repositories: https://inla.r-inla-download.org/R/stable - uses: r-lib/actions/setup-pandoc@v2 diff --git a/DESCRIPTION b/DESCRIPTION index 8b18f2d5..05c81d39 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,11 +37,13 @@ Imports: tidyr (>= 0.4.0), yaml Suggests: + fmesher, INLA (>= 23.04.24), knitr, Matrix, parallel, rmarkdown, + sf, sn, testthat (>= 2.0.1) VignetteBuilder: From 85b8a5875f370985e2f57c2208c78c4f36b6b50b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 12 Sep 2024 10:50:25 +0200 Subject: [PATCH 11/50] require latest version of multimput --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 05c81d39..fe85bd40 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,7 @@ Imports: fs, MASS, methods, - multimput (>= 0.2.13), + multimput (>= 0.2.14), n2khelper (>= 0.5.0), purrr, rlang, From 365dce981a4e3b32e537496d9e86758d0e65de29 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 12 Sep 2024 11:20:41 +0200 Subject: [PATCH 12/50] fix documentation issues --- R/n2k_spde.R | 2 +- R/spde.R | 6 ++---- R/spde2matern.R | 2 +- R/spde_class.R | 6 ++---- man/Spde-class.Rd | 6 ++---- man/n2k_spde.Rd | 2 +- man/spde.Rd | 6 ++---- man/spde2matern.Rd | 4 ++-- 8 files changed, 13 insertions(+), 21 deletions(-) diff --git a/R/n2k_spde.R b/R/n2k_spde.R index 17099ca8..cc7b9609 100644 --- a/R/n2k_spde.R +++ b/R/n2k_spde.R @@ -17,7 +17,7 @@ setGeneric( ) #' @description A new `n2kSpde` model is created when `data` is a `data.frame`. -#' @param spde the `Spde` object. See [spde]. +#' @param spde the `Spde` object. See `[spde]`. #' @rdname n2k_spde #' @aliases n2k_spde,n2kSpde-methods #' @importFrom methods setMethod new diff --git a/R/spde.R b/R/spde.R index 05fc3dd5..ce8738b6 100644 --- a/R/spde.R +++ b/R/spde.R @@ -1,11 +1,9 @@ #' Create an `Spde` object #' @param coordinates a `data.frame` of coordinates use to define the mesh. #' @param range a numeric vector of length 2. -#' Will be used as the `prior.range` argument of -#' [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.range` argument of `[INLA::inla.spde2.pcmatern]`. #' @param sigma a numeric vector of length 2. -#' Will be used as the `prior.sigma` argument of -#' [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.sigma` argument of `[INLA::inla.spde2.pcmatern]`. #' @export #' @importFrom assertthat assert_that noNA #' @importFrom methods new diff --git a/R/spde2matern.R b/R/spde2matern.R index 181e15ce..7946eb58 100644 --- a/R/spde2matern.R +++ b/R/spde2matern.R @@ -1,4 +1,4 @@ -#' Convert an `Spde` object to a Matern object +#' Convert an `Spde` object to a `matern` object #' @param object The `Spde` object #' @name spde2matern #' @rdname spde2matern diff --git a/R/spde_class.R b/R/spde_class.R index 47fc2a1d..68fe52a7 100644 --- a/R/spde_class.R +++ b/R/spde_class.R @@ -3,11 +3,9 @@ #' It holds the coordinates, range and sigma parameters for the SPDE model. #' @slot Coordinates a `data.frame` with the coordinates used for the mesh. #' @slot Range a numeric vector of length 2. -#' Will be used as the `prior.range` argument of -#' [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.range` argument of `[INLA::inla.spde2.pcmatern]`. #' @slot Sigma a numeric vector of length 2. -#' Will be used as the `prior.sigma` argument of -#' [INLA::inla.spde2.pcmatern]. +#' Will be used as the `prior.sigma` argument of `[INLA::inla.spde2.pcmatern]`. #' @name Spde-class #' @rdname Spde-class #' @exportClass Spde diff --git a/man/Spde-class.Rd b/man/Spde-class.Rd index a8b314e3..3a34d1df 100644 --- a/man/Spde-class.Rd +++ b/man/Spde-class.Rd @@ -13,11 +13,9 @@ It holds the coordinates, range and sigma parameters for the SPDE model. \item{\code{Coordinates}}{a \code{data.frame} with the coordinates used for the mesh.} \item{\code{Range}}{a numeric vector of length 2. -Will be used as the \code{prior.range} argument of -\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} +Will be used as the \code{prior.range} argument of \verb{[INLA::inla.spde2.pcmatern]}.} \item{\code{Sigma}}{a numeric vector of length 2. -Will be used as the \code{prior.sigma} argument of -\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} +Will be used as the \code{prior.sigma} argument of \verb{[INLA::inla.spde2.pcmatern]}.} }} diff --git a/man/n2k_spde.Rd b/man/n2k_spde.Rd index abe9a5fe..d9b7352b 100644 --- a/man/n2k_spde.Rd +++ b/man/n2k_spde.Rd @@ -67,7 +67,7 @@ Defaults to \code{"waiting"}.} \item{model_type}{The type of the models. Must start with \code{"inla comparison:"}.} -\item{spde}{the \code{Spde} object. See \link{spde}.} +\item{spde}{the \code{Spde} object. See \verb{[spde]}.} \item{first_imported_year}{Oldest year considered in the data.} diff --git a/man/spde.Rd b/man/spde.Rd index ae19ff48..07aeecf7 100644 --- a/man/spde.Rd +++ b/man/spde.Rd @@ -10,12 +10,10 @@ spde(coordinates, range, sigma) \item{coordinates}{a \code{data.frame} of coordinates use to define the mesh.} \item{range}{a numeric vector of length 2. -Will be used as the \code{prior.range} argument of -\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} +Will be used as the \code{prior.range} argument of \verb{[INLA::inla.spde2.pcmatern]}.} \item{sigma}{a numeric vector of length 2. -Will be used as the \code{prior.sigma} argument of -\link[INLA:inla.spde2.pcmatern]{INLA::inla.spde2.pcmatern}. \if{html}{\out{}}} +Will be used as the \code{prior.sigma} argument of \verb{[INLA::inla.spde2.pcmatern]}.} } \description{ Create an \code{Spde} object diff --git a/man/spde2matern.Rd b/man/spde2matern.Rd index 9c1e588d..75450131 100644 --- a/man/spde2matern.Rd +++ b/man/spde2matern.Rd @@ -3,7 +3,7 @@ \docType{methods} \name{spde2matern} \alias{spde2matern} -\title{Convert an \code{Spde} object to a Matern object} +\title{Convert an \code{Spde} object to a \code{matern} object} \usage{ spde2matern(object) } @@ -11,5 +11,5 @@ spde2matern(object) \item{object}{The \code{Spde} object} } \description{ -Convert an \code{Spde} object to a Matern object +Convert an \code{Spde} object to a \code{matern} object } From 9d52dcfc339929cc7a237e9adde68c56d0513ff7 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 12 Sep 2024 11:39:47 +0200 Subject: [PATCH 13/50] add url to pkgdown --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index e7005f15..844160cf 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,4 @@ +url: https://inbo.github.io/n2kanalysis/ template: bootstrap: 5 light-switch: false From 93af224231ad97ce6d7b48f2931f2f74c24f8925 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 19 Sep 2024 15:38:03 +0200 Subject: [PATCH 14/50] bugfix in fit_model() on n2kSpde --- R/fit_model_n2k_spde.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/fit_model_n2k_spde.R b/R/fit_model_n2k_spde.R index 3ab101dd..92aa7477 100644 --- a/R/fit_model_n2k_spde.R +++ b/R/fit_model_n2k_spde.R @@ -47,15 +47,14 @@ setMethod( ) ) INLA::inla.stack( - data = data[is.na(data[[response]]), response, drop = FALSE], - A = list(1, make_a(object = x@Spde, data[is.na(data[[response]]), ])), + data = data[, response, drop = FALSE], + A = list(1, make_a(object = x@Spde, data)), tag = "observed", effects = list( - data[is.na(data[[response]]), colnames(data) != response, drop = FALSE], + data[, colnames(data) != response, drop = FALSE], index ) - ) |> - INLA::inla.stack(stack_observed) -> stack_total + ) -> stack_total # prepare linear combinations lc <- model2lincomb(x@LinearCombination) # prepare inla() arguments From d24c21cc4b97a84f12a65416b3148356a23425dc Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 19 Sep 2024 15:38:16 +0200 Subject: [PATCH 15/50] add moving_trend() --- DESCRIPTION | 1 + NAMESPACE | 1 + R/moving_trend.R | 38 ++++++++++++++++++++++++++++++++++++++ man/moving_trend.Rd | 31 +++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 R/moving_trend.R create mode 100644 man/moving_trend.Rd diff --git a/DESCRIPTION b/DESCRIPTION index fe85bd40..c9be53d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -128,6 +128,7 @@ Collate: 'make_a.R' 'manifest_yaml_to_bash.R' 'mark_obsolete_dataset.R' + 'moving_trend.R' 'n2k_aggregated.R' 'n2k_composite.R' 'n2k_hurdle_imputed.R' diff --git a/NAMESPACE b/NAMESPACE index 3340f8bd..29df6593 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(fit_every_model) export(get_parents) export(inla_inverse) export(mark_obsolete_dataset) +export(moving_trend) export(select_factor_count_strictly_positive) export(select_factor_threshold) export(select_observed_range) diff --git a/R/moving_trend.R b/R/moving_trend.R new file mode 100644 index 00000000..2afee4cc --- /dev/null +++ b/R/moving_trend.R @@ -0,0 +1,38 @@ +#' Calculate moving trend coefficients +#' @param n_year Number of available years in the data. +#' @param duration Number of years in the moving window. +#' If the number of available years is less than this value, the trend will be +#' calculated for the available years. +#' @param first_year First year of the data. +#' Only used to name the rows. +#' @return A matrix with the moving trend coefficients. +#' One row for each window and and one column for each year in the data. +#' The format of the row names is `trend_{window mid point}_{window length}`. +#' `trend_2001.5_4` is the trend for the years 2000 to 2003. +#' @export +#' @examples +#' moving_trend(5, 3) +#' moving_trend(5, 4, 2000) +#' @export +#' @importFrom assertthat assert_that is.count is.number +moving_trend <- function(n_year, duration, first_year = 0) { + assert_that(is.count(n_year), is.count(duration), is.number(first_year)) + duration <- min(n_year, duration) + trend_coef <- seq_len(duration) - (duration + 1) / 2 + vapply( + seq_len(n_year - duration + 1), + function(i, trend_coef) { + c(rep(0, i - 1), trend_coef, rep(0, n_year - duration - i + 1)) + }, + numeric(n_year), trend_coef = trend_coef / sum(trend_coef ^ 2) + ) |> + `colnames<-`( + sprintf( + "trend_%.1f_%i", + seq_len(n_year - duration + 1) + median(seq_len(duration)) - 2 + + first_year, + duration + ) + ) |> + t() +} diff --git a/man/moving_trend.Rd b/man/moving_trend.Rd new file mode 100644 index 00000000..e3d555c6 --- /dev/null +++ b/man/moving_trend.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/moving_trend.R +\name{moving_trend} +\alias{moving_trend} +\title{Calculate moving trend coefficients} +\usage{ +moving_trend(n_year, duration, first_year = 0) +} +\arguments{ +\item{n_year}{Number of available years in the data.} + +\item{duration}{Number of years in the moving window. +If the number of available years is less than this value, the trend will be +calculated for the available years.} + +\item{first_year}{First year of the data. +Only used to name the rows.} +} +\value{ +A matrix with the moving trend coefficients. +One row for each window and and one column for each year in the data. +The format of the row names is \verb{trend_\{window mid point\}_\{window length\}}. +\code{trend_2001.5_4} is the trend for the years 2000 to 2003. +} +\description{ +Calculate moving trend coefficients +} +\examples{ +moving_trend(5, 3) +moving_trend(5, 4, 2000) +} From b48582b125c4696463f22690516ceb0f6f40a58b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 19 Sep 2024 17:31:33 +0200 Subject: [PATCH 16/50] add moving_trend() --- DESCRIPTION | 1 + NAMESPACE | 1 + R/moving_average.R | 31 +++++++++++++++++++++++++++++++ man/moving_average.Rd | 31 +++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 R/moving_average.R create mode 100644 man/moving_average.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c9be53d3..e2fd04df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -128,6 +128,7 @@ Collate: 'make_a.R' 'manifest_yaml_to_bash.R' 'mark_obsolete_dataset.R' + 'moving_average.R' 'moving_trend.R' 'n2k_aggregated.R' 'n2k_composite.R' diff --git a/NAMESPACE b/NAMESPACE index 29df6593..9974a6ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(fit_every_model) export(get_parents) export(inla_inverse) export(mark_obsolete_dataset) +export(moving_average) export(moving_trend) export(select_factor_count_strictly_positive) export(select_factor_threshold) diff --git a/R/moving_average.R b/R/moving_average.R new file mode 100644 index 00000000..90f2de70 --- /dev/null +++ b/R/moving_average.R @@ -0,0 +1,31 @@ +#' Calculate moving trend coefficients +#' @inheritParams moving_trend +#' @return A matrix with moving average coefficients +#' One row for each window and and one column for each year in the data. +#' The format of the row names is `average_{window mid point}_{window length}`. +#' `average_2001.5_4` is the average for the years 2000 to 2003. +#' @export +#' @examples +#' moving_average(5, 3) +#' moving_average(5, 3, 2000) +#' @importFrom assertthat assert_that is.count is.number +moving_average <- function(n_year, duration, first_year = 0) { + assert_that(is.count(n_year), is.count(duration), is.number(first_year)) + duration <- min(n_year, duration) + vapply( + seq_len(n_year - duration + 1) - 1, + FUN.VALUE = vector(mode = "numeric", length = n_year), + FUN = function(i, trend_coef, n_year) { + c(rep(0, i), trend_coef, rep(0, n_year - length(trend_coef) - i)) + }, trend_coef = rep(1 / duration, duration), n_year = n_year + ) |> + `colnames<-`( + sprintf( + "average_%.1f_%i", + seq_len(n_year - duration + 1) + median(seq_len(duration)) - 2 + + first_year, + duration + ) + ) |> + t() +} diff --git a/man/moving_average.Rd b/man/moving_average.Rd new file mode 100644 index 00000000..520bfa32 --- /dev/null +++ b/man/moving_average.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/moving_average.R +\name{moving_average} +\alias{moving_average} +\title{Calculate moving trend coefficients} +\usage{ +moving_average(n_year, duration, first_year = 0) +} +\arguments{ +\item{n_year}{Number of available years in the data.} + +\item{duration}{Number of years in the moving window. +If the number of available years is less than this value, the trend will be +calculated for the available years.} + +\item{first_year}{First year of the data. +Only used to name the rows.} +} +\value{ +A matrix with moving average coefficients +One row for each window and and one column for each year in the data. +The format of the row names is \verb{average_\{window mid point\}_\{window length\}}. +\code{average_2001.5_4} is the average for the years 2000 to 2003. +} +\description{ +Calculate moving trend coefficients +} +\examples{ +moving_average(5, 3) +moving_average(5, 3, 2000) +} From 79854e9ef02898e6dbafdb9582d1b40c17573bcd Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 19 Sep 2024 17:31:54 +0200 Subject: [PATCH 17/50] add moving_difference() --- DESCRIPTION | 1 + NAMESPACE | 1 + R/moving_difference.R | 44 ++++++++++++++++++++++++++++++++++++++++ man/moving_difference.Rd | 33 ++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+) create mode 100644 R/moving_difference.R create mode 100644 man/moving_difference.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e2fd04df..5c5909c0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -129,6 +129,7 @@ Collate: 'manifest_yaml_to_bash.R' 'mark_obsolete_dataset.R' 'moving_average.R' + 'moving_difference.R' 'moving_trend.R' 'n2k_aggregated.R' 'n2k_composite.R' diff --git a/NAMESPACE b/NAMESPACE index 9974a6ec..e79f09f4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(get_parents) export(inla_inverse) export(mark_obsolete_dataset) export(moving_average) +export(moving_difference) export(moving_trend) export(select_factor_count_strictly_positive) export(select_factor_threshold) diff --git a/R/moving_difference.R b/R/moving_difference.R new file mode 100644 index 00000000..adebb8ad --- /dev/null +++ b/R/moving_difference.R @@ -0,0 +1,44 @@ +#' Calculate coefficients for a moving difference +#' @inheritParams moving_trend +#' @return A matrix with moving average coefficients +#' One row for each window and and one column for each year in the data. +#' The format of the row names is +#' `difference_{window mid point start}_{window mid point end}_{window length}`. +#' `difference_2001.5_2010.5_4` is the difference of the average for period for +#' the years 2009 to 2012 compared to the period from 2000 to 2003. +#' @export +#' @examples +#' moving_difference(6, 2) +#' moving_difference(6, 2, 2000) +#' @importFrom assertthat assert_that is.count is.number +moving_difference <- function(n_year, duration, first_year) { + assert_that(is.count(n_year), is.count(duration), is.number(first_year)) + duration <- min(floor(n_year / 2), duration) + list(seq_len(n_year - 2 * duration + 1) - 1) |> + rep(2) |> + expand.grid() -> extra_zero + extra_zero <- extra_zero[ + rowSums(extra_zero) <= n_year - 2 * duration, + ] + vapply( + seq_len(nrow(extra_zero)), + FUN.VALUE = vector(mode = "numeric", length = n_year), + FUN = function(i, trend_coef, n_year, extra_zero) { + c( + rep(0, extra_zero[i, 1]), -trend_coef, + rep(0, n_year - 2 * length(trend_coef) - sum(extra_zero[i, ])), + trend_coef, rep(0, extra_zero[i, 2]) + ) + }, trend_coef = rep(1 / duration, duration), n_year = n_year, + extra_zero = extra_zero + ) |> + `colnames<-`( + sprintf( + "difference_%.1f_%.1f_%i", + extra_zero[, 1] + median(seq_len(duration)) - 1 + first_year, + n_year - extra_zero[, 2] - median(seq_len(duration)) + first_year, + duration + ) + ) |> + t() +} diff --git a/man/moving_difference.Rd b/man/moving_difference.Rd new file mode 100644 index 00000000..0afe862b --- /dev/null +++ b/man/moving_difference.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/moving_difference.R +\name{moving_difference} +\alias{moving_difference} +\title{Calculate coefficients for a moving difference} +\usage{ +moving_difference(n_year, duration, first_year) +} +\arguments{ +\item{n_year}{Number of available years in the data.} + +\item{duration}{Number of years in the moving window. +If the number of available years is less than this value, the trend will be +calculated for the available years.} + +\item{first_year}{First year of the data. +Only used to name the rows.} +} +\value{ +A matrix with moving average coefficients +One row for each window and and one column for each year in the data. +The format of the row names is +\verb{difference_\{window mid point start\}_\{window mid point end\}_\{window length\}}. +\code{difference_2001.5_2010.5_4} is the difference of the average for period for +the years 2009 to 2012 compared to the period from 2000 to 2003. +} +\description{ +Calculate coefficients for a moving difference +} +\examples{ +moving_difference(6, 2) +moving_difference(6, 2, 2000) +} From c8de1e6003ac03e021b37a8ea5483cb150462f30 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sun, 22 Sep 2024 14:24:02 +0200 Subject: [PATCH 18/50] fit fit_model() on n2kManifest --- R/fit_model_n2k_manifest.R | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index 3b044477..2b16892b 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -25,25 +25,17 @@ setMethod( length(status) >= 1 ) to_do <- order_manifest(x) - remaining <- length(to_do) - while (length(to_do) > 1 && first) { - head(to_do, 1) |> - hash_status(base = base, project = project) -> stat - if (stat %in% status) { - to_do <- head(to_do, 1) - } else { - to_do <- tail(to_do, -1) - } - } + stat <- map_chr(to_do, ~hash_status(base = base, project = project, .x)) + to_do <- to_do[stat %in% status] if (length(to_do) == 0) { - return(invisible(0)) + return(invisible(NULL)) } if (inherits(base, "character")) { walk( to_do, fit_model, base = base, project = project, status = status, verbose = verbose, ... ) - return(invisible(remaining)) + return(invisible(NULL)) } display(verbose, "Downloading objects") @@ -70,13 +62,14 @@ setMethod( to_do, download_model, base = local, project = project, local = base, verbose = verbose ) - return(invisible(remaining)) + return(invisible(NULL)) } ) #' @importFrom aws.s3 get_bucket #' @importFrom purrr map_chr hash_status <- function(hash, base, project) { + assert_that(is.string(hash), is.string(project)) if (inherits(base, "s3_bucket")) { substr(hash, 1, 4) |> sprintf(fmt = "%2$s/%1$s/", project) |> @@ -87,7 +80,11 @@ hash_status <- function(hash, base, project) { unname() -> output return(output) } - stop("hash status for ", class(base), " still do to") + stopifnot(inherits(base, "character")) + assert_that(is.string(base)) + file.path(base, project) |> + list.files(recursive = TRUE, pattern = hash) |> + gsub(x = _, pattern = ".*/(.*)/.*\\.rds", replacement = "\\1") } download_model <- function(hash, base, local, project, verbose = FALSE) { From 1f27287338fc4fd8401f0f8e5c396f3e64b8cdf1 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 3 Oct 2024 15:14:47 +0200 Subject: [PATCH 19/50] add pkgdown url to DESCRIPTION --- CITATION.cff | 2 ++ DESCRIPTION | 3 ++- inst/CITATION | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 175d3594..6f445561 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -27,4 +27,6 @@ abstract: "All generic functions and classes for the analysis for the 'Natura 20 identifiers: - type: doi value: 10.5281/zenodo.3576047 +- type: url + value: https://inbo.github.io/n2kanalysis/ version: 0.4.0 diff --git a/DESCRIPTION b/DESCRIPTION index 5c5909c0..bca0b2f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,8 @@ Description: All generic functions and classes for the analysis for the every monitoring scheme. For example 'abvanalysis' and 'watervogelanalysis'. License: GPL-3 -URL: https://doi.org/10.5281/zenodo.3576047 +URL: https://doi.org/10.5281/zenodo.3576047, + https://inbo.github.io/n2kanalysis/ BugReports: https://github.com/inbo/n2kanalysis/issues Depends: R (>= 4.2.0) diff --git a/inst/CITATION b/inst/CITATION index a165738f..535e547b 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,9 +5,9 @@ bibentry( title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0", author = c( author = c(person(given = "Thierry", family = "Onkelinx"))), year = 2024, - url = "https://github.com/inbo/n2kanalysis/", + url = "https://inbo.github.io/n2kanalysis/", abstract = "All generic functions and classes for the analysis for the 'Natura 2000' monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. For example 'abvanalysis' and 'watervogelanalysis'.", - textVersion = "Onkelinx, Thierry (2024) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0. https://github.com/inbo/n2kanalysis/", + textVersion = "Onkelinx, Thierry (2024) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0. https://inbo.github.io/n2kanalysis/", keywords = "analysis, reproducible research, natura 2000, monitoring", doi = "10.5281/zenodo.3576047", ) From cf100192ae71e3dc0ff2003421a16b523c600be3 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Mon, 7 Oct 2024 16:49:59 +0200 Subject: [PATCH 20/50] fit_model() on hurdleImput stores intermediate models --- R/fit_model_n2k_hurdle_imputed.R | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/R/fit_model_n2k_hurdle_imputed.R b/R/fit_model_n2k_hurdle_imputed.R index 8635c822..272040b4 100644 --- a/R/fit_model_n2k_hurdle_imputed.R +++ b/R/fit_model_n2k_hurdle_imputed.R @@ -18,7 +18,7 @@ setMethod( return(x) } - if (status(x) != "new") { + if (is.null(x@Presence) || status(x) %in% c("converged", "error")) { presence <- read_model( x@AnalysisRelation$parent_analysis[1], base = base, project = project ) @@ -29,7 +29,26 @@ setMethod( x@AnalysisRelation$parentstatus[1] <- status(presence) rm(presence) gc() - + x@AnalysisMetadata$status <- ifelse( + all(x@AnalysisRelation$parentstatus == "converged"), "new", + ifelse( + any(!x@AnalysisRelation$parentstatus %in% + c("new", "waiting", "converged")), + "error", "waiting" + ) + ) + x@AnalysisMetadata$status_fingerprint <- sha1( + list( + get_file_fingerprint(x), x@AnalysisMetadata$status, + x@AnalysisVersion$fingerprint, x@AnalysisVersion, x@RPackage, + x@AnalysisVersionRPackage, x@AnalysisRelation, x@Presence, x@Count, + x@Hurdle + ), + digits = 6L + ) + store_model(x, base = base, project = project) + } + if (is.null(x@Count) || status(x) %in% c("converged", "error")) { count <- read_model( x@AnalysisRelation$parent_analysis[2], base = base, project = project ) @@ -58,10 +77,7 @@ setMethod( ), digits = 6L ) - } - - if (status(x) != "new") { - return(x) + store_model(x, base = base, project = project) } result <- try(hurdle_impute(x@Presence, x@Count)) From 20afc6705b05dd18b068f0b9a46367499e5e5054 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 8 Oct 2024 09:43:05 +0200 Subject: [PATCH 21/50] manifest_yaml_to_bash() creates the project and yaml subfolders when missing --- R/manifest_yaml_to_bash.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/manifest_yaml_to_bash.R b/R/manifest_yaml_to_bash.R index c08298d1..c7ed3bcf 100644 --- a/R/manifest_yaml_to_bash.R +++ b/R/manifest_yaml_to_bash.R @@ -143,13 +143,11 @@ setMethod( ) assert_that(split == 1, msg = "`split > 1` to do on local file systems.") assert_that( - file_test("-d", path(base, project)), - msg = sprintf("`%s` is not a subdirectory of `%s`", project, base) - ) - assert_that( - file_test("-d", path(base, project, "yaml")), - msg = sprintf("`yaml` is not a subdirectory of `%s/%s`", base, project) + file_test("-d", base), + msg = sprintf("`%s` is not an existing folder", base) ) + path(base, project, "yaml") |> + dir.create(showWarnings = FALSE) if (missing(hash)) { path(base, project, "yaml") |> dir_info(type = "file", regexp = "\\.yaml$") |> From a48dd4ae7f39b1707ded5799e36fe01e3be5b167 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 8 Oct 2024 09:51:50 +0200 Subject: [PATCH 22/50] fit_model() on n2kSpde with imputations returns an n2kSpde model --- R/fit_model_n2k_spde.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_model_n2k_spde.R b/R/fit_model_n2k_spde.R index 92aa7477..2c0816aa 100644 --- a/R/fit_model_n2k_spde.R +++ b/R/fit_model_n2k_spde.R @@ -130,7 +130,7 @@ setMethod( return(n2k_spde(data = x, model_fit = model, status = "error")) } # return fitted model with imputations - return(n2k_inla( + return(n2k_spde( data = x, model_fit = model, status = "converged", raw_imputed = imputed )) } From 124f8cd706ac926f09c31fb4b5cee71040384960 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 9 Oct 2024 11:20:25 +0200 Subject: [PATCH 23/50] export order_manifest() --- NAMESPACE | 1 + R/manifest_yaml_to_bash.R | 8 ++++++++ man/order_manifest.Rd | 14 ++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 man/order_manifest.Rd diff --git a/NAMESPACE b/NAMESPACE index e79f09f4..be2030d0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(mark_obsolete_dataset) export(moving_average) export(moving_difference) export(moving_trend) +export(order_manifest) export(select_factor_count_strictly_positive) export(select_factor_threshold) export(select_observed_range) diff --git a/R/manifest_yaml_to_bash.R b/R/manifest_yaml_to_bash.R index c7ed3bcf..c1253458 100644 --- a/R/manifest_yaml_to_bash.R +++ b/R/manifest_yaml_to_bash.R @@ -204,7 +204,15 @@ date", } ) +#' Convert a manifest in vector of analysis fingerprints +#' +#' The order of the analysis is determined by the parent-child relationship. +#' It starts with the analyses without parents. +#' Then it adds the analyses with parents that have already been added. +#' This process is repeated until all analyses have been added. +#' @inheritParams store_manifest #' @importFrom assertthat assert_that +#' @export order_manifest <- function(manifest) { assert_that(inherits(manifest, "n2kManifest")) full <- slot(manifest, "Manifest") diff --git a/man/order_manifest.Rd b/man/order_manifest.Rd new file mode 100644 index 00000000..b5d5ac78 --- /dev/null +++ b/man/order_manifest.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/manifest_yaml_to_bash.R +\name{order_manifest} +\alias{order_manifest} +\title{Convert a manifest in vector of analysis fingerprints} +\usage{ +order_manifest(manifest) +} +\description{ +The order of the analysis is determined by the parent-child relationship. +It starts with the analyses without parents. +Then it adds the analyses with parents that have already been added. +This process is repeated until all analyses have been added. +} From e65e5594e4445510e7c49663bab76b794d96b2fc Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sun, 27 Oct 2024 11:03:11 +0100 Subject: [PATCH 24/50] add get_datafield_id() --- DESCRIPTION | 1 + NAMESPACE | 5 +++++ R/get_datafield_id.R | 49 +++++++++++++++++++++++++++++++++++++++++ man/get_datafield_id.Rd | 24 ++++++++++++++++++++ 4 files changed, 79 insertions(+) create mode 100644 R/get_datafield_id.R create mode 100644 man/get_datafield_id.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bca0b2f4..8d701d4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -102,6 +102,7 @@ Collate: 'get_anomaly_n2k_inla.R' 'get_anomaly_n2k_model.R' 'get_data.R' + 'get_datafield_id.R' 'get_file_fingerprint.R' 'get_formula.R' 'get_location_group_id.R' diff --git a/NAMESPACE b/NAMESPACE index be2030d0..809a69a8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method(sha1,inla) S3method(sha1,rawImputed) export(display) export(fit_every_model) +export(get_datafield_id) export(get_parents) export(inla_inverse) export(mark_obsolete_dataset) @@ -142,6 +143,10 @@ importFrom(fs,file_delete) importFrom(fs,file_exists) importFrom(fs,path) importFrom(fs,path_abs) +importFrom(git2rdata,is_git2rmeta) +importFrom(git2rdata,update_metadata) +importFrom(git2rdata,verify_vc) +importFrom(git2rdata,write_vc) importFrom(methods,new) importFrom(methods,setClass) importFrom(methods,setClassUnion) diff --git a/R/get_datafield_id.R b/R/get_datafield_id.R new file mode 100644 index 00000000..d6092456 --- /dev/null +++ b/R/get_datafield_id.R @@ -0,0 +1,49 @@ +#' Get the datafield id +#' @param table The table name +#' @param field The field name +#' @param datasource The datasource name +#' @inheritParams git2rdata::write_vc +#' @export +#' @importFrom assertthat assert_that is.string noNA +#' @importFrom git2rdata is_git2rmeta update_metadata verify_vc write_vc +get_datafield_id <- function(table, field, datasource, root, stage = FALSE) { + assert_that( + is.string(table), is.string(field), is.string(datasource), noNA(table), + noNA(field), noNA(datasource) + ) + if (!is_git2rmeta(file = "datafield", root = root)) { + data.frame(id = 1L, table = table, field = field, source = datasource) |> + write_vc(file = "datafield", root = root, sorting = "id", stage = stage) + update_metadata( + file = "datafield", root = root, stage = stage, name = "datafield", + title = "Pointer to external code identifiers", + description = + "This dataset describes the external code identifiers used in the data. +It points to the original source of the external code: which datasource, which +table in that datasource and which field in that table.", + field_description = c( + id = "unique identifier of the datafield", + table = "table name of the identifier", + field = "field name of the identifier", + source = "data source which stores the data" + ) + ) + return(1) + } + datafield <- verify_vc( + file = "datafield", root = root, + variables = c("id", "table", "field", "source") + ) + which( + datafield$table == table & datafield$field == field & + datafield$source == datasource + ) -> relevant + stopifnot("multiple matching datafield id found" = length(relevant) <= 1) + if (length(relevant) == 1) { + return(datafield$id[relevant]) + } + new_id <- max(datafield$id) + 1L + data.frame(id = new_id, table = table, field = field, source = datasource) |> + write_vc(file = "datafield", root = root, stage = stage, append = TRUE) + return(new_id) +} diff --git a/man/get_datafield_id.Rd b/man/get_datafield_id.Rd new file mode 100644 index 00000000..820fdb83 --- /dev/null +++ b/man/get_datafield_id.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_datafield_id.R +\name{get_datafield_id} +\alias{get_datafield_id} +\title{Get the datafield id} +\usage{ +get_datafield_id(table, field, datasource, root, stage = FALSE) +} +\arguments{ +\item{table}{The table name} + +\item{field}{The field name} + +\item{datasource}{The datasource name} + +\item{root}{The root of a project. Can be a file path or a \code{git-repository}. +Defaults to the current working directory (\code{"."}).} + +\item{stage}{Logical value indicating whether to stage the changes after +writing the data. Defaults to \code{FALSE}.} +} +\description{ +Get the datafield id +} From 953668b7ba79ae77ff9595b43a1b8d26b0919b4b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sun, 27 Oct 2024 11:03:46 +0100 Subject: [PATCH 25/50] use base pipe in fit_model() on n2kComposite --- R/fit_model_n2k_composite.R | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/R/fit_model_n2k_composite.R b/R/fit_model_n2k_composite.R index cd03f46c..1170cd11 100644 --- a/R/fit_model_n2k_composite.R +++ b/R/fit_model_n2k_composite.R @@ -1,6 +1,6 @@ #' @rdname fit_model #' @importFrom methods setMethod new -#' @importFrom dplyr %>% filter group_by n summarise transmute mutate arrange +#' @importFrom dplyr arrange filter group_by mutate n summarise transmute #' @importFrom rlang .data #' @importFrom utils file_test #' @importFrom stats qnorm @@ -21,21 +21,19 @@ setMethod( status(x) <- "error" return(x) } - x@Parameter %>% - filter(!is.na(.data$estimate), !is.na(.data$variance)) %>% - group_by(.data$value) %>% + x@Parameter |> + filter(!is.na(.data$estimate), !is.na(.data$variance)) |> + group_by(.data$value) |> summarise( - estimate = mean(.data$estimate), - se = sqrt(sum(.data$variance)) / n() - ) %>% + estimate = mean(.data$estimate), se = sqrt(sum(.data$variance)) / n() + ) |> transmute( - .data$value, - .data$estimate, + .data$value, .data$estimate, lower_confidence_limit = qnorm(0.025, mean = .data$estimate, sd = .data$se), upper_confidence_limit = qnorm(0.975, mean = .data$estimate, sd = .data$se) - ) %>% + ) |> as.data.frame() -> x@Index status(x) <- "converged" return(x) @@ -43,8 +41,8 @@ setMethod( status(x) <- "waiting" parent_status <- parent_status(x) - parent_status %>% - filter(.data$parent_status %in% c("new", "waiting", status)) %>% + parent_status |> + filter(.data$parent_status %in% c("new", "waiting", status)) |> pull("parent_analysis") -> todo for (this_parent in todo) { @@ -67,12 +65,12 @@ setMethod( ] <- get_status_fingerprint(model) x@AnalysisRelation <- parent_status if (status(model) == "converged") { - extract(extractor = x@Extractor, object = model) %>% - mutate(parent = this_parent) %>% + extract(extractor = x@Extractor, object = model) |> + mutate(parent = this_parent) |> bind_rows( - x@Parameter %>% + x@Parameter |> filter(.data$parent != this_parent) - ) %>% + ) |> arrange(.data$parent, .data$value) -> x@Parameter } } From 659a7bbc844142d1f988ad26f833b08e24f703c1 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 31 Oct 2024 10:24:25 +0100 Subject: [PATCH 26/50] use pak::pkg_install() instead of remotes::install_github() in Docker image --- R/manifest_yaml_to_bash.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/manifest_yaml_to_bash.R b/R/manifest_yaml_to_bash.R index c1253458..a88314fa 100644 --- a/R/manifest_yaml_to_bash.R +++ b/R/manifest_yaml_to_bash.R @@ -68,8 +68,8 @@ setMethod( read_manifest(base = base, project = project) -> manifest docker_hash <- get_file_fingerprint(manifest) sprintf( - "RUN Rscript -e 'remotes::install_github(\\\"%s\\\"%s)'", yaml$github, - ", dependencies = FALSE, upgrade = \\\"never\\\"" + "RUN Rscript -e 'pak::pkg_install(\\\"%s\\\"%s)'", yaml$github, + ", dependencies = FALSE, upgrade = FALSE, ask = FALSE" ) -> deps sprintf( "#!/bin/bash From 0b08e5665fe5eb20981823bf502f8f0bac99eb4b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 1 Nov 2024 10:20:29 +0100 Subject: [PATCH 27/50] fit_model() on n2kManifest stores the status of processed analyses. Speeding up the analysis on aws S3 --- DESCRIPTION | 1 + NAMESPACE | 1 + R/fit_model_character.R | 15 +++++-- R/fit_model_n2k_manifest.R | 89 +++++++++++--------------------------- man/fit_model.Rd | 10 +---- 5 files changed, 41 insertions(+), 75 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8d701d4f..0a9642c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,6 +36,7 @@ Imports: RODBC, tibble, tidyr (>= 0.4.0), + tools, yaml Suggests: fmesher, diff --git a/NAMESPACE b/NAMESPACE index 809a69a8..46bd2e21 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -185,6 +185,7 @@ importFrom(stats,terms) importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) importFrom(tidyr,pivot_longer) +importFrom(tools,R_user_dir) importFrom(utils,file_test) importFrom(utils,flush.console) importFrom(utils,head) diff --git a/R/fit_model_character.R b/R/fit_model_character.R index 9decb4a3..8411c3ea 100644 --- a/R/fit_model_character.R +++ b/R/fit_model_character.R @@ -56,9 +56,12 @@ setMethod( ) display(verbose, status(analysis)) store_model(analysis, base = base, project = project) + result <- data.frame( + fingerprint = get_file_fingerprint(analysis), status = status(analysis) + ) rm(analysis) gc(verbose = FALSE) - return(invisible(NULL)) + return(invisible(result)) } dots <- list(...) to_do <- object_status( @@ -67,7 +70,10 @@ setMethod( if (length(to_do) == 0) { display(verbose, "skipping") gc(verbose = FALSE) - return(invisible(NULL)) + result <- data.frame( + fingerprint = hash, status = "converged" + ) + return(invisible(result)) } download_model( hash = hash, base = base, local = dots$local, project = project, @@ -91,8 +97,11 @@ setMethod( hash = hash, local = base, base = dots$local, project = project, verbose = verbose ) + result <- data.frame( + fingerprint = get_file_fingerprint(analysis), status = status(analysis) + ) rm(analysis) gc(verbose = FALSE) - return(invisible(NULL)) + return(invisible(result)) } ) diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index 2b16892b..e97a0974 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -4,89 +4,52 @@ #' @importFrom methods setMethod new #' @importFrom purrr walk #' @importFrom stats na.omit +#' @importFrom tools R_user_dir #' @include n2k_manifest_class.R #' @param local A local folder into which objects from an AWS S3 bucket are #' downloaded. -#' @param first A logical. -#' `first = TRUE` implies to fit only the first object in the manifest with -#' matching status. -#' `first = FALSE` implies to fit all objects in the manifest with matching -#' status. -#' Defaults to `FALSE`. setMethod( f = "fit_model", signature = signature(x = "n2kManifest"), definition = function( x, base, project, status = c("new", "waiting"), verbose = TRUE, ..., - local = tempfile("fit_model"), first = FALSE + local = tempfile("fit_model") ) { assert_that( is.string(project), noNA(project), is.character(status), noNA(status), length(status) >= 1 ) to_do <- order_manifest(x) - stat <- map_chr(to_do, ~hash_status(base = base, project = project, .x)) - to_do <- to_do[stat %in% status] - if (length(to_do) == 0) { - return(invisible(NULL)) + R_user_dir("n2kanalysis", which = "cache") |> + file.path(x@Fingerprint) -> cache_file + dirname(cache_file) |> + dir.create(recursive = TRUE, showWarnings = FALSE) + if (file_test("-f", cache_file)) { + processed <- read.table(cache_file, header = TRUE, sep = "\t") + done <- processed$status %in% status + to_do <- to_do[!to_do %in% processed$fingerprint[done]] + } else { + data.frame(fingerprint = character(0), status = character(0)) |> + write.table( + file = cache_file, sep = "\t", row.names = FALSE, quote = FALSE + ) } - if (inherits(base, "character")) { - walk( - to_do, fit_model, base = base, project = project, - status = status, verbose = verbose, ... - ) - return(invisible(NULL)) + for (i in to_do) { + result <- try(fit_model( + x = i, base = base, project = project, status = status, + verbose = verbose, ..., local = local + )) + if (!inherits(result, "try-error")) { + write.table( + result, file = cache_file, append = TRUE, sep = "\t", + row.names = FALSE, quote = FALSE, col.names = FALSE + ) + } } - - display(verbose, "Downloading objects") - x@Manifest$parent[x@Manifest$fingerprint %in% to_do] |> - c(to_do) |> - unique() |> - na.omit() -> to_download - path(local, project) |> - dir_create() - path(local, project) |> - dir_ls(recurse = TRUE, type = "file") |> - basename() -> local_files - to_download[!paste0(to_download, ".rds") %in% local_files] |> - walk( - download_model, base = base, project = project, local = local, - verbose = verbose - ) - walk( - to_do, fit_model, base = local, project = project, - status = status, verbose = verbose, ... - ) - display(verbose, "Uploading objects") - walk( - to_do, download_model, base = local, project = project, local = base, - verbose = verbose - ) return(invisible(NULL)) } ) -#' @importFrom aws.s3 get_bucket -#' @importFrom purrr map_chr -hash_status <- function(hash, base, project) { - assert_that(is.string(hash), is.string(project)) - if (inherits(base, "s3_bucket")) { - substr(hash, 1, 4) |> - sprintf(fmt = "%2$s/%1$s/", project) |> - get_bucket(bucket = base, max = Inf) |> - map_chr("Key") -> keys - keys[grepl(hash, keys)] |> - gsub(pattern = sprintf(".*/(.*)/%s\\.rds", hash), replacement = "\\1") |> - unname() -> output - return(output) - } - stopifnot(inherits(base, "character")) - assert_that(is.string(base)) - file.path(base, project) |> - list.files(recursive = TRUE, pattern = hash) |> - gsub(x = _, pattern = ".*/(.*)/.*\\.rds", replacement = "\\1") -} - download_model <- function(hash, base, local, project, verbose = FALSE) { display(verbose, paste("Moving", hash)) read_model(x = hash, base = base, project = project) |> diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 31782e39..7e8c372e 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -57,8 +57,7 @@ fit_model(x, ...) status = c("new", "waiting"), verbose = TRUE, ..., - local = tempfile("fit_model"), - first = FALSE + local = tempfile("fit_model") ) \S4method{fit_model}{n2kModelImputed}(x, ...) @@ -123,13 +122,6 @@ using \code{B:0} threads.} \item{local}{A local folder into which objects from an AWS S3 bucket are downloaded.} - -\item{first}{A logical. -\code{first = TRUE} implies to fit only the first object in the manifest with -matching status. -\code{first = FALSE} implies to fit all objects in the manifest with matching -status. -Defaults to \code{FALSE}.} } \description{ Fit an \code{n2kModel} object From 40177fe7f0d4c33eab324dd481397734d6afa0b6 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 6 Nov 2024 15:08:43 +0100 Subject: [PATCH 28/50] fit_model() on n2kManifest displays the progress --- R/fit_model_n2k_manifest.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index e97a0974..c09c1b44 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -34,9 +34,16 @@ setMethod( file = cache_file, sep = "\t", row.names = FALSE, quote = FALSE ) } - for (i in to_do) { + for (i in seq_along(to_do)) { + display( + verbose = verbose, + message = sprintf( + "Processing %i from %i (%.2f%%)", i, length(to_do), + 100 * (i - 1) / length(to_do) + ) + ) result <- try(fit_model( - x = i, base = base, project = project, status = status, + x = to_do[i], base = base, project = project, status = status, verbose = verbose, ..., local = local )) if (!inherits(result, "try-error")) { From 10f0e7d0b7c02d4a7b888b1e4103a885653d7efd Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 8 Nov 2024 14:00:20 +0100 Subject: [PATCH 29/50] try to fit INLA model without control.family settings --- R/fit_model_n2k_inla.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/fit_model_n2k_inla.R b/R/fit_model_n2k_inla.R index 1e34d955..d449fb6e 100644 --- a/R/fit_model_n2k_inla.R +++ b/R/fit_model_n2k_inla.R @@ -60,6 +60,17 @@ setMethod( } do.call(INLA::inla, control) }, silent = TRUE) + if (inherits(m0, "try-error") && "control.family" %in% names(control)) { + control$control.family <- NULL + m0 <- try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) + control$control.family <- x@Control$control.family + } if (inherits(m0, "try-error")) { status(x) <- ifelse( grepl("time limit", m0), "time-out", "error" From 143b2c9565779765cf623d9405f47207f752905d Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 13 Nov 2024 11:24:00 +0100 Subject: [PATCH 30/50] add extract() method for n2kModelImputed --- R/extract.R | 14 ++++++++++++++ man/extract.Rd | 3 +++ 2 files changed, 17 insertions(+) diff --git a/R/extract.R b/R/extract.R index f791a819..b9b70d5d 100644 --- a/R/extract.R +++ b/R/extract.R @@ -63,3 +63,17 @@ setMethod( extractor(object@Model) } ) + + +#' @rdname extract +#' @aliases extract,n2kInla-methods +#' @importFrom methods setMethod new +#' @include n2k_inla_class.R +setMethod( + f = "extract", + signature = signature(object = "n2kModelImputed"), + definition = function(extractor, object, base = NULL, project = NULL) { + assert_that(inherits(extractor, "function")) + extractor(object) + } +) diff --git a/man/extract.Rd b/man/extract.Rd index 9fdce4d0..5061d4ad 100644 --- a/man/extract.Rd +++ b/man/extract.Rd @@ -7,6 +7,7 @@ \alias{extract,character-methods} \alias{extract,ANY,n2kInla-method} \alias{extract,n2kInla-methods} +\alias{extract,ANY,n2kModelImputed-method} \title{Extract the relevant coefficients} \usage{ extract(extractor, object, base, project) @@ -14,6 +15,8 @@ extract(extractor, object, base, project) \S4method{extract}{ANY,character}(extractor, object, base, project) \S4method{extract}{ANY,n2kInla}(extractor, object, base = NULL, project = NULL) + +\S4method{extract}{ANY,n2kModelImputed}(extractor, object, base = NULL, project = NULL) } \arguments{ \item{extractor}{the extractor function} From e25584038ea577db54369f9433d5b791c05b3025 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 13 Nov 2024 11:24:52 +0100 Subject: [PATCH 31/50] fit_model() on n2kHurdle handles the case when not all parent models are converged --- R/fit_model_n2k_hurdle_imputed.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/fit_model_n2k_hurdle_imputed.R b/R/fit_model_n2k_hurdle_imputed.R index 272040b4..edaf2571 100644 --- a/R/fit_model_n2k_hurdle_imputed.R +++ b/R/fit_model_n2k_hurdle_imputed.R @@ -80,6 +80,10 @@ setMethod( store_model(x, base = base, project = project) } + if (!all(x@AnalysisRelation$parentstatus == "converged")) { + return(x) + } + result <- try(hurdle_impute(x@Presence, x@Count)) if (inherits(result, "try-error")) { x@AnalysisMetadata$status_fingerprint <- sha1( From 7180762e97474df493e82a31ea8867b7cbd51eee Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 13 Nov 2024 11:26:15 +0100 Subject: [PATCH 32/50] fit_model() on n2kManifest() does not use a default for the local argument --- R/fit_model_character.R | 6 +++++- R/fit_model_n2k_manifest.R | 4 ++-- man/fit_model.Rd | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/fit_model_character.R b/R/fit_model_character.R index 8411c3ea..578de151 100644 --- a/R/fit_model_character.R +++ b/R/fit_model_character.R @@ -48,7 +48,11 @@ setMethod( fit_model(base = base, project = project, verbose = verbose, ...) return(invisible(NULL)) } - if (!has_name(list(...), "local") || !inherits(base, "s3_bucket")) { + dots <- list(...) + if ( + !has_name(dots, "local") || is.null(dots$local) || + !inherits(base, "s3_bucket") + ) { analysis <- read_model(hash, base = base, project = project) display(verbose, paste(status(analysis), "-> "), FALSE) analysis <- fit_model( diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index c09c1b44..07e709a9 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -13,7 +13,7 @@ setMethod( signature = signature(x = "n2kManifest"), definition = function( x, base, project, status = c("new", "waiting"), verbose = TRUE, ..., - local = tempfile("fit_model") + local = NULL ) { assert_that( is.string(project), noNA(project), is.character(status), noNA(status), @@ -27,7 +27,7 @@ setMethod( if (file_test("-f", cache_file)) { processed <- read.table(cache_file, header = TRUE, sep = "\t") done <- processed$status %in% status - to_do <- to_do[!to_do %in% processed$fingerprint[done]] + to_do <- to_do[!to_do %in% processed$fingerprint[!done]] } else { data.frame(fingerprint = character(0), status = character(0)) |> write.table( diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 7e8c372e..a89abd58 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -57,7 +57,7 @@ fit_model(x, ...) status = c("new", "waiting"), verbose = TRUE, ..., - local = tempfile("fit_model") + local = NULL ) \S4method{fit_model}{n2kModelImputed}(x, ...) From dc16f944f45f4401ddc6d899450c1b15ed74966b Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 5 Dec 2024 12:34:14 +0100 Subject: [PATCH 33/50] fit_model() on n2kManifest displays the system time --- R/fit_model_n2k_manifest.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index 07e709a9..1323dde1 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -38,8 +38,8 @@ setMethod( display( verbose = verbose, message = sprintf( - "Processing %i from %i (%.2f%%)", i, length(to_do), - 100 * (i - 1) / length(to_do) + "Processing %i from %i (%.2f%%) %s", i, length(to_do), + 100 * (i - 1) / length(to_do), Sys.time() ) ) result <- try(fit_model( From 628149fa7ec2a39eef86eade023c4c2ae66cdd83 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sat, 7 Dec 2024 13:44:28 +0100 Subject: [PATCH 34/50] fit_model) on n2kManifest displays the ETA --- R/fit_model_n2k_manifest.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/fit_model_n2k_manifest.R b/R/fit_model_n2k_manifest.R index 1323dde1..c291f3e9 100644 --- a/R/fit_model_n2k_manifest.R +++ b/R/fit_model_n2k_manifest.R @@ -34,12 +34,18 @@ setMethod( file = cache_file, sep = "\t", row.names = FALSE, quote = FALSE ) } + start_time <- Sys.time() for (i in seq_along(to_do)) { display( verbose = verbose, message = sprintf( - "Processing %i from %i (%.2f%%) %s", i, length(to_do), - 100 * (i - 1) / length(to_do), Sys.time() + "Processing %i from %i (%.2f%%) %s ETA %s", i, length(to_do), + 100 * (i - 1) / length(to_do), + format(Sys.time(), "%Y-%m-%d %H:%M:%S"), + format( + start_time + (Sys.time() - start_time) * length(to_do) / (i - 1), + "%d %H:%M" + ) ) ) result <- try(fit_model( From 237bf7acee4cffdea6e0fce42c44114f43cc1360 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 13:36:26 +0100 Subject: [PATCH 35/50] =?UTF-8?q?=F0=9F=93=9D=20Fix=20spelling=20errors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/get_datafield_id.R | 4 ++-- man/get_datafield_id.Rd | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_datafield_id.R b/R/get_datafield_id.R index d6092456..c2917f80 100644 --- a/R/get_datafield_id.R +++ b/R/get_datafield_id.R @@ -1,7 +1,7 @@ -#' Get the datafield id +#' Get the data field id #' @param table The table name #' @param field The field name -#' @param datasource The datasource name +#' @param datasource The data source name #' @inheritParams git2rdata::write_vc #' @export #' @importFrom assertthat assert_that is.string noNA diff --git a/man/get_datafield_id.Rd b/man/get_datafield_id.Rd index 820fdb83..1bb2ad16 100644 --- a/man/get_datafield_id.Rd +++ b/man/get_datafield_id.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_datafield_id.R \name{get_datafield_id} \alias{get_datafield_id} -\title{Get the datafield id} +\title{Get the data field id} \usage{ get_datafield_id(table, field, datasource, root, stage = FALSE) } @@ -11,7 +11,7 @@ get_datafield_id(table, field, datasource, root, stage = FALSE) \item{field}{The field name} -\item{datasource}{The datasource name} +\item{datasource}{The data source name} \item{root}{The root of a project. Can be a file path or a \code{git-repository}. Defaults to the current working directory (\code{"."}).} @@ -20,5 +20,5 @@ Defaults to the current working directory (\code{"."}).} writing the data. Defaults to \code{FALSE}.} } \description{ -Get the datafield id +Get the data field id } From ae1c454b21c3aed92c6a8d169818d8c50862a2cc Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 13:38:45 +0100 Subject: [PATCH 36/50] =?UTF-8?q?=E2=9E=95=20Add=20missing=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depend on git2rdata --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 0a9642c7..88209361 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Imports: digest (>= 0.6.23.2), dplyr, fs, + git2rdata, MASS, methods, multimput (>= 0.2.14), From fffe9e5f539e6697867c2eaa5a1c0a2e8410c103 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 13:39:27 +0100 Subject: [PATCH 37/50] =?UTF-8?q?=F0=9F=93=9D=20Update=20citation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inst/CITATION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/CITATION b/inst/CITATION index 535e547b..8146bc88 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -4,10 +4,10 @@ bibentry( bibtype = "Manual", title = "n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0", author = c( author = c(person(given = "Thierry", family = "Onkelinx"))), - year = 2024, + year = 2025, url = "https://inbo.github.io/n2kanalysis/", abstract = "All generic functions and classes for the analysis for the 'Natura 2000' monitoring. The classes contain all required data and definitions to fit the model without the need to access other sources. Potentially they might need access to one or more parent objects. An aggregation object might for example need the result of an imputation object. The actual definition of the analysis, using these generic function and classes, is defined in dedictated analysis R packages for every monitoring scheme. For example 'abvanalysis' and 'watervogelanalysis'.", - textVersion = "Onkelinx, Thierry (2024) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0. https://inbo.github.io/n2kanalysis/", + textVersion = "Onkelinx, Thierry (2025) n2kanalysis: Generic Functions to Analyse Data from the 'Natura 2000' Monitoring. Version 0.4.0. https://inbo.github.io/n2kanalysis/", keywords = "analysis, reproducible research, natura 2000, monitoring", doi = "10.5281/zenodo.3576047", ) From 44ffa65190bffd0b70ab84814800fc7f0439f7aa Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 14:34:09 +0100 Subject: [PATCH 38/50] =?UTF-8?q?=F0=9F=93=9D=20Moving=5Fdifference()=20ga?= =?UTF-8?q?ins=20a=20default=20value=20for=20first=5Fyear?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/moving_difference.R | 2 +- man/moving_difference.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/moving_difference.R b/R/moving_difference.R index adebb8ad..d3a7c1ac 100644 --- a/R/moving_difference.R +++ b/R/moving_difference.R @@ -11,7 +11,7 @@ #' moving_difference(6, 2) #' moving_difference(6, 2, 2000) #' @importFrom assertthat assert_that is.count is.number -moving_difference <- function(n_year, duration, first_year) { +moving_difference <- function(n_year, duration, first_year = 1) { assert_that(is.count(n_year), is.count(duration), is.number(first_year)) duration <- min(floor(n_year / 2), duration) list(seq_len(n_year - 2 * duration + 1) - 1) |> diff --git a/man/moving_difference.Rd b/man/moving_difference.Rd index 0afe862b..b9fa099f 100644 --- a/man/moving_difference.Rd +++ b/man/moving_difference.Rd @@ -4,7 +4,7 @@ \alias{moving_difference} \title{Calculate coefficients for a moving difference} \usage{ -moving_difference(n_year, duration, first_year) +moving_difference(n_year, duration, first_year = 1) } \arguments{ \item{n_year}{Number of available years in the data.} From c9e77ccfb6a5ead2fc7e6bf7d006e381d8c60605 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 14:36:55 +0100 Subject: [PATCH 39/50] =?UTF-8?q?=F0=9F=9A=A8=20Fix=20alias=20for=20extrac?= =?UTF-8?q?t()=20on=20n2kModelImputed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 6 +++--- R/extract.R | 4 ++-- R/n2k_hurdle_imputed_class.R | 1 + R/n2k_model_imputed_class.R | 2 +- man/extract.Rd | 1 + 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 88209361..45823136 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -76,23 +76,23 @@ Collate: 'delete_model.R' 'display.R' 'n2k_model_class.R' + 'n2k_inla_comparison_class.R' 'n2k_inla_class.R' + 'n2k_aggregate_class.R' + 'n2k_model_imputed_class.R' 'extract.R' 'fit_every_model.R' 'fit_model.R' 'fit_model_character.R' - 'n2k_aggregate_class.R' 'fit_model_n2k_aggregate.R' 'n2k_composite_class.R' 'fit_model_n2k_composite.R' 'n2k_hurdle_imputed_class.R' 'fit_model_n2k_hurdle_imputed.R' 'fit_model_n2k_inla.R' - 'n2k_inla_comparison_class.R' 'fit_model_n2k_inla_comparison.R' 'n2k_manifest_class.R' 'fit_model_n2k_manifest.R' - 'n2k_model_imputed_class.R' 'fit_model_n2k_model_imputed.R' 'spde_class.R' 'n2k_spde_class.R' diff --git a/R/extract.R b/R/extract.R index b9b70d5d..6b8fe4b2 100644 --- a/R/extract.R +++ b/R/extract.R @@ -66,9 +66,9 @@ setMethod( #' @rdname extract -#' @aliases extract,n2kInla-methods +#' @aliases extract,n2kModelImputed-methods #' @importFrom methods setMethod new -#' @include n2k_inla_class.R +#' @include n2k_model_imputed_class.R setMethod( f = "extract", signature = signature(object = "n2kModelImputed"), diff --git a/R/n2k_hurdle_imputed_class.R b/R/n2k_hurdle_imputed_class.R index 56a85b1f..5ff80a00 100644 --- a/R/n2k_hurdle_imputed_class.R +++ b/R/n2k_hurdle_imputed_class.R @@ -10,6 +10,7 @@ #' @aliases n2kHurdleImputed-class #' @importFrom methods setClass #' @docType class +#' @include n2k_aggregate_class.R setClass( "n2kHurdleImputed", representation = representation( diff --git a/R/n2k_model_imputed_class.R b/R/n2k_model_imputed_class.R index 25973ca1..c5c5d905 100644 --- a/R/n2k_model_imputed_class.R +++ b/R/n2k_model_imputed_class.R @@ -31,7 +31,7 @@ setClassUnion("maybeFunction", c("function", "character")) #' @aliases n2kModelImputed-class #' @importFrom methods setClass #' @docType class -#' @include n2k_model_imputed_class.R +#' @include n2k_aggregate_class.R #' @include n2k_inla_comparison_class.R setClass( "n2kModelImputed", diff --git a/man/extract.Rd b/man/extract.Rd index 5061d4ad..4bd2aadf 100644 --- a/man/extract.Rd +++ b/man/extract.Rd @@ -8,6 +8,7 @@ \alias{extract,ANY,n2kInla-method} \alias{extract,n2kInla-methods} \alias{extract,ANY,n2kModelImputed-method} +\alias{extract,n2kModelImputed-methods} \title{Extract the relevant coefficients} \usage{ extract(extractor, object, base, project) From 8c704eed3e0bbd0d3f2106b0980441504ba3b1b7 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 14:38:12 +0100 Subject: [PATCH 40/50] =?UTF-8?q?=F0=9F=9A=A8=20Reduce=20cyclomatic=20comp?= =?UTF-8?q?lexity=20of=20fit=5Fmodel()=20on=20n2kInla()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/fit_model_n2k_inla.R | 109 +++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/R/fit_model_n2k_inla.R b/R/fit_model_n2k_inla.R index d449fb6e..2fe03d1b 100644 --- a/R/fit_model_n2k_inla.R +++ b/R/fit_model_n2k_inla.R @@ -1,6 +1,6 @@ #' @rdname fit_model #' @importFrom methods setMethod new -#' @importFrom assertthat assert_that is.number +#' @importFrom assertthat assert_that #' @importMethodsFrom multimput impute #' @include n2k_inla_class.R #' @param timeout the optional number of second until the model will time out @@ -40,54 +40,14 @@ setMethod( fm <- terms(x@AnalysisFormula[[1]]) response <- all.vars(fm)[attr(fm, "response")] if (mean(is.na(data[[response]])) < 0.10) { - # directly fit model when less than 10% missing data - control$data <- data - control$lincomb <- lc - model <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + model <- direct_fit( + control = control, data = data, lc = lc, timeout = timeout + ) } else { - # first fit model without missing data - control$data <- data[!is.na(data[[response]]), ] - m0 <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) - if (inherits(m0, "try-error") && "control.family" %in% names(control)) { - control$control.family <- NULL - m0 <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) - control$control.family <- x@Control$control.family - } - if (inherits(m0, "try-error")) { - status(x) <- ifelse( - grepl("time limit", m0), "time-out", "error" - ) - return(x) - } - # then refit with missing data - control$data <- data - control$lincomb <- lc - control$control.update <- list(result = m0) - model <- try({ - if (!is.null(timeout)) { - assert_that(is.number(timeout), timeout > 0) - setTimeLimit(cpu = timeout, elapsed = timeout) - } - do.call(INLA::inla, control) - }, silent = TRUE) + model <- indirect_fit( + response = response, control = control, data = data, lc = lc, + timeout = timeout + ) } # handle error in model fit @@ -137,3 +97,56 @@ model2lincomb <- function(lincomb) { } return(lc) } + +# directly fit model when less than 10% missing data +direct_fit <- function(control, data, lc, timeout = NULL) { + control$data <- data + control$lincomb <- lc + try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) +} + +#' @importFrom assertthat assert_that is.number +indirect_fit <- function(control, data, lc, response, timeout = NULL) { + # first fit model without missing data + control$data <- data[!is.na(data[[response]]), ] + m0 <- try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) + if (inherits(m0, "try-error") && "control.family" %in% names(control)) { + # when model failed to fit, try again without family + old_control_family <- control$control.family + control$control.family <- NULL + m0 <- try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) + control$control.family <- old_control_family + } + if (inherits(m0, "try-error")) { + return(m0) + } + # then refit with missing data + control$data <- data + control$lincomb <- lc + control$control.update <- list(result = m0) + try({ + if (!is.null(timeout)) { + assert_that(is.number(timeout), timeout > 0) + setTimeLimit(cpu = timeout, elapsed = timeout) + } + do.call(INLA::inla, control) + }, silent = TRUE) +} From ec8eb7dc45666f6c33e8589049242f1635943508 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 14:39:06 +0100 Subject: [PATCH 41/50] =?UTF-8?q?=F0=9F=A7=B1=20Add=20Rstudio=20project=20?= =?UTF-8?q?id?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- n2kanalysis.Rproj | 1 + 1 file changed, 1 insertion(+) diff --git a/n2kanalysis.Rproj b/n2kanalysis.Rproj index b067fd56..24461809 100644 --- a/n2kanalysis.Rproj +++ b/n2kanalysis.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 5ea8d182-fbfb-485c-8a11-cd3348bf81a0 RestoreWorkspace: No SaveWorkspace: No From c99b2c6c6daa56714fdcb46cc5517dd78a319703 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 15:37:38 +0100 Subject: [PATCH 42/50] =?UTF-8?q?=E2=9C=85=20Fix=20unit=20tests=20on=20fit?= =?UTF-8?q?=5Fmodel()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/testthat/test_caa_fit_model.R | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_caa_fit_model.R b/tests/testthat/test_caa_fit_model.R index 08a42518..a5839e1d 100644 --- a/tests/testthat/test_caa_fit_model.R +++ b/tests/testthat/test_caa_fit_model.R @@ -218,7 +218,8 @@ test_that("fit_model() works on n2kInlaComparison", { fit_model(filename2, verbose = FALSE) fit_model(filename3, verbose = FALSE) filename3 <- gsub("waiting", "converged", filename3) - expect_null(fit_model(filename3, verbose = FALSE)) + expect_invisible(output <- fit_model(filename3, verbose = FALSE)) + expect_is(output, "data.frame") }) test_that("fit_model() works on n2kInlaComposite", { @@ -294,7 +295,8 @@ test_that("fit_model() works on n2kInlaComposite", { fit_model(filename2, verbose = FALSE) fit_model(filename3, verbose = FALSE) filename3 <- gsub("waiting", "converged", filename3) - expect_null(fit_model(filename3, verbose = FALSE)) + expect_invisible(output <- fit_model(filename3, verbose = FALSE)) + expect_is(output, "data.frame") }) test_that("fit_model() works on n2kHurdleImputed", { @@ -337,18 +339,30 @@ test_that("fit_model() works on n2kHurdleImputed", { sha_count <- store_model(count, base = base, project = project) sha_presence <- store_model(presence, base = base, project = project) sha_hurdle <- store_model(hurdle, base = base, project = project) - expect_null(fit_model(basename(sha_hurdle), base = base, project = project)) + expect_invisible( + output <- fit_model(basename(sha_hurdle), base = base, project = project) + ) + expect_is(output, "data.frame") suppressWarnings( - expect_null(fit_model(basename(sha_count), base = base, project = project)) + expect_invisible( + output <- fit_model(basename(sha_count), base = base, project = project) + ) ) + expect_is(output, "data.frame") suppressWarnings( - expect_null( - fit_model(basename(sha_presence), base = base, project = project) + expect_invisible( + output <- fit_model( + basename(sha_presence), base = base, project = project + ) ) ) + expect_is(output, "data.frame") suppressWarnings( - expect_null(fit_model(basename(sha_hurdle), base = base, project = project)) + expect_invisible( + output <- fit_model(basename(sha_hurdle), base = base, project = project) + ) ) + expect_is(output, "data.frame") expect_s4_class( basename(sha_hurdle) |> read_model(base = base, project = project) |> From 1681c67ca650ef419af31a2503caf7f2d4e89abd Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 15:52:19 +0100 Subject: [PATCH 43/50] =?UTF-8?q?=E2=9E=95=20Import=20median()=20from=20st?= =?UTF-8?q?ats?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 1 + R/moving_average.R | 1 + R/moving_difference.R | 1 + R/moving_trend.R | 1 + 4 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 46bd2e21..40a0f55b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -176,6 +176,7 @@ importFrom(rlang,"!!") importFrom(rlang,.data) importFrom(stats,as.formula) importFrom(stats,coef) +importFrom(stats,median) importFrom(stats,na.fail) importFrom(stats,na.omit) importFrom(stats,qnorm) diff --git a/R/moving_average.R b/R/moving_average.R index 90f2de70..57f3d843 100644 --- a/R/moving_average.R +++ b/R/moving_average.R @@ -9,6 +9,7 @@ #' moving_average(5, 3) #' moving_average(5, 3, 2000) #' @importFrom assertthat assert_that is.count is.number +#' @importFrom stats median moving_average <- function(n_year, duration, first_year = 0) { assert_that(is.count(n_year), is.count(duration), is.number(first_year)) duration <- min(n_year, duration) diff --git a/R/moving_difference.R b/R/moving_difference.R index d3a7c1ac..33b893d4 100644 --- a/R/moving_difference.R +++ b/R/moving_difference.R @@ -11,6 +11,7 @@ #' moving_difference(6, 2) #' moving_difference(6, 2, 2000) #' @importFrom assertthat assert_that is.count is.number +#' @importFrom stats median moving_difference <- function(n_year, duration, first_year = 1) { assert_that(is.count(n_year), is.count(duration), is.number(first_year)) duration <- min(floor(n_year / 2), duration) diff --git a/R/moving_trend.R b/R/moving_trend.R index 2afee4cc..8cd35779 100644 --- a/R/moving_trend.R +++ b/R/moving_trend.R @@ -15,6 +15,7 @@ #' moving_trend(5, 4, 2000) #' @export #' @importFrom assertthat assert_that is.count is.number +#' @importFrom stats median moving_trend <- function(n_year, duration, first_year = 0) { assert_that(is.count(n_year), is.count(duration), is.number(first_year)) duration <- min(n_year, duration) From a72993a8669667d2827576a92d34de4ed4ede827 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Tue, 21 Jan 2025 15:52:58 +0100 Subject: [PATCH 44/50] =?UTF-8?q?=F0=9F=93=9D=20Document=20arguments=20of?= =?UTF-8?q?=20order=5Fmanifest()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/manifest_yaml_to_bash.R | 2 +- man/order_manifest.Rd | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/manifest_yaml_to_bash.R b/R/manifest_yaml_to_bash.R index a88314fa..213dfbdc 100644 --- a/R/manifest_yaml_to_bash.R +++ b/R/manifest_yaml_to_bash.R @@ -210,7 +210,7 @@ date", #' It starts with the analyses without parents. #' Then it adds the analyses with parents that have already been added. #' This process is repeated until all analyses have been added. -#' @inheritParams store_manifest +#' @param manifest the `n2kManifest` #' @importFrom assertthat assert_that #' @export order_manifest <- function(manifest) { diff --git a/man/order_manifest.Rd b/man/order_manifest.Rd index b5d5ac78..d9178649 100644 --- a/man/order_manifest.Rd +++ b/man/order_manifest.Rd @@ -6,6 +6,9 @@ \usage{ order_manifest(manifest) } +\arguments{ +\item{manifest}{the \code{n2kManifest}} +} \description{ The order of the analysis is determined by the parent-child relationship. It starts with the analyses without parents. From 00080d11c3e29aca851a3de253861fcdbbea48c9 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Fri, 24 Jan 2025 10:14:36 +0100 Subject: [PATCH 45/50] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Require=20git2rdata?= =?UTF-8?q?=200.5.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 45823136..f5d110de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Imports: digest (>= 0.6.23.2), dplyr, fs, - git2rdata, + git2rdata (>= 0.5.0), MASS, methods, multimput (>= 0.2.14), From efaa0366975a6c528ea8a5eb27e1ab63dc9e582a Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Sun, 26 Jan 2025 18:33:09 +0100 Subject: [PATCH 46/50] =?UTF-8?q?=E2=9C=85=20Add=20unit=20test=20for=20n2k?= =?UTF-8?q?=5Fspde()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/testthat/test_abb_n2k_spde.R | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 tests/testthat/test_abb_n2k_spde.R diff --git a/tests/testthat/test_abb_n2k_spde.R b/tests/testthat/test_abb_n2k_spde.R new file mode 100644 index 00000000..a92af460 --- /dev/null +++ b/tests/testthat/test_abb_n2k_spde.R @@ -0,0 +1,40 @@ +this_result_datasource_id <- sha1(sample(letters)) +this_scheme_id <- sha1(sample(letters)) +this_species_group_id <- sha1(sample(letters)) +this_location_group_id <- sha1(sample(letters)) +this_analysis_date <- Sys.time() +this_model_type <- "inla poisson: A * (B + C) + C:D" +this_formula <- + "Count ~ A * (B + C) + C:D + + f(E, model = \"rw1\", replicate = as.integer(A)) + + f(G, model = \"iid\")" +this_first_imported_year <- 1990L +this_last_imported_year <- 2015L +this_last_analysed_year <- 2014L +this_duration <- 1L +dataset <- test_data() +this_lc <- dataset %>% + select("A", "B", "C", "D") %>% + filter(.data$C == max(.data$C), .data$D == max(.data$D)) %>% + distinct() %>% + model.matrix(object = ~A * (B + C) + C:D) +test_that("n2k_spde() creates the object", { + expect_s4_class( + spde_model <- spde( + dataset[, c("C", "D")], range = c(0.5, 0.05), sigma = c(0.5, 0.05) + ), + "Spde" + ) + expect_s4_class( + object <- n2k_spde( + result_datasource_id = this_result_datasource_id, + scheme_id = this_scheme_id, species_group_id = this_species_group_id, + location_group_id = this_location_group_id, model_type = this_model_type, + formula = this_formula, spde = spde_model, data = dataset, + first_imported_year = this_first_imported_year, + last_imported_year = this_last_imported_year, + analysis_date = this_analysis_date + ), + "n2kSpde" + ) +}) From 9ee63d5ddb6d81a732bd51188466ed164d2ad476 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Mon, 27 Jan 2025 10:18:01 +0100 Subject: [PATCH 47/50] =?UTF-8?q?=E2=9C=85=20Add=20unit=20test=20for=20get?= =?UTF-8?q?=5Fdatafield=5Fid()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/get_datafield_id.R | 2 +- tests/testthat/test_aaa_get_datafield_id.R | 24 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test_aaa_get_datafield_id.R diff --git a/R/get_datafield_id.R b/R/get_datafield_id.R index c2917f80..6836271d 100644 --- a/R/get_datafield_id.R +++ b/R/get_datafield_id.R @@ -28,7 +28,7 @@ table in that datasource and which field in that table.", source = "data source which stores the data" ) ) - return(1) + return(1L) } datafield <- verify_vc( file = "datafield", root = root, diff --git a/tests/testthat/test_aaa_get_datafield_id.R b/tests/testthat/test_aaa_get_datafield_id.R new file mode 100644 index 00000000..d6870d10 --- /dev/null +++ b/tests/testthat/test_aaa_get_datafield_id.R @@ -0,0 +1,24 @@ +test_that("get_datafield_id", { + root <- tempfile("get_datafield_id") + dir.create(root) + on.exit(unlink(root, recursive = TRUE)) + + expect_is( + get_datafield_id( + table = "test", field = "id", datasource = "database", root = root + ), + "integer" + ) + expect_is( + get_datafield_id( + table = "test", field = "id", datasource = "database", root = root + ), + "integer" + ) + expect_is( + get_datafield_id( + table = "test2", field = "id", datasource = "database", root = root + ), + "integer" + ) +}) From 7e9a159255028806bfad67e91a0674e06915a89e Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Wed, 29 Jan 2025 19:38:30 +0100 Subject: [PATCH 48/50] =?UTF-8?q?=F0=9F=92=9A=20Pass=20missing=20environme?= =?UTF-8?q?nt=20variables=20to=20GHA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/check_on_branch.yml | 5 ++++- .github/workflows/check_on_main.yml | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check_on_branch.yml b/.github/workflows/check_on_branch.yml index a013b023..182dd948 100644 --- a/.github/workflows/check_on_branch.yml +++ b/.github/workflows/check_on_branch.yml @@ -2,7 +2,6 @@ on: push: branches-ignore: - main - - master - ghpages name: "check package with checklist" @@ -14,6 +13,10 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + N2KBUCKET: ${{ secrets.N2KBUCKET }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} permissions: contents: read steps: diff --git a/.github/workflows/check_on_main.yml b/.github/workflows/check_on_main.yml index 65ca9607..3ffc91a0 100644 --- a/.github/workflows/check_on_main.yml +++ b/.github/workflows/check_on_main.yml @@ -15,5 +15,9 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + N2KBUCKET: ${{ secrets.N2KBUCKET }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} steps: - uses: inbo/actions/check_pkg@checklist-0.4.1 From 7bbf7ef5615a9025496b790832a0234ec6264364 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 30 Jan 2025 12:30:06 +0100 Subject: [PATCH 49/50] =?UTF-8?q?=E2=9C=85=20Add=20unit=20tests=20for=20ge?= =?UTF-8?q?t=5Fresult()=20on=20n2kManifest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/testthat/test_cba_fit_model_manifest.R | 36 +++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test_cba_fit_model_manifest.R b/tests/testthat/test_cba_fit_model_manifest.R index 526a8321..a694edc6 100644 --- a/tests/testthat/test_cba_fit_model_manifest.R +++ b/tests/testthat/test_cba_fit_model_manifest.R @@ -41,17 +41,22 @@ test_that("it handles a manifest", { ) %>% n2k_manifest() expect_invisible(fit_model(x, base = base, project = project)) - x <- store_manifest(x, base, project) - expect_null(fit_model(x, base = base, project = project)) - expect_null(fit_model(x)) + y <- store_manifest(x, base, project) + expect_null(fit_model(y, base = base, project = project)) + expect_null(fit_model(y)) - file.path(base, project) %>% - list.files(recursive = TRUE, full.names = TRUE) %>% + file.path(base, project) |> + list.files(recursive = TRUE, full.names = TRUE) |> + c( + R_user_dir("n2kanalysis", which = "cache") |> + file.path(x@Fingerprint) + ) |> file.remove() + # works with an S3 bucket skip_if(Sys.getenv("AWS_SECRET_ACCESS_KEY") == "", message = "No AWS access") - aws_base <- get_bucket(Sys.getenv("N2KBUCKET")) + aws_base <- get_bucket(Sys.getenv("N2KBUCKET"), max = 1) store_model(object, base = aws_base, project = project) store_model(object2, base = aws_base, project = project) store_model(object3, base = aws_base, project = project) @@ -66,16 +71,29 @@ test_that("it handles a manifest", { stringsAsFactors = FALSE ) %>% n2k_manifest() + expect_s3_class( + results <- get_result(x, base = aws_base, project = project), + "data.frame" + ) + expect_true(all(results$status == "new")) expect_invisible( fit_model(x, base = aws_base, project = project, verbose = TRUE) ) + expect_s3_class( + results <- get_result(x, base = aws_base, project = project), + "data.frame" + ) + expect_true(all(results$status == "converged")) - x <- store_manifest(x, base = aws_base, project = project) - expect_invisible(fit_model(x$Contents)) + y <- store_manifest(x, base = aws_base, project = project) + expect_invisible(fit_model(y$Contents)) - expect_null(fit_model(x$Contents$Key, base = aws_base, project = project)) + expect_null(fit_model(y$Contents$Key, base = aws_base, project = project)) available <- get_bucket(aws_base, prefix = project) %>% sapply("[[", "Key") expect_true(all(sapply(available, delete_object, bucket = aws_base))) + R_user_dir("n2kanalysis", which = "cache") |> + file.path(x@Fingerprint) |> + file.remove() }) From f301638abd9a6fbc697676fd6c09babde004bb40 Mon Sep 17 00:00:00 2001 From: Thierry Onkelinx Date: Thu, 30 Jan 2025 16:53:18 +0100 Subject: [PATCH 50/50] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Improve=20manifest?= =?UTF-8?q?=5Fyaml=5Fto=5Fbash()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move similar code for different types of base to separate functions --- R/manifest_yaml_to_bash.R | 113 ++++++++++--------- man/manifest_yaml_to_bash.Rd | 3 +- tests/testthat/test_cba_fit_model_manifest.R | 18 ++- 3 files changed, 76 insertions(+), 58 deletions(-) diff --git a/R/manifest_yaml_to_bash.R b/R/manifest_yaml_to_bash.R index 213dfbdc..fb40a147 100644 --- a/R/manifest_yaml_to_bash.R +++ b/R/manifest_yaml_to_bash.R @@ -67,42 +67,16 @@ setMethod( gsub("\\.manifest$", "", yaml$hash) |> read_manifest(base = base, project = project) -> manifest docker_hash <- get_file_fingerprint(manifest) - sprintf( - "RUN Rscript -e 'pak::pkg_install(\\\"%s\\\"%s)'", yaml$github, - ", dependencies = FALSE, upgrade = FALSE, ask = FALSE" - ) -> deps - sprintf( - "#!/bin/bash -export $(cat .env | xargs) -echo \"FROM %s -%s\" > Dockerfile -docker build --pull --tag rn2k:%s . -rm Dockerfile", - yaml$docker, paste(deps, collapse = "\n"), docker_hash - ) -> init + init <- create_docker_init(yaml = yaml, docker_hash = docker_hash) volume <- "/n2kanalysis:/n2kanalysis:rw" models <- order_manifest(manifest = manifest) to_do <- object_status(base = base, project = project, status = status) models <- models[models %in% to_do] - c( - "echo \"\n\nmodel %i of %i\n\n\"\ndate\n", - "timeout --kill-after=2m %ih docker run %s --name=%s -v %s rn2k:%s", - "./fit_model_aws.sh -b %s -p %s -m %s%s" - ) |> - paste(collapse = " ") |> - sprintf( - seq_along(models), length(models), timeout, - paste( - c( - "--rm", "--env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID", - "--env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY", - "--env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION", - "--cap-add NET_ADMIN"[limit], "--cpu-shares=512"[limit] - ), - collapse = " " - ), models, volume, docker_hash, get_bucketname(base), project, models, - ifelse(limit, " -s 1", "") - ) -> model_scripts + model_scripts <- create_docker_model_scripts( + models = models, base = base, timeout = timeout, + limit = limit, volume = volume, docker_hash = docker_hash, + project = project + ) vapply( seq_len(split), FUN.VALUE = character(1), project = project, init = init, split = split, shutdown = shutdown, base = base, @@ -121,6 +95,52 @@ rm Dockerfile", } ) +create_docker_init <- function(yaml, docker_hash) { + sprintf( + "RUN Rscript -e 'pak::pkg_install(\\\"%s\\\"%s)'", yaml$github, + ", dependencies = FALSE, upgrade = FALSE, ask = FALSE" + ) -> deps + sprintf( + "#!/bin/bash +export $(cat .env | xargs) +echo \"FROM %s +%s\" > Dockerfile +docker build --pull --tag rn2k:%s . +rm Dockerfile", + yaml$docker, paste(deps, collapse = "\n"), docker_hash + ) +} + +create_docker_model_scripts <- function( + models, base, timeout = 4, limit = FALSE, volume, docker_hash, project +) { + if (inherits(base, "character")) { + script <- "./fit_model_file.sh" + } else { + script <- "./fit_model_aws.sh" + base <- get_bucketname(base) + } + c( + "echo \"\n\nmodel %i of %i\n\n\"\ndate\n", + "timeout --kill-after=2m %ih docker run %s --name=%s -v %s rn2k:%s", + script, " -b %s -p %s -m %s%s" + ) |> + paste(collapse = " ") |> + sprintf( + seq_along(models), length(models), timeout, + paste( + c( + "--rm", "--env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID", + "--env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY", + "--env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION", + "--cap-add NET_ADMIN"[limit], "--cpu-shares=512"[limit] + ), + collapse = " " + ), models, volume, docker_hash, base, project, models, + ifelse(limit, " -s 1", "") + ) +} + #' @export #' @rdname manifest_yaml_to_bash #' @importFrom assertthat assert_that is.count is.flag is.string noNA @@ -134,7 +154,7 @@ setMethod( signature = signature(base = "character"), definition = function( base, project, hash, shutdown = FALSE, split = 1, - status = c("new", "waiting"), limit = FALSE + status = c("new", "waiting"), limit = FALSE, timeout = 4 ) { assert_that( is.string(base), noNA(base), file_test("-d", base), is.string(project), @@ -169,31 +189,14 @@ setMethod( gsub("\\.manifest$", "", yaml$hash) |> read_manifest(base = base, project = project) -> manifest docker_hash <- get_file_fingerprint(manifest) - sprintf( - "Rscript -e 'remotes::install_github(\\\"%s\\\"%s)'", yaml$github, - ", dependencies = TRUE, upgrade = \\\"never\\\", keep_source = FALSE" - ) -> deps - sprintf( - "#!/bin/bash -echo \"FROM %s -RUN %s\" > Dockerfile -docker build --pull --tag rn2k:%s . -rm Dockerfile", - yaml$docker, paste(deps, collapse = " \\\n&& "), docker_hash - ) -> init + init <- create_docker_init(yaml = yaml, docker_hash = docker_hash) base <- normalizePath(base, winslash = "/") volume <- paste(base, base, "rw", sep = ":") models <- order_manifest(manifest = manifest) - sprintf( - "echo \"model %i of %i\" -docker run %s%s --name=%s -v %s rn2k:%s ./fit_model_file.sh -b %s -p %s -m %s -date -docker stop --time 14400 %s -date", - seq_along(models), length(models), "--rm -d", - ifelse(limit, "--cpu-shares=512", ""), models, volume, docker_hash, - base, project, models, models - ) -> model_scripts + model_scripts <- create_docker_model_scripts( + models = models, base = base, timeout = timeout, limit = limit, + volume = volume, docker_hash = docker_hash, project = project + ) path(base, project, "bash") |> dir_create() script <- path(base, project, sprintf("bash/%s.sh", docker_hash)) diff --git a/man/manifest_yaml_to_bash.Rd b/man/manifest_yaml_to_bash.Rd index 6ef9fb59..c7096eb7 100644 --- a/man/manifest_yaml_to_bash.Rd +++ b/man/manifest_yaml_to_bash.Rd @@ -36,7 +36,8 @@ manifest_yaml_to_bash( shutdown = FALSE, split = 1, status = c("new", "waiting"), - limit = FALSE + limit = FALSE, + timeout = 4 ) } \arguments{ diff --git a/tests/testthat/test_cba_fit_model_manifest.R b/tests/testthat/test_cba_fit_model_manifest.R index a694edc6..2f0f0ec8 100644 --- a/tests/testthat/test_cba_fit_model_manifest.R +++ b/tests/testthat/test_cba_fit_model_manifest.R @@ -38,8 +38,15 @@ test_that("it handles a manifest", { NA, get_file_fingerprint(object), get_file_fingerprint(object2) ), stringsAsFactors = FALSE - ) %>% + ) |> n2k_manifest() + hash <- store_manifest_yaml( + x = x, base = base, project = project, docker = "inbobmk/rn2k:dev-0.10", + dependencies = c("inbo/n2khelper@v0.5.0", "inbo/n2kanalysis@0.4.0") + ) + script <- manifest_yaml_to_bash( + base = base, project = project, hash = basename(hash) + ) expect_invisible(fit_model(x, base = base, project = project)) y <- store_manifest(x, base, project) expect_null(fit_model(y, base = base, project = project)) @@ -69,8 +76,15 @@ test_that("it handles a manifest", { NA, get_file_fingerprint(object), get_file_fingerprint(object2) ), stringsAsFactors = FALSE - ) %>% + ) |> n2k_manifest() + hash <- store_manifest_yaml( + x = x, base = aws_base, project = project, docker = "inbobmk/rn2k:dev-0.10", + dependencies = c("inbo/n2khelper@v0.5.0", "inbo/n2kanalysis@0.4.0") + ) + script <- manifest_yaml_to_bash( + base = aws_base, project = project, hash = basename(hash) + ) expect_s3_class( results <- get_result(x, base = aws_base, project = project), "data.frame"