diff --git a/DESCRIPTION b/DESCRIPTION
index cb31905..7b3e541 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -28,8 +28,35 @@ Imports:
     terra
 Suggests:
     tinytest,
-    quarto
+    quarto,
+    ranger
 VignetteBuilder: quarto
 Config/testthat/edition: 3
-LinkingTo:
+LinkingTo: Rcpp
+Collate:
+    'RcppExports.R'
+    'alloc_params_t.R'
+    'coords_t.R'
+    'covariance_filter.R'
+    'parquet_duckdb.R'
+    'evoland_db.R'
+    'evoland_db_neighbors.R'
+    'evoland_db_tables.R'
+    'evoland_db_views.R'
+    'grrf_filter.r'
+    'init.R'
+    'intrv_masks_t.R'
+    'intrv_meta_t.R'
+    'lulc_data_t.R'
+    'lulc_meta_t.R'
+    'neighbors_t.R'
+    'periods_t.R'
+    'pred_data_t.R'
+    'pred_meta_t.R'
+    'trans_meta_t.R'
+    'trans_models_t.R'
+    'trans_preds_t.R'
+    'util.R'
+    'util_download.R'
+    'util_terra.R'
diff --git a/NAMESPACE b/NAMESPACE
index 77baa3d..ee973a4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ S3method(print,intrv_masks_t)
 S3method(print,intrv_meta_t)
 S3method(print,lulc_data_t)
 S3method(print,lulc_meta_t)
+S3method(print,neighbors_t)
 S3method(print,periods_t)
 S3method(print,pred_data_t)
 S3method(print,pred_meta_t)
@@ -22,6 +23,7 @@ S3method(validate,intrv_masks_t)
 S3method(validate,intrv_meta_t)
 S3method(validate,lulc_data_t)
 S3method(validate,lulc_meta_t)
+S3method(validate,neighbors_t)
 S3method(validate,periods_t)
 S3method(validate,pred_data_t)
 S3method(validate,pred_data_t_bool)
@@ -37,24 +39,27 @@ export(as_intrv_masks_t)
 export(as_intrv_meta_t)
 export(as_lulc_data_t)
 export(as_lulc_meta_t)
+export(as_neighbors_t)
 export(as_periods_t)
 export(as_pred_data_t)
 export(as_pred_meta_t)
 export(as_trans_meta_t)
 export(as_trans_models_t)
 export(as_trans_preds_t)
-export(compute_neighbors)
+export(covariance_filter)
 export(create_coords_t_square)
 export(create_intrv_meta_t)
 export(create_intrv_meta_t_row)
 export(create_lulc_meta_t)
+export(create_neighbors_t)
 export(create_periods_t)
 export(create_pred_meta_t)
 export(create_trans_meta_t)
-export(create_trans_preds_t)
 export(download_and_verify)
 export(evoland_db)
 export(extract_using_coords_t)
+export(grrf_filter)
+export(parquet_duckdb)
 export(print_rowwise_yaml)
 export(validate)
 importFrom(Rcpp,sourceCpp)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 3a54496..395824b 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,7 +1,7 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

-distance_neighbors_cpp <- function(coords_t, max_distance, resolution = 100.0) {
-  .Call(`_evoland_distance_neighbors_cpp`, coords_t, max_distance, resolution)
+distance_neighbors_cpp <- function(coords_t, max_distance, resolution = 100.0, quiet = FALSE) {
+  .Call(`_evoland_distance_neighbors_cpp`, coords_t, max_distance, resolution, quiet)
 }
diff --git a/R/coords_t.R b/R/coords_t.R
index e4c77f2..e99609a 100644
--- a/R/coords_t.R
+++ b/R/coords_t.R
@@ -34,9 +34,9 @@ as_coords_t <- function(x) {
       geom_polygon = list()
     )
   }
-  cast_dt_col(x, "id_coord", as.integer)
+  cast_dt_col(x, "id_coord", "int")
   if (!is.null(x[["region"]])) {
-    cast_dt_col(x, "region", as.factor)
+    cast_dt_col(x, "region", "factor")
   }
   new_evoland_table(
     x,
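Aside: the coords_t.R hunk above (and matching hunks in intrv_masks_t.R, lulc_data_t.R, and evoland_db_views.R below) switches `cast_dt_col()` from taking a casting function to a type keyword. The helper itself is defined elsewhere in the package and is not part of this diff, so the following is only a hedged sketch of the shape such a helper could take, assuming in-place data.table mutation and a pipe-friendly invisible return; only the "int" and "factor" keywords are attested in this diff, and the actual implementation may differ:

# Hypothetical sketch only -- the real cast_dt_col() is not shown in this diff
cast_dt_col <- function(x, col, type = c("int", "factor")) {
  type <- match.arg(type)
  cast_fn <- switch(type, int = as.integer, factor = as.factor)
  # mutate the column in place, keeping data.table reference semantics
  data.table::set(x, j = col, value = cast_fn(x[[col]]))
  invisible(x)
}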
diff --git a/R/covariance_filter.R b/R/covariance_filter.R
new file mode 100644
index 0000000..bb9fc0c
--- /dev/null
+++ b/R/covariance_filter.R
@@ -0,0 +1,168 @@
+#' Two-stage covariate filtering
+#'
+#' The `covariance_filter` returns a set of covariates for land use land cover change
+#' (LULCC) models based on a two-stage variable selection: a first statistical fit
+#' estimates each covariate's quality for a given prediction task. A second step keeps
+#' only variables below a given correlation threshold: we iterate over a correlation
+#' matrix whose columns are ordered by the first-stage ranking. Starting with the
+#' leftmost column, all rows (i.e. candidates) with a correlation greater than the
+#' given threshold are dropped from the full set of candidates. This candidate
+#' selection is retained and used to select the next column, until no further columns
+#' are left to investigate. The columns that were iterated over are returned as a
+#' character vector of selected variable names.
+#'
+#' @param data A data.table of target variable and candidate covariates to be filtered;
+#'   wide format with one predictor per column.
+#' @param result_col Name of the column representing the transition results (0: no
+#'   trans, 1: trans)
+#' @param rank_fun Optional function to compute ranking scores for each covariate.
+#'   Should take arguments (x, y, weights, ...) and return a single numeric value
+#'   (lower = better). Defaults to polynomial GLM p-value ranking.
+#' @param weights Optional vector of weights to be used in the ranking function.
+#'   Defaults to class-balanced weights.
+#' @param corcut Numeric threshold (0-1) for correlation filtering. Covariates with an
+#'   absolute Pearson correlation above this threshold are filtered out. Default is
+#'   0.7; a value of 1 disables correlation filtering.
+#' @param ... Additional arguments passed to rank_fun.
+#'
+#' @return A character vector of column names (covariates) to retain
+#'
+#' @details
+#' The function first ranks covariates using the provided ranking function (default:
+#' quasibinomial polynomial GLM). Then, it iteratively removes highly (Pearson)
+#' correlated variables based on the correlation cutoff threshold, preserving variables
+#' in order of their ranking. See
+#' for
+#' where the concept came from. The original author was Antoine Adde, with edits by
+#' Benjamin Black. A similar mechanism is found in .
+#'
+#' @name covariance_filter
+#'
+#' @export

covariance_filter <- function(
  data,
  result_col = "result",
  rank_fun = rank_poly_glm,
  weights = compute_balanced_weights(data[[result_col]]),
  corcut = 0.7,
  ...
) {
  # Early return when there are no candidate covariates besides the result column
  if (ncol(data) == 1) {
    return(character(0))
  }

  data.table::setDT(data)

  # Validate the correlation cutoff
  stopifnot(
    "corcut must be between 0 and 1" = corcut >= 0 && corcut <= 1
  )

  # Compute ranking scores for all covariates (vectorized where possible)
  scores <- vapply(
    data[, -..result_col],
    rank_fun,
    FUN.VALUE = numeric(1),
    y = data[[result_col]],
    weights = weights,
    ...
  )

  # Sort by scores (lower = better/more significant)
  ranked_order <- names(sort(scores))

  # corcut == 1 disables correlation filtering: return all ranked predictors
  if (corcut == 1) {
    return(ranked_order)
  }

  # Compute correlation matrix once
  cor_mat <- abs(cor(data[, ..ranked_order], use = "pairwise.complete.obs"))

  # Iteratively select covariates based on correlation threshold
  select_by_correlation(cor_mat, corcut)
}
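# Usage sketch (illustrative only; simulated data, not part of the package tests).
# `x3` is built to be nearly collinear with `x1`, so with the default corcut = 0.7
# only one of the pair should survive the correlation stage:
#
#   set.seed(1)
#   n <- 500
#   x1 <- rnorm(n)
#   x2 <- rnorm(n)
#   x3 <- x1 + rnorm(n, sd = 0.1)
#   dt <- data.table::data.table(
#     result = rbinom(n, 1, stats::plogis(x1 - x2)),
#     x1 = x1, x2 = x2, x3 = x3
#   )
#   covariance_filter(dt, result_col = "result", corcut = 0.7)
#   #> e.g. c("x1", "x2") -- x3 is dropped for its high correlation with x1


+#' @describeIn covariance_filter Default ranking function using polynomial GLM. Returns
+#'   the lowest p-value among the polynomial terms
+#' @param x A numeric vector representing a single covariate
+#' @param y A binary outcome vector (0/1)
+#' @param weights Optional weights vector
+#' @keywords internal
rank_poly_glm <- function(x, y, weights = NULL, ...)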
{ + fit <- glm.fit( + x = cbind(1, poly(x, degree = 2, simple = TRUE)), + y = y, + family = quasibinomial(), + weights = weights + ) + + # Get p-values for linear and quadratic terms + coef_summary <- summary.glm(fit)$coefficients + + # Return minimum p-value (most significant term) + min(coef_summary[2:3, 4], na.rm = TRUE) +} + + +#' @describeIn covariance_filter Compute class-balanced weights for imbalanced binary +#' outcomes; returns a numeric vector +#' @param trans_result Binary outcome vector (0/1) +#' @param legacy Bool, use legacy weighting? +#' @keywords internal +compute_balanced_weights <- function(trans_result, legacy = FALSE) { + n_total <- length(trans_result) + n_trans <- sum(trans_result) + n_non_trans <- sum(!trans_result) + + # Compute inverse frequency weights + weights <- numeric(n_total) + + if (legacy) { + # I found this weighting in evoland-plus-legacy, but the models wouldn't converge + # https://github.com/ethzplus/evoland-plus-legacy/blob/main/R/lulcc.splitforcovselection.r + # This is actually just setting the underrepresented class to the rounded imbalance ratio + weights[!trans_result] <- 1 + weights[trans_result] <- round(n_non_trans / n_trans) + return(weights) + } + + # This is the heuristic in scikit-learn, n_samples / (n_classes * np.bincount(y)) + # https://scikit-learn.org/stable/modules/generated/sklearn.utils.class_weight.compute_class_weight.html #nolint + # This weighting maintains the exact imbalance ratio + weights[trans_result] <- n_total / (2 * n_trans) + weights[!trans_result] <- n_total / (2 * n_non_trans) + + weights +} + + +#' @describeIn covariance_filter Implements the iterative selection procedure. +#' @param cor_mat Absolute correlation matrix +#' @param corcut Correlation cutoff threshold +#' @keywords internal +select_by_correlation <- function(cor_mat, corcut) { + var_names <- colnames(cor_mat) + + # Early return if all correlations are below threshold + if (all(cor_mat[lower.tri(cor_mat)] < corcut)) { + return(var_names) + } + + selected <- character(0) + remaining_idx <- seq_along(var_names) + + while (length(remaining_idx) > 0) { + # Select the first remaining variable (highest ranked) + current_var <- remaining_idx[1] + selected <- c(selected, var_names[current_var]) + + # Find variables with correlation <= corcut with current variable + # (excluding the variable itself) + keep_idx <- which(cor_mat[remaining_idx, current_var] <= corcut) + remaining_idx <- remaining_idx[keep_idx] + } + + selected +} diff --git a/R/evoland_db.R b/R/evoland_db.R index 5179ffd..9aebd16 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -6,25 +6,24 @@ #' This class uses DuckDB for in-memory SQL operations while persisting data #' to disk in parquet format for better compression. #' +#' Inherits from [parquet_duckdb] for generic database operations. +#' +#' @seealso +#' Additional methods and active bindings are added to this class in separate files: +#' +#' - [evoland_db_tables] - Table active bindings (coords_t, lulc_data_t, etc.) +#' - [evoland_db_views] - View active bindings (lulc_meta_long_v, etc.) 
and methods +#' - [evoland_db_neighbors] - Neighbor analysis methods +#' +#' @include parquet_duckdb.R #' @export evoland_db <- R6::R6Class( classname = "evoland_db", + inherit = parquet_duckdb, ## Public Methods ---- public = list( - #' @field connection DBI connection object to an in-memory DuckDB database - connection = NULL, - - #' @field path Character string path to the data folder - path = NULL, - - #' @field default_format Default file format for new tables - default_format = NULL, - - #' @field writeopts Default write options for DuckDB, see - writeopts = NULL, - #' @description #' Initialize a new evoland_db object #' @param path Character string. Path to the data folder. @@ -38,363 +37,44 @@ evoland_db <- R6::R6Class( default_format = c("parquet", "csv"), ... ) { - # Create folder if it doesn't exist - self$path <- ensure_dir(path) - - # set format / writeopts - self$default_format <- match.arg(default_format) - self$writeopts <- switch( - self$default_format, - parquet = "FORMAT parquet, COMPRESSION zstd", - csv = "FORMAT csv", - stop(glue::glue("Unsupported format: {format}")) + # Initialize parent class with spatial extension + super$initialize( + path = path, + default_format = default_format, + extensions = "spatial" ) - # Create in-memory connection for SQL operations - self$connection <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") - self$execute("INSTALL spatial; LOAD spatial;") - + # Set evoland-specific reporting metadata self$set_report(...) invisible(self) }, - ### DB methods ---- - # TODO the database methods should be spun out into a parent class "parquet_db" that is - # independent of domain specific logic. the domain logic would then live in evoland_db. - #' @description - #' Commit data in overwrite mode - #' @param x Data frame to commit - #' @param table_name Character string table name - #' @param autoincrement_cols Character vector of column names to auto-increment - #' @param map_cols Character vector of columns to convert to MAP format - commit_overwrite = function( - x, - table_name, - autoincrement_cols = character(0), - map_cols = character(0) - ) { - file_path <- file.path(self$path, paste0(table_name, ".", self$default_format)) - - private$register_new_data_v(x, map_cols) - on.exit(private$cleanup_new_data_v(map_cols), add = TRUE) - - # if there are any of these, first get existing max values - if (length(intersect(autoincrement_cols, names(x))) > 0) { - warning(glue::glue( - "Overriding existing IDs ({toString(autoincrement_cols)}) with row numbers;\n", - "Assign these IDs manually and do not pass any autoincrement_cols to avoid this warning" - )) - } - - # autoincrement = row number; all other cols treated as ordinary - ordinary_cols <- setdiff(names(x), autoincrement_cols) - select_expr <- glue::glue_collapse( - c(glue::glue("row_number() over () as {autoincrement_cols}"), ordinary_cols), - sep = ",\n " - ) - - self$execute(glue::glue( - r"{ - copy ( - select {select_expr} - from new_data_v - ) to '{file_path}' ({self$writeopts}) - }" - )) - }, - - #' @description - #' Commit data in append mode - #' @param x Data frame to commit - #' @param table_name Character string table name - #' @param autoincrement_cols Character vector of column names to auto-increment - #' @param map_cols Character vector of columns to convert to MAP format - commit_append = function( - x, - table_name, - autoincrement_cols = character(0), - map_cols = character(0) - ) { - file_info <- private$get_file_path(table_name) - - if (!file_info$exists) { - 
self$commit_overwrite(x, table_name, autoincrement_cols, map_cols) - } - - self$attach_table(table_name) - on.exit(self$detach_table(table_name)) - private$set_autoincrement_vars(table_name, autoincrement_cols) - - private$register_new_data_v(x, map_cols) - on.exit(private$cleanup_new_data_v(map_cols), add = TRUE) - - ordinary_cols <- setdiff(names(x), autoincrement_cols) - select_new <- glue::glue_collapse( - c( - glue::glue( - "row_number() over () + getvariable('max_{autoincrement_cols}') as {autoincrement_cols}" - ), - ordinary_cols - ), - sep = ",\n " - ) - - # concatenation using union all; the "by name" option inserts NULLs if a column is - # missing in one of the queries. also makes it robust against differing col orders. - self$execute(glue::glue( - r"{ - copy ( - select - * - from - {table_name} - union all by name - select - {select_new} - from - new_data_v - ) - to '{file_info$path}' ({self$writeopts}) - }" - )) - }, - #' @description - #' Commit data in upsert mode - #' @param x Data frame to commit - #' @param table_name Character string table name - #' @param key_cols Identify unique columns - heuristic: if prefixed with - #' id_, the set of all columns designates a uniqueness condition - #' @param autoincrement_cols Character vector of column names to auto-increment - #' @param map_cols Character vector of columns to convert to MAP format - commit_upsert = function( - x, - table_name, - key_cols = grep("^id_", names(x), value = TRUE), - autoincrement_cols = character(0), - map_cols = character(0) - ) { - file_info <- private$get_file_path(table_name) - - if (!file_info$exists) { - return(self$commit_overwrite(x, table_name, autoincrement_cols, map_cols)) - } else if (length(key_cols) == 0) { - return(self$commit_append(x, table_name, autoincrement_cols, map_cols)) - } - - self$attach_table(table_name) - on.exit(self$detach_table(table_name)) - - private$set_autoincrement_vars(table_name, autoincrement_cols) - - private$register_new_data_v(x, map_cols) - on.exit(private$cleanup_new_data_v(map_cols), add = TRUE) - - # Update existing data without touching key_cols or autoincrement_cols - ordinary_cols <- setdiff(names(x), union(key_cols, autoincrement_cols)) - update_select_expr <- glue::glue_collapse( - glue::glue("{ordinary_cols} = new_data_v.{ordinary_cols}"), - sep = ",\n " - ) - update_join_condition <- glue::glue_collapse( - glue::glue("{table_name}.{key_cols} = new_data_v.{key_cols}"), - sep = "\nand " - ) - - self$execute(glue::glue( - r"{ - update {table_name} set - {update_select_expr} - from new_data_v - where - {update_join_condition}; - }" - )) - - # Insert new data with incrementing autoincrement_cols - insert_select_expr <- glue::glue_collapse( - c( - glue::glue( - "row_number() over () + getvariable('max_{autoincrement_cols}') as {autoincrement_cols}" - ), - glue::glue("new_data_v.{setdiff(names(x), autoincrement_cols)}") - ), - sep = ",\n " - ) - null_condition <- glue::glue_collapse( - glue::glue("{table_name}.{key_cols} is null"), - sep = "\nand " - ) - - self$execute(glue::glue( - r"{ - insert into {table_name} - select - {insert_select_expr} - from - new_data_v - left join - {table_name} - on - {update_join_condition} - where - {null_condition} - ; - }" - )) - - self$execute(glue::glue("copy {table_name} to '{file_info$path}' ({self$writeopts})")) - }, - - #' @description - #' Fetch data from storage + #' Fetch data from storage with evoland-specific view support #' @param table_name Character string. Name of the table to query. 
#' @param where Character string. Optional WHERE clause for the SQL query. #' @param limit integerish, limit the amount of rows to return #' #' @return A data.table fetch = function(table_name, where = NULL, limit = NULL) { - # Check if this is a view that needs special handling - view_result <- switch( - table_name, - lulc_meta_long_v = make_lulc_meta_long_v(self, private, where), - pred_sources_v = make_pred_sources_v(self, private, where), - transitions_v = make_transitions_v(self, private, where), - NULL - ) - - if (!is.null(view_result)) { - return(view_result) + # Check if this is a view (active binding) + if ( + # TODO these should probably not be active bindings, but instead methods with + # predefined query parameters + table_name %in% + c("lulc_meta_long_v", "pred_sources_v", "transitions_v", "extent", "coords_minimal") + ) { + return(self[[table_name]]) } file_info <- private$get_file_path(table_name) if (!file_info$exists) { - return(private$get_empty_table(table_name)) + stop("Table `", table_name, "` does not exist") } - # Build SQL query - sql <- glue::glue("SELECT * FROM read_{file_info$format}('{file_info$path}')") - - if (!is.null(where)) { - sql <- glue::glue("{sql} WHERE {where}") - } - if (!is.null(limit)) { - sql <- glue::glue("{sql} LIMIT {limit}") - } - - self$get_query(sql) - }, - - #' @description - #' List all tables (files) in storage - #' @return Character vector of table names - list_tables = function() { - list.files(self$path, pattern = "\\.(parquet|csv)$", full.names = FALSE) |> - tools::file_path_sans_ext() |> - unique() |> - sort() - }, - - #' @description - #' Execute statement - #' @param statement A SQL statement - #' @return No. of rows affected by statement - execute = function(statement) { - DBI::dbExecute(self$connection, statement) - }, - - #' @description - #' Get Query - #' @param statement A SQL statement - #' @return No. of rows affected by statement - get_query = function(statement) { - DBI::dbGetQuery(self$connection, statement) |> - data.table::as.data.table() - }, - - #' @description - #' Attach a single table as a temporary table in DuckDB memory. This is - #' useful for working with multiple tables in SQL queries instead of loading - #' them into R objects. - #' @param table_name Character vector. Names of table to attach. - #' @param columns Character vector. Optional sql column selection, defaults to "*" - attach_table = function(table_name, columns = "*", where = NULL) { - # TODO the attach/detach combo should be available in a DB method "with - # table"; it should do nothing if the table is already attached - file_info <- private$get_file_path(table_name) - - # Build SQL query - sql <- glue::glue( - "CREATE TEMP TABLE {table_name} AS ", - "SELECT {paste(columns, collapse = ', ')} ", - "FROM read_{file_info$format}('{file_info$path}')" - ) - - if (!is.null(where)) { - sql <- glue::glue("{sql} WHERE {where}") - } - - # Execute SQL - self$execute(sql) - }, - - #' @description - #' Detach one or more tables from the database. - #' @param table_name Character. Name of table to drop. - detach_table = function(table_name) { - self$execute(paste0("drop table ", table_name, ";")) - }, - - #' @description - #' Get table row count - #' @param table_name Character string. Name of the table to query. - #' @return No. 
of rows - row_count = function(table_name) { - file_info <- private$get_file_path(table_name) - - if (!file_info$exists) { - return(0L) - } - - self$get_query( - glue::glue("SELECT COUNT(*) as n FROM read_{file_info$format}('{file_info$path}')") - )[[1]] - }, - - #' @description - #' Delete rows from a table - #' @param table_name Character string. Name of the table to delete from. - #' @param where Character string, defaults to NULL: delete everything in table. - #' @return No. of rows affected - delete_from = function(table_name, where = NULL) { - file_info <- private$get_file_path(table_name) - - if (!file_info$exists) { - return(0L) - } - - count_before <- self$row_count(table_name) - - if (is.null(where)) { - file.remove(file_info$path) - return(count_before) - } - - self$execute(glue::glue( - r"{ - COPY ( - SELECT * FROM read_{file_info$format}('{file_info$path}') - WHERE NOT ({where}) - ) - TO '{file_info$path}' ({self$writeopts}) - }" - )) - - count_after <- self$row_count(table_name) - - return(count_before - count_after) + super$fetch(table_name, where, limit) }, ### Setter methods ---- @@ -405,21 +85,27 @@ evoland_db <- R6::R6Class( set_report = function(...) { params <- list(...) if (self$row_count("reporting_t") == 0L) { - params[["report_name"]] <- "evoland_scenario" - params[["report_name_pretty"]] <- "Default Evoland Scenario" - params[["report_include_date"]] <- "TRUE" - params[["creator_username"]] <- Sys.getenv("USER", unset = "unknown") + # only upsert if these values are missing upon DB init + params[["report_name"]] <- + params[["report_name"]] %||% "evoland_scenario" + params[["report_name_pretty"]] <- + params[["report_name_pretty"]] %||% "Default Evoland Scenario" + params[["report_include_date"]] <- + params[["report_include_date"]] %||% "TRUE" + params[["creator_username"]] <- + params[["creator_username"]] %||% Sys.getenv("USER", unset = "unknown") } params[["last_opened"]] <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") params[["last_opened_username"]] <- Sys.getenv("USER", unset = "unknown") - self$commit_upsert( + self$commit( data.table::as.data.table(list( key = names(params), # cannot name a column "key" in data.table() value = unlist(params) )), table_name = "reporting_t", - key_cols = "key" + key_cols = "key", + method = "upsert" ) }, @@ -438,7 +124,7 @@ evoland_db <- R6::R6Class( function(x) stop("Unsupported coordinate type specified.") ) - self$commit_overwrite(as_coords_t(create_fun(...)), "coords_t") + self$commit(as_coords_t(create_fun(...)), "coords_t", method = "overwrite") }, #' @description @@ -459,7 +145,7 @@ evoland_db <- R6::R6Class( return(invisible(NULL)) } - self$commit_append( + self$commit( do.call(create_periods_t, as.list(environment())), "periods_t" ) @@ -482,485 +168,14 @@ evoland_db <- R6::R6Class( where = glue::glue("name = '{names(pred_spec)}'") ) - data.table::set(pred_data, j = "id_pred", value = existing_meta[["id_pred"]]) + data.table::set(pred_data, j = "id_pred", value = as.integer(existing_meta[["id_pred"]])) data.table::setcolorder(pred_data, c("id_pred", "id_coord", "id_period", "value")) - self$commit_upsert( + self$commit( as_pred_data_t(pred_data, pred_type), - paste0("pred_data_t_", pred_type) - ) - }, - - #' @description - #' Print method for evoland_db - #' @param ... Not used - #' @return self (invisibly) - print = function(...) 
{ - cat("\n") - cat(sprintf("Database path: %s\n\n", self$path)) - - cat("Database methods:\n") - cat(" Commit: commit_overwrite(x, table_name, autoincrement_cols, map_cols),\n") - cat(" commit_append(x, table_name, autoincrement_cols, map_cols),\n") - cat(" commit_upsert(x, table_name, key_cols, autoincrement_cols, map_cols)\n") - cat(" Fetch: fetch(table_name, where, limit)\n") - cat(" Delete: delete_from(table_name, where)\n") - cat(" Other: list_tables(), execute(statement), get_query(statement),\n") - cat(" attach_table(table_name, columns), detach_table(table_name),\n") - cat(" row_count(table_name)\n\n") - - cat("Set / Add methods:\n") - cat(" set_report(...), set_coords(type, ...),\n") - cat(" set_periods(period_length_str, start_observed, end_observed, end_extrapolated),\n") - cat(" add_predictor(pred_spec, pred_data, pred_type)\n\n") - - cat("Active bindings (read-write):\n") - cat(" coords_t, periods_t, lulc_meta_t, lulc_data_t, pred_data_t_float,\n") - cat(" pred_data_t_int, pred_data_t_bool, pred_meta_t, trans_meta_t, trans_preds_t,\n") - cat(" intrv_meta_t, intrv_masks_t, trans_models_t, alloc_params_t\n\n") - - cat("Active bindings (read-only):\n") - cat(" extent, coords_minimal, lulc_meta_long_v, pred_sources_v\n") - - invisible(self) - } - ), - - ## Active Bindings ---- - active = list( - ### Bindings for tables ---- - #' @field coords_t A `coords_t` instance; see [create_coords_t()] for the type of - #' object to assign. Assigning is an upsert operation. - coords_t = function(x) { - create_active_binding(self, "coords_t", as_coords_t)(x) - }, - - #' @field periods_t A `periods_t` instance; see [create_periods_t()] for the type of - #' object to assign. Assigning is an upsert operation. - periods_t = function(x) { - create_active_binding(self, "periods_t", as_periods_t)(x) - }, - - #' @field lulc_meta_t A `lulc_meta_t` instance; see [create_lulc_meta_t()] for the type of - #' object to assign. Assigning is an upsert operation. - lulc_meta_t = function(x) { - create_active_binding(self, "lulc_meta_t", as_lulc_meta_t)(x) - }, - - #' @field lulc_meta_long_v Return a `lulc_meta_long_v` instance, i.e. unrolled `lulc_meta_t`. - lulc_meta_long_v = function() { - make_lulc_meta_long_v(self, private) - }, - - #' @field lulc_data_t A `lulc_data_t` instance; see [as_lulc_data_t()] for the type of - #' object to assign. Assigning is an upsert operation. - lulc_data_t = function(x) { - create_active_binding(self, "lulc_data_t", as_lulc_data_t)(x) - }, - - #' @field transitions_v Get the transitions from `lulc_data_t`. - transitions_v = function() { - make_transitions_v(self, private) - }, - - #' @field pred_data_t_float A `pred_data_t_float` instance; see - #' [create_pred_data_t()] for the type of object to assign. Assigning is an - #' upsert operation. - pred_data_t_float = function(x) { - create_active_binding( - self, - "pred_data_t_float", - as_pred_data_t, - type = "float" - )(x) - }, - - #' @field pred_data_t_int A `pred_data_t_int` instance; see - #' [create_pred_data_t()] for the type of object to assign. Assigning is an - #' upsert operation. - pred_data_t_int = function(x) { - create_active_binding( - self, - "pred_data_t_int", - as_pred_data_t, - type = "int" - )(x) - }, - - #' @field pred_data_t_bool A `pred_data_t_bool` instance; see - #' [create_pred_data_t()] for the type of object to assign. Assigning is an - #' upsert operation. 
- pred_data_t_bool = function(x) { - create_active_binding( - self, - "pred_data_t_bool", - as_pred_data_t, - type = "bool" - )(x) - }, - - ### Bindings for descriptions, views, etc. ---- - #' @field extent Return a terra SpatExtent based on coords_t - extent = function() { - make_extent_db(self, private) - }, - - #' @field coords_minimal data.table with only (id_coord, lon, lat) - coords_minimal = function() { - make_coords_minimal(self, private) - }, - - #' @field pred_meta_t A `pred_meta_t` instance; see [create_pred_meta_t()] for the type of - #' object to assign. Assigning is an upsert operation. - pred_meta_t = function(x) { - if (missing(x)) { - x <- self$fetch("pred_meta_t") - return(as_pred_meta_t(x)) - } - stopifnot(inherits(x, "pred_meta_t")) - self$commit_upsert( - x, - table_name = "pred_meta_t", - key_cols = "name", - autoincrement_cols = "id_pred" - ) - }, - - #' @field pred_sources_v Retrieve a table of distinct predictor urls and their - #' md5sum - pred_sources_v = function() { - make_pred_sources_v(self, private) - }, - - #' @field trans_meta_t A `trans_meta_t` instance; see [create_trans_meta_t()] for the type of - #' object to assign. Assigning is an upsert operation. - trans_meta_t = function(x) { - if (missing(x)) { - x <- self$fetch("trans_meta_t") - return(as_trans_meta_t(x)) - } - stopifnot(inherits(x, "trans_meta_t")) - self$commit_upsert( - x, - table_name = "trans_meta_t", - key_cols = c("id_lulc_anterior", "id_lulc_posterior"), - autoincrement_cols = "id_trans" - ) - }, - - #' @field trans_preds_t A `trans_preds_t` instance; see [create_trans_preds_t()] for the type of - #' object to assign. Assigning is an upsert operation. - trans_preds_t = function(x) { - create_active_binding(self, "trans_preds_t", as_trans_preds_t)(x) - }, - - #' @field intrv_meta_t A `intrv_meta_t` instance; see [create_intrv_meta_t()] for the type of - #' object to assign. Assigning is an upsert operation. - intrv_meta_t = function(x) { - if (missing(x)) { - return(as_intrv_meta_t( - convert_list_cols(self$fetch("intrv_meta_t"), "params", kv_df_to_list) - )) - } - stopifnot(inherits(x, "intrv_meta_t")) - - self$commit_upsert(x, "intrv_meta_t", key_cols = "id_intrv", map_cols = "params") - }, - - #' @field intrv_masks_t A `intrv_masks_t` instance; see [as_intrv_masks_t()] for the type of - #' object to assign. Assigning is an upsert operation. - intrv_masks_t = function(x) { - create_active_binding(self, "intrv_masks_t", as_intrv_masks_t)(x) - }, - - #' @field trans_models_t A `trans_models_t` instance; see [create_trans_models_t()] for the type - #' of object to assign. Assigning is an upsert operation. - trans_models_t = function(x) { - if (missing(x)) { - return(as_trans_models_t( - convert_list_cols( - self$fetch("trans_models_t"), - c("model_params", "goodness_of_fit"), - kv_df_to_list - ) - )) - } - stopifnot(inherits(x, "trans_models_t")) - - self$commit_upsert( - x, - "trans_models_t", - key_cols = c("id_trans", "id_period"), - map_cols = c("model_params", "goodness_of_fit") - ) - }, - - #' @field alloc_params_t A `alloc_params_t` instance; see [as_alloc_params_t()] for the type - #' of object to assign. Assigning is an upsert operation. 
- alloc_params_t = function(x) { - if (missing(x)) { - return(as_alloc_params_t( - convert_list_cols( - self$fetch("alloc_params_t"), - c("alloc_params", "goodness_of_fit"), - kv_df_to_list - ) - )) - } - stopifnot(inherits(x, "alloc_params_t")) - - self$commit_upsert( - x, - "alloc_params_t", - key_cols = c("id_trans", "id_period"), - map_cols = c("alloc_params", "goodness_of_fit") + paste0("pred_data_t_", pred_type), + method = "upsert" ) } - ), - - ## Private Methods ---- - private = list( - # r6 hook called on gc(); Close the database connection - # - # return NULL (called for side effects) - finalize = function() { - if (!is.null(self$connection)) { - DBI::dbDisconnect(self$connection) - self$connection <- NULL - } - }, - - # Get file path and format for a table - # - # param table_name Character string table name - # return List with path, format, and exists flag - get_file_path = function(table_name) { - # Check for parquet first, then csv - parquet_path <- file.path(self$path, paste0(table_name, ".parquet")) - csv_path <- file.path(self$path, paste0(table_name, ".csv")) - - if (file.exists(parquet_path)) { - return(list(path = parquet_path, format = "parquet", exists = TRUE)) - } else if (file.exists(csv_path)) { - return(list(path = csv_path, format = "csv", exists = TRUE)) - } else { - # Return default format for new file - default_path <- file.path( - self$path, - paste0(table_name, ".", self$default_format) - ) - return(list( - path = default_path, - format = self$default_format, - exists = FALSE - )) - } - }, - - # Register new_data_v table, optionally converting MAP columns - # - # param x Data to register or character name of existing table/view - # param map_cols Character vector of columns to convert to MAP format - # return NULL (called for side effects) - register_new_data_v = function(x, map_cols = character(0)) { - # If x is a table name, create alias to new_data_v - if (is.character(x) && length(x) == 1) { - self$execute(glue::glue("create view new_data_v as select * from {x}")) - return(invisible(NULL)) - } - - if (length(map_cols) == 0) { - # No MAP conversion needed - register directly - duckdb::duckdb_register(self$connection, "new_data_v", x) - } else { - # Convert list columns to key-value dataframes - x <- - data.table::copy(x) |> - convert_list_cols(map_cols, list_to_kv_df) - - # Register as intermediate table - duckdb::duckdb_register(self$connection, "new_data_raw", x) - - # Build SELECT expression with map_from_entries for MAP columns - map_exprs <- glue::glue("map_from_entries({map_cols}) as {map_cols}") - other_cols <- setdiff(names(x), map_cols) - all_exprs <- c(other_cols, map_exprs) - select_expr <- glue::glue_collapse(all_exprs, sep = ", ") - - # Create new_data_v from new_data_raw - self$execute(glue::glue( - "create temp table new_data_v as select {select_expr} from new_data_raw" - )) - } - - invisible(NULL) - }, - - # Cleanup new_data_v and related tables - # - # param map_cols Character vector indicating if MAP conversion was used - # return NULL (called for side effects) - cleanup_new_data_v = function(map_cols = character(0)) { - if (length(map_cols) == 0) { - duckdb::duckdb_unregister(self$connection, "new_data_v") - } else { - self$execute( - "drop table if exists new_data_v; - drop view if exists new_data_v" - ) - duckdb::duckdb_unregister(self$connection, "new_data_raw") - } - - invisible(NULL) - }, - - # Write data to file - # - # param x Data frame to write - # param table_name Character string table name - write_file = function(x, 
table_name) { - file_path <- file.path(self$path, paste0(table_name, ".", self$default_format)) - - duckdb::duckdb_register(self$connection, "temp_write_table", x) - on.exit(duckdb::duckdb_unregister(self$connection, "temp_write_table")) - - sql <- glue::glue("COPY temp_write_table TO '{file_path}' ({self$writeopts})") - DBI::dbExecute(self$connection, sql) - }, - - # Set one duckdb variable name max_{colname} to the maximum found for each - # autoincrement_col in table_name. if column is missing from table_name, set to 0. - set_autoincrement_vars = function(table_name, autoincrement_cols) { - if (length(autoincrement_cols) == 0L) { - return(NULL) - } - if (!DBI::dbExistsTable(self$connection, table_name)) { - # attach / detach unless it was already there - self$attach_table(table_name) - on.exit(self$detach_table(table_name)) - } - - existing_cols <- - glue::glue("select column_name from (describe {table_name})") |> - self$get_query() |> - (\(x) x[[1]])() |> - intersect(autoincrement_cols) - - missing_cols <- setdiff(autoincrement_cols, existing_cols) - - set_exprs <- glue::glue_collapse( - c( - glue::glue( - "set variable max_{existing_cols} = (select coalesce(max({existing_cols}), 0) from {table_name});" - ), - glue::glue( - "set variable max_{missing_cols} = 0;" - ) - ), - sep = "\n" - ) - - self$execute(set_exprs) - }, - - # Get empty table with proper structure - # - # param table_name Character string table name - # return Empty data.table with correct columns - get_empty_table = function(table_name) { - # Define empty table structures - empty_tables <- list( - reporting_t = suppressWarnings(data.table::data.table( - key = character(0), - value = character(0) - )), - coords_t = as_coords_t(), - periods_t = as_periods_t(), - lulc_meta_t = as_lulc_meta_t(), - lulc_data_t = as_lulc_data_t(), - pred_meta_t = as_pred_meta_t(), - pred_data_t_float = as_pred_data_t(type = "float"), - pred_data_t_int = as_pred_data_t(type = "int"), - pred_data_t_bool = as_pred_data_t(type = "bool"), - trans_meta_t = as_trans_meta_t(), - trans_preds_t = as_trans_preds_t(), - intrv_meta_t = as_intrv_meta_t(), - intrv_masks_t = as_intrv_masks_t(), - trans_models_t = as_trans_models_t(), - alloc_params_t = as_alloc_params_t() - ) - - if (table_name %in% names(empty_tables)) { - return(empty_tables[[table_name]]) - } - - # Default: return empty data.table - data.table::data.table() - } ) ) - -# Helper function to create simple Bindings with standard fetch/commit pattern -# param self The R6 instance (self). -# param table_name Character string. Name of the table. -# param as_fn Function to convert fetched data to the appropriate type. -# param ... Additional arguments passed to as_fn. -# return A function suitable for use as an active binding. -create_active_binding <- function(self, table_name, as_fn, ...) { - extra_args <- list(...) 
-  function(x) {
-    if (missing(x)) {
-      x <- self$fetch(table_name)
-      return(do.call(as_fn, c(list(x), extra_args)))
-    }
-    stopifnot(inherits(x, table_name))
-    self$commit_upsert(x, table_name)
-  }
-}
-
-
-# Helper functions for converting between list and data.frame formats for DuckDB MAPs
-convert_list_cols <- function(x, cols, fn) {
-  for (col in cols) {
-    x[[col]] <- lapply(x[[col]], fn)
-  }
-  x
-}
-
-list_to_kv_df <- function(x) {
-  if (is.null(x) || length(x) == 0) {
-    return(data.frame(
-      key = character(0),
-      value = character(0),
-      stringsAsFactors = FALSE
-    ))
-  }
-  data.frame(
-    key = names(x),
-    value = as.character(unlist(x)),
-    stringsAsFactors = FALSE
-  )
-}
-
-kv_df_to_list <- function(x) {
-  if (is.null(x) || nrow(x) == 0) {
-    return(NULL)
-  }
-
-  out <- list()
-
-  for (row in seq_len(nrow(x))) {
-    key <- x$key[row]
-    val <- x$value[row]
-
-    # Try numeric conversion
-    num_val <- suppressWarnings(as.numeric(val))
-    if (!is.na(num_val)) {
-      out[[key]] <- num_val
-    } else {
-      out[[key]] <- val
-    }
-  }
-
-  out
-}
diff --git a/R/evoland_db_neighbors.R b/R/evoland_db_neighbors.R
new file mode 100644
index 0000000..f8f6d20
--- /dev/null
+++ b/R/evoland_db_neighbors.R
@@ -0,0 +1,162 @@
+#' Neighbor analysis methods for evoland_db
+#'
+#' @description
+#' This file adds neighbor analysis methods to the `evoland_db` class using R6's `$set()` method.
+#' These methods compute neighbor relationships and generate neighbor-based predictors.
+#'
+#' @section Methods Added:
+#'
+#' - `set_neighbors(max_distance, distance_breaks, resolution, overwrite)` -
+#'   Computes neighbor relationships between coordinates.
+#'   - `max_distance`: Maximum distance for neighbors (default: 1000)
+#'   - `distance_breaks`: Vector of breaks for distance classes (default: c(0, 100, 500, 1000))
+#'   - `resolution`: Grid resolution for distance calculations (default: 100)
+#'   - `overwrite`: Whether to overwrite existing neighbors_t (default: FALSE)
+#' - `generate_neighbor_predictors()` - Generates predictor variables based on neighbor
+#'   land use counts by distance class. Requires neighbors_t with a distance_class column
+#'   (i.e. `set_neighbors()` called with `distance_breaks`).
+#'
+#' @name evoland_db_neighbors
+#' @include evoland_db.R
NULL

evoland_db$set(
  "public",
  "set_neighbors",
  function(
    max_distance = 1000,
    distance_breaks = c(0, 100, 500, 1000),
    resolution = 100,
    overwrite = FALSE
  ) {
    if (!overwrite && self$row_count("neighbors_t") > 0) {
      message("neighbors_t already exists. Use overwrite = TRUE to recompute.")
      return(invisible(self))
    }

    coords <- self$coords_t

    neighbors <- create_neighbors_t(
      coords,
      max_distance = max_distance,
      distance_breaks = distance_breaks,
      resolution = resolution
    )

    self$commit(
      as_neighbors_t(neighbors),
      table_name = "neighbors_t",
      method = "overwrite"
    )

    message(glue::glue("Computed {nrow(neighbors)} neighbor relationships"))
    invisible(self)
  }
)
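# Usage sketch (illustrative, not package code): assuming a populated database at a
# hypothetical path "data/evoland", the two methods added in this file chain together;
# `commit(method = ...)` comes from the new parquet_duckdb parent class:
#
#   db <- evoland_db$new("data/evoland")
#   db$set_neighbors(
#     max_distance = 1000,
#     distance_breaks = c(0, 100, 500, 1000),
#     resolution = 100
#   )
#   db$generate_neighbor_predictors()
#
# set_neighbors() persists the pairwise neighbor relationships once; the generator
# below then counts neighbors per LULC class and distance class into integer
# predictors.

evoland_db$set("public", "generate_neighbor_predictors", function() {
  if (self$row_count("neighbors_t") == 0) {
    stop("No neighbor data found. Run $set_neighbors() first.")
  }

  if (self$row_count("lulc_meta_t") == 0) {
    stop("No LULC metadata found. Add lulc_meta_t before generating neighbor predictors.")
  }

  if (self$row_count("lulc_data_t") == 0) {
    stop("No LULC data found.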
Add lulc_data_t before generating neighbor predictors.")
  }

  neighbors_sample <- self$fetch("neighbors_t", limit = 0L)
  if (!"distance_class" %in% names(neighbors_sample)) {
    stop(
      "neighbors_t does not have a distance_class column. Run $set_neighbors() with the distance_breaks parameter."
    )
  }

  self$attach_table(
    "neighbors_t",
    columns = c("id_coord_origin", "id_coord_neighbor", "distance_class")
  )
  self$attach_table("lulc_data_t")
  self$attach_table("lulc_meta_t")

  on.exit({
    self$detach_table("neighbors_t")
    self$detach_table("lulc_data_t")
    self$detach_table("lulc_meta_t")
  })

  n_predictors <- self$execute(
    r"{
    create or replace temp table pred_meta_neighbors_t as
    with
    all_distance_classes as (select distinct distance_class from neighbors_t)
    select
      NULL as id_pred,
      concat('id_lulc_', l.id_lulc, '_dist_', c.distance_class) as name,
      concat('Count of ', l.pretty_name, ' within distance class ', c.distance_class) as pretty_name,
      'Number of neighbors by land use class and distance interval' as description,
      'land use coordinate data' as orig_format,
      NULL as sources,
      'number of neighbors' as unit,
      NULL as factor_levels,
      c.distance_class,
      l.id_lulc
    from
      lulc_meta_t l
    cross join
      all_distance_classes c
    }"
  )
  self$execute(
    "create or replace view pred_meta_upsert_v as
     select name, pretty_name, description, orig_format, sources, unit, factor_levels
     from pred_meta_neighbors_t"
  )

  self$attach_table("pred_meta_t")
  on.exit(self$detach_table("pred_meta_t"), add = TRUE)
  self$execute(
    r"{
    update pred_meta_neighbors_t
    set id_pred = pred_meta_t.id_pred
    from pred_meta_t
    where pred_meta_neighbors_t.name = pred_meta_t.name
    }"
  )

  n_data_points <- self$execute(
    r"{
    create temp table pred_neighbors_t as
    select
      p.id_pred,
      n.id_coord_origin as id_coord,
      t.id_period,
      count(n.id_coord_neighbor) as value
    from
      neighbors_t n,
      lulc_data_t t,
      pred_meta_neighbors_t p
    where
      n.id_coord_neighbor = t.id_coord
      and p.id_lulc = t.id_lulc
      and p.distance_class = n.distance_class
    group by
      n.id_coord_origin,
      n.distance_class,
      t.id_period,
      t.id_lulc,
      p.id_pred
    }"
  )

  self$commit("pred_neighbors_t", "pred_neighbors_t_int", method = "upsert")

  message(glue::glue(
    "Generated {n_predictors} neighbor predictor variables with ",
    "{n_data_points} data points"
  ))

  invisible(self)
})
diff --git a/R/evoland_db_tables.R b/R/evoland_db_tables.R
new file mode 100644
index 0000000..1ff2043
--- /dev/null
+++ b/R/evoland_db_tables.R
@@ -0,0 +1,207 @@
+#' Table active bindings for evoland_db
+#'
+#' @description
+#' This file adds table active bindings to the `evoland_db` class using R6's `$set()` method.
+#' These bindings provide read/write access to database tables with automatic validation.
+#'
+#' @section Active Bindings Added:
+#'
+#' - `coords_t` - Coordinates table. See [as_coords_t()]
+#' - `periods_t` - Time periods table. See [as_periods_t()]
+#' - `lulc_meta_t` - Land use/land cover metadata. See [as_lulc_meta_t()]
+#' - `lulc_data_t` - Land use/land cover data. See [as_lulc_data_t()]
+#' - `pred_data_t_float` - Float predictor data. See [as_pred_data_t()]
+#' - `pred_data_t_int` - Integer predictor data. See [as_pred_data_t()]
+#' - `pred_data_t_bool` - Boolean predictor data. See [as_pred_data_t()]
+#' - `pred_meta_t` - Predictor metadata. See [as_pred_meta_t()]
+#' - `trans_meta_t` - Transition metadata.
See [as_trans_meta_t()] +#' - `trans_preds_t` - Transition-predictor relationships. See [as_trans_preds_t()] +#' - `intrv_meta_t` - Intervention metadata. See [as_intrv_meta_t()] +#' - `intrv_masks_t` - Intervention masks. See [as_intrv_masks_t()] +#' - `trans_models_t` - Transition models. See [as_trans_models_t()] +#' - `alloc_params_t` - Allocation parameters. See [as_alloc_params_t()] +#' - `neighbors_t` - Neighbor relationships. See [as_neighbors_t()] +#' +#' @name evoland_db_tables +#' @include evoland_db.R +NULL + +create_table_binding <- function( + self, + table_name, + as_fn, + key_cols = NULL, + autoincrement_cols = NULL, + map_cols = NULL, + ... +) { + extra_args <- list(...) + + function(x) { + if (missing(x)) { + fetched <- self$fetch(table_name) + + if (!is.null(map_cols) && nrow(fetched) > 0) { + fetched <- convert_list_cols(fetched, map_cols, kv_df_to_list) + } + + return(do.call(as_fn, c(list(fetched), extra_args))) + } + + stopifnot(inherits(x, table_name)) + + self$commit( + x, + table_name = table_name, + key_cols = key_cols, + autoincrement_cols = autoincrement_cols, + map_cols = map_cols, + method = "upsert" + ) + } +} + +evoland_db$set("active", "coords_t", function(x) { + create_table_binding( + self, + "coords_t", + as_coords_t, + key_cols = "id_coord" + )(x) +}) + +evoland_db$set("active", "periods_t", function(x) { + create_table_binding( + self, + "periods_t", + as_periods_t, + key_cols = "id_period" + )(x) +}) + +evoland_db$set("active", "lulc_meta_t", function(x) { + create_table_binding( + self, + "lulc_meta_t", + as_lulc_meta_t, + key_cols = "id_lulc" + )(x) +}) + +evoland_db$set("active", "lulc_data_t", function(x) { + create_table_binding( + self, + "lulc_data_t", + as_lulc_data_t, + key_cols = c("id_coord", "id_period") + )( + x + ) +}) + +evoland_db$set("active", "pred_data_t_float", function(x) { + create_table_binding( + self, + "pred_data_t_float", + as_pred_data_t, + key_cols = c("id_pred", "id_coord", "id_period"), + type = "float" + )(x) +}) + +evoland_db$set("active", "pred_data_t_int", function(x) { + create_table_binding( + self, + "pred_data_t_int", + as_pred_data_t, + key_cols = c("id_pred", "id_coord", "id_period"), + type = "int" + )(x) +}) + +evoland_db$set("active", "pred_data_t_bool", function(x) { + create_table_binding( + self, + "pred_data_t_bool", + as_pred_data_t, + key_cols = c("id_pred", "id_coord", "id_period"), + type = "bool" + )(x) +}) + +evoland_db$set("active", "pred_meta_t", function(x) { + create_table_binding( + self, + "pred_meta_t", + as_pred_meta_t, + key_cols = "name", + autoincrement_cols = "id_pred" + )(x) +}) + +evoland_db$set("active", "trans_meta_t", function(x) { + create_table_binding( + self, + "trans_meta_t", + as_trans_meta_t, + key_cols = c("id_lulc_anterior", "id_lulc_posterior"), + autoincrement_cols = "id_trans" + )(x) +}) + +evoland_db$set("active", "trans_preds_t", function(x) { + create_table_binding( + self, + "trans_preds_t", + as_trans_preds_t, + key_cols = c("id_trans", "id_pred") + )(x) +}) + +evoland_db$set("active", "intrv_meta_t", function(x) { + create_table_binding( + self, + "intrv_meta_t", + as_intrv_meta_t, + key_cols = "id_intrv", + map_cols = "params" + )(x) +}) + +evoland_db$set("active", "intrv_masks_t", function(x) { + create_table_binding( + self, + "intrv_masks_t", + as_intrv_masks_t, + key_cols = c("id_coord", "id_intrv") + )(x) +}) + +evoland_db$set("active", "trans_models_t", function(x) { + create_table_binding( + self, + "trans_models_t", + as_trans_models_t, + key_cols = 
c("id_trans", "id_period"), + map_cols = c("model_params", "goodness_of_fit") + )(x) +}) + +evoland_db$set("active", "alloc_params_t", function(x) { + create_table_binding( + self, + "alloc_params_t", + as_alloc_params_t, + key_cols = c("id_trans", "id_period"), + map_cols = c("alloc_params", "goodness_of_fit") + )(x) +}) + +evoland_db$set("active", "neighbors_t", function(x) { + create_table_binding( + self, + "neighbors_t", + as_neighbors_t, + key_cols = c("id_coord_origin", "id_coord_neighbor") + )(x) +}) diff --git a/R/evoland_db_views.R b/R/evoland_db_views.R index f17c8df..06325cb 100644 --- a/R/evoland_db_views.R +++ b/R/evoland_db_views.R @@ -1,111 +1,286 @@ #' Views on the evoland-plus data model #' #' @description -#' Functions to generate views on the database +#' This file adds view active bindings and methods to the `evoland_db` class using R6's `$set()` method. +#' These provide computed views on the database without storing additional data. +#' +#' @section Active Bindings Added: +#' +#' - `lulc_meta_long_v` - Unrolled LULC metadata with one row per source class +#' - `pred_sources_v` - Distinct predictor URLs and their MD5 checksums +#' - `transitions_v` - Land use transitions derived from lulc_data_t +#' - `extent` - Spatial extent of coords_t as terra::SpatExtent +#' - `coords_minimal` - Minimal coordinate representation (id_coord, lon, lat) +#' +#' @section Methods Added: +#' +#' - `trans_pred_data_v(id_trans)` - Returns wide table of transition results and predictor data for a specific transition. Used as input to covariance filtering. #' #' @name evoland_db_views +#' @include evoland_db.R NULL -#' @describeIn evoland_db_views Retrieve a table of distinct predictor urls and their -#' md5sum -make_pred_sources_v <- function(self, private, where = NULL) { - self$attach_table("pred_meta_t") - on.exit(self$detach_table("pred_meta_t")) - - where_clause <- if (!is.null(where)) paste("AND", where) else "" - - self$get_query(glue::glue( - r"{ - select distinct - unnest(sources).url as url, - unnest(sources).md5sum as md5sum - from pred_meta_t - where sources is not null {where_clause} - }" - )) -} - - -#' @describeIn evoland_db_views Return a `lulc_meta_long_v` instance, i.e. unrolled `lulc_meta_t`. 
-make_lulc_meta_long_v <- function(self, private, where = NULL) { - self$attach_table("lulc_meta_t") - on.exit(self$detach_table("lulc_meta_t")) - - where_clause <- if (!is.null(where)) paste("WHERE", where) else "" - - self$get_query(glue::glue( - r"{ - select - id_lulc, - name, - unnest(src_classes) as src_class - from - lulc_meta_t - {where_clause} - }" - )) -} - -#' @describeIn evoland_db_views Minimal coordinate representation (id_coord, lon, lat) -make_coords_minimal <- function(self, private, where = NULL) { - self$attach_table("coords_t", c("id_coord", "lon", "lat")) - on.exit(self$detach_table("coords_t")) - - where_clause <- if (!is.null(where)) paste("WHERE", where) else "" - - self$get_query(glue::glue( - r"{ - select id_coord, lon, lat - from coords_t - {where_clause} - }" - )) |> - cast_dt_col("id_coord", as.integer) |> - data.table::setkeyv("id_coord") -} - -#' @describeIn evoland_db_views Returns the extent of the coords_t as terra::SpatExtent -make_extent_db <- function(self, private) { - self$attach_table("coords_t", c("lon", "lat")) - on.exit(self$detach_table("coords_t")) - - self$get_query(glue::glue( - r"{ - SELECT - min(lon) as xmin, - max(lon) as xmax, - min(lat) as ymin, - max(lat) as ymax - FROM - coords_t - }" - )) |> - unlist() |> - terra::ext() -} - -#' @describeIn evoland_db_views -make_transitions_v <- function(self, private, where = NULL) { - self$attach_table("lulc_data_t") - on.exit(self$detach_table("lulc_data_t")) - - where_clause <- if (!is.null(where)) paste("WHERE", where) else "" - - self$get_query(glue::glue( - r"{ - SELECT - curr.id_period, - prev.id_lulc as id_lulc_anterior, - curr.id_lulc as id_lulc_posterior, - curr.id_coord - FROM - lulc_data_t as curr - INNER JOIN - lulc_data_t as prev - ON - curr.id_coord = prev.id_coord - AND curr.id_period = prev.id_period + 1 - {where_clause} - }" - )) -} +evoland_db$set("active", "lulc_meta_long_v", function() { + self$with_tables("lulc_meta_t", function() { + self$get_query(glue::glue( + r"{ + select + id_lulc, + name, + unnest(src_classes) as src_class + from + lulc_meta_t + }" + )) + }) +}) + +evoland_db$set("active", "pred_sources_v", function() { + self$with_tables("pred_meta_t", function() { + self$get_query(glue::glue( + r"{ + select distinct + unnest(sources).url as url, + unnest(sources).md5sum as md5sum + from pred_meta_t + where sources is not null + }" + )) + }) +}) + +evoland_db$set("active", "transitions_v", function() { + self$with_tables("lulc_data_t", function() { + self$get_query(glue::glue( + r"{ + SELECT + curr.id_period, + prev.id_lulc as id_lulc_anterior, + curr.id_lulc as id_lulc_posterior, + curr.id_coord + FROM + lulc_data_t as curr + INNER JOIN + lulc_data_t as prev + ON + curr.id_coord = prev.id_coord + AND curr.id_period = prev.id_period + 1 + }" + )) + }) +}) + +evoland_db$set("active", "extent", function() { + self$with_tables("coords_t", function() { + self$get_query(glue::glue( + r"{ + SELECT + min(lon) as xmin, + max(lon) as xmax, + min(lat) as ymin, + max(lat) as ymax + FROM + coords_t + }" + )) |> + unlist() |> + terra::ext() + }) +}) + +evoland_db$set("active", "coords_minimal", function() { + self$with_tables("coords_t", function() { + self$get_query(glue::glue( + r"{ + select id_coord, lon, lat + from coords_t + }" + )) |> + cast_dt_col("id_coord", "int") |> + data.table::setkeyv("id_coord") + }) +}) + +# get transitions along with their predictor data in a wide data.table +# id_trans - integer transition ID +# id_pred - optional integer vector of predictor IDs to 
include (NULL = all predictors) +# na_value - if not NA, replace all NULL/NA predictor values with this value +evoland_db$set("public", "trans_pred_data_v", function(id_trans, id_pred = NULL, na_value = NA) { + stopifnot( + "id_trans must be a single integer" = length(id_trans) == 1L && is.numeric(id_trans), + "id_pred must be NULL or a numeric vector" = is.null(id_pred) || is.numeric(id_pred) + ) + + all_tables <- self$list_tables() + pred_tables <- c("pred_data_t_float", "pred_data_t_int", "pred_data_t_bool") + existing_pred_tables <- intersect(pred_tables, all_tables) + + tables_to_attach <- c("trans_meta_t", "lulc_data_t", existing_pred_tables) + + self$with_tables( + tables_to_attach, + function() { + trans_info <- self$get_query(glue::glue( + "SELECT id_lulc_anterior, id_lulc_posterior + FROM trans_meta_t + WHERE id_trans = {id_trans}" + )) + + if (nrow(trans_info) == 0L) { + stop(glue::glue("Transition id_trans = {id_trans} not found in trans_meta_t")) + } + + id_lulc_ant <- trans_info$id_lulc_anterior + id_lulc_post <- trans_info$id_lulc_posterior + + ctes <- list() + + ctes$trans_result <- glue::glue( + "trans_result AS ( + SELECT + curr.id_coord, + curr.id_period, + CASE + WHEN prev.id_lulc = {id_lulc_ant} AND curr.id_lulc = {id_lulc_post} THEN TRUE + WHEN prev.id_lulc = {id_lulc_ant} AND curr.id_lulc != {id_lulc_post} THEN FALSE + ELSE NULL + END AS result + FROM lulc_data_t AS curr + INNER JOIN lulc_data_t AS prev + ON curr.id_coord = prev.id_coord + AND curr.id_period = prev.id_period + 1 + WHERE prev.id_lulc = {id_lulc_ant} + )" + ) + + pred_filter <- "" + if (!is.null(id_pred)) { + pred_filter <- glue::glue(" AND id_pred IN ({toString(id_pred)})") + } + + if ("pred_data_t_float" %in% existing_pred_tables) { + ctes$pred_float_combined <- glue::glue( + "pred_float_combined AS ( + SELECT id_coord, id_period, id_pred, value + FROM pred_data_t_float + WHERE id_period >= 1 + {pred_filter} + UNION ALL + SELECT p0.id_coord, periods.id_period, p0.id_pred, p0.value + FROM pred_data_t_float AS p0 + CROSS JOIN (SELECT DISTINCT id_period FROM trans_result WHERE id_period >= 1) AS periods + WHERE p0.id_period = 0 + {pred_filter} + )" + ) + + ctes$pred_float_wide <- "pred_float_wide AS ( + PIVOT pred_float_combined ON id_pred USING FIRST(value) GROUP BY id_coord, id_period + )" + } + + if ("pred_data_t_int" %in% existing_pred_tables) { + ctes$pred_int_combined <- glue::glue( + "pred_int_combined AS ( + SELECT id_coord, id_period, id_pred, value + FROM pred_data_t_int + WHERE id_period >= 1 + {pred_filter} + UNION ALL + SELECT p0.id_coord, periods.id_period, p0.id_pred, p0.value + FROM pred_data_t_int AS p0 + CROSS JOIN (SELECT DISTINCT id_period FROM trans_result WHERE id_period >= 1) AS periods + WHERE p0.id_period = 0 + {pred_filter} + )" + ) + + ctes$pred_int_wide <- "pred_int_wide AS ( + PIVOT pred_int_combined ON id_pred USING FIRST(value) GROUP BY id_coord, id_period + )" + } + + if ("pred_data_t_bool" %in% existing_pred_tables) { + ctes$pred_bool_combined <- glue::glue( + "pred_bool_combined AS ( + SELECT id_coord, id_period, id_pred, value + FROM pred_data_t_bool + WHERE id_period >= 1 + {pred_filter} + UNION ALL + SELECT p0.id_coord, periods.id_period, p0.id_pred, p0.value + FROM pred_data_t_bool AS p0 + CROSS JOIN (SELECT DISTINCT id_period FROM trans_result WHERE id_period >= 1) AS periods + WHERE p0.id_period = 0 + {pred_filter} + )" + ) + + ctes$pred_bool_wide <- "pred_bool_wide AS ( + PIVOT pred_bool_combined ON id_pred USING FIRST(value) GROUP BY id_coord, id_period + )" + } 
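      # For orientation: with only pred_data_t_float present and, say, predictors 3
      # and 7 requested, the assembled query has roughly this shape (a sketch of the
      # builder's output below, not its verbatim result):
      #
      #   WITH trans_result AS (...),
      #        pred_float_combined AS (...),
      #        pred_float_wide AS (
      #          PIVOT pred_float_combined ON id_pred USING FIRST(value)
      #          GROUP BY id_coord, id_period
      #        )
      #   SELECT tr.result, pf.* EXCLUDE (id_coord, id_period)
      #   FROM trans_result AS tr
      #   LEFT JOIN pred_float_wide AS pf
      #     ON tr.id_coord = pf.id_coord AND tr.id_period = pf.id_period
      #   WHERE tr.result IS NOT NULL
      #
      # i.e. one wide column per id_pred, later renamed to id_pred_<n>.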

      select_cols <- "tr.result"
      if ("pred_data_t_float" %in% existing_pred_tables) {
        select_cols <- paste0(select_cols, ", pf.* EXCLUDE (id_coord, id_period)")
      }
      if ("pred_data_t_int" %in% existing_pred_tables) {
        select_cols <- paste0(select_cols, ", pi.* EXCLUDE (id_coord, id_period)")
      }
      if ("pred_data_t_bool" %in% existing_pred_tables) {
        select_cols <- paste0(select_cols, ", pb.* EXCLUDE (id_coord, id_period)")
      }

      joins <- ""
      if ("pred_data_t_float" %in% existing_pred_tables) {
        joins <- paste0(
          joins,
          "\n      LEFT JOIN pred_float_wide AS pf ON tr.id_coord = pf.id_coord AND tr.id_period = pf.id_period"
        )
      }
      if ("pred_data_t_int" %in% existing_pred_tables) {
        joins <- paste0(
          joins,
          "\n      LEFT JOIN pred_int_wide AS pi ON tr.id_coord = pi.id_coord AND tr.id_period = pi.id_period"
        )
      }
      if ("pred_data_t_bool" %in% existing_pred_tables) {
        joins <- paste0(
          joins,
          "\n      LEFT JOIN pred_bool_wide AS pb ON tr.id_coord = pb.id_coord AND tr.id_period = pb.id_period"
        )
      }

      cte_string <- paste(unlist(ctes), collapse = ",\n\n      ")

      query <- glue::glue(
        "WITH {cte_string}

        SELECT {select_cols}
        FROM trans_result AS tr{joins}
        WHERE tr.result IS NOT NULL"
      )

      result <- self$get_query(query)

      old_names <- names(result)
      new_names <- old_names
      for (i in seq_along(old_names)) {
        if (old_names[i] != "result" && grepl("^\\d+$", old_names[i])) {
          new_names[i] <- paste0("id_pred_", old_names[i])
        }
      }
      data.table::setnames(result, old_names, new_names)

      if (!is.na(na_value)) {
        pred_cols <- setdiff(names(result), "result")
        for (col in pred_cols) {
          data.table::set(result, i = which(is.na(result[[col]])), j = col, value = na_value)
        }
      }

      result
    }
  )
})
diff --git a/R/grrf_filter.r b/R/grrf_filter.r
new file mode 100644
index 0000000..dc07e5a
--- /dev/null
+++ b/R/grrf_filter.r
@@ -0,0 +1,139 @@
+#' Guided Regularized Random Forest Feature Selection
+#'
+#' The `grrf_filter` returns a set of covariates for land use land cover change (LULCC) models based
+#' on feature selection with Guided Regularized Random Forests. This is a two-stage random forest
+#' approach: a first unregularized random forest estimates variable importance scores. These scores
+#' are then used to guide a second regularized random forest that penalizes less important features,
+#' resulting in a more parsimonious feature set.
+#'
+#' @param data A data.table of target variable and candidate covariates to be filtered; wide format
+#'   with one predictor per column.
+#' @param result_col Name of the column representing the transition results (0: no trans, 1: trans)
+#' @param weights Optional vector of case weights. Defaults to class-balanced weights
+#'   computed with compute_balanced_weights().
+#' @param gamma Numeric between 0-1 controlling the weight of the normalized importance
+#'   score (the "importance coefficient"). When gamma = 0, we perform unguided
+#'   regularized random forest (no guiding effect). When gamma = 1, we apply the
+#'   strongest guiding effect, leading to the most penalization of redundant
+#'   features and the most concise feature sets. Default is 0.5.
+#' @param num.trees Number of trees to grow in each random forest. Default is 500.
+#' @param max.depth Maximal tree depth passed to ranger::ranger(). Default is 100.
+#' @param ... Additional arguments passed to ranger::ranger().
+#'
+#' @return A character vector of column names (covariates) to retain, ordered by
+#'   importance (most important first)
+#'
+#' @details
+#' The Guided Regularized Random Forest (GRRF) algorithm works as follows:
+#' 1.
Fit an initial unregularized random forest to obtain variable importance scores +#' 2. Normalize these importance scores and use them to compute regularization +#' coefficients: coefReg = (1 - gamma) + gamma * normalized_importance +#' 3. Fit a regularized random forest using these coefficients to penalize splits +#' on less important variables +#' 4. Return variables with positive importance in the regularized model +#' +#' Class weights are used to handle class imbalance. Variables in terminal nodes are +#' weighted by class, and splits are evaluated using weighted Gini impurity. +#' +#' The ranger implementation uses the `split.select.weights` parameter to apply +#' regularization penalties, approximating the RRF regularization approach. +#' +#' @references +#' Deng, H., & Runger, G. (2013). Gene selection with guided regularized random forest. +#' Pattern Recognition, 46(12), 3483-3489. https://arxiv.org/pdf/1306.0237.pdf +#' +#' Original implementation by Antoine Adde, edited by Ben Black and adapted for +#' ranger by the evoland-plus team. +#' +#' @name grrf_filter +#' +#' @export + +grrf_filter <- function( + data, + result_col = "result", + weights = compute_balanced_weights(data[[result_col]]), + gamma = 0.5, + num.trees = 500, + max.depth = 100, + ... +) { + # Check if ranger is available + if (!requireNamespace("ranger", quietly = TRUE)) { + stop( + "Package 'ranger' is required for grrf_filter but is not installed.\n", + "Please install it with: install.packages('ranger')", + call. = FALSE + ) + } + + data.table::setDT(data) + + # Validate inputs + stopifnot( + "gamma must be between 0 and 1" = gamma >= 0 && gamma <= 1, + "result_col must exist in data" = result_col %in% names(data), + "data must have at least one predictor column" = ncol(data) > 1 + ) + + # Prepare data: separate predictors from response + predictor_cols <- setdiff(names(data), result_col) + y <- as.factor(data[[result_col]]) + x <- data[, ..predictor_cols] + + # Step 1: Run initial unregularized random forest to get importance scores + rf_initial <- ranger::ranger( + x = x, + y = y, + num.trees = num.trees, + importance = "impurity", + case.weights = weights, + max.depth = max.depth, + ... + ) + + # Extract and normalize importance scores + imp_initial <- rf_initial$variable.importance + # Normalize to [0,1] (importance values may be negative) + imp_normalized <- { + (imp_initial - min(imp_initial)) / + (max(imp_initial) - min(imp_initial)) + } + + # Step 2: Calculate regularization coefficients (penalty weights) + # Higher importance -> higher coefficient -> less penalty + coef_reg <- (1 - gamma) + gamma * imp_normalized + + # Step 3: Run guided regularized random forest + # Use split.select.weights to penalize variables with lower importance + # Higher weight = more likely to be selected for splitting + rf_grrf <- ranger::ranger( + x = x, + y = y, + num.trees = num.trees, + importance = "impurity", + case.weights = weights, + max.depth = max.depth, + split.select.weights = coef_reg, + ... + ) + + # Extract final importance scores + imp_final <- rf_grrf$variable.importance + + # Select variables with positive importance + selected_vars <- names(imp_final[imp_final > 0]) + + if (length(selected_vars) == 0) { + warning("No variables with positive importance found. 
Returning all variables.") + selected_vars <- predictor_cols + } + + # Order by importance (descending) + selected_vars <- selected_vars[order(imp_final[selected_vars], decreasing = TRUE)] + + message(glue::glue( + "Selected {length(selected_vars)}/{length(predictor_cols)} predictors" + )) + + return(selected_vars) +} diff --git a/R/intrv_masks_t.R b/R/intrv_masks_t.R index 501daf7..8aee5f3 100644 --- a/R/intrv_masks_t.R +++ b/R/intrv_masks_t.R @@ -18,8 +18,8 @@ as_intrv_masks_t <- function(x) { id_coord = integer(0) ) } - cast_dt_col(x, "id_coord", as.integer) - cast_dt_col(x, "id_coord", as.integer) + cast_dt_col(x, "id_coord", "int") + cast_dt_col(x, "id_coord", "int") new_evoland_table( x, "intrv_masks_t", diff --git a/R/lulc_data_t.R b/R/lulc_data_t.R index 91c4bcc..817df05 100644 --- a/R/lulc_data_t.R +++ b/R/lulc_data_t.R @@ -19,7 +19,7 @@ as_lulc_data_t <- function(x) { id_period = integer(0) ) } - cast_dt_col(x, "id_coord", as.integer) + cast_dt_col(x, "id_coord", "int") new_evoland_table( x, "lulc_data_t", diff --git a/R/neighbors_t.R b/R/neighbors_t.R new file mode 100644 index 0000000..4a50301 --- /dev/null +++ b/R/neighbors_t.R @@ -0,0 +1,151 @@ +#' Create Neighbors Table +#' +#' Creates a `neighbors_t` table and validates that it matches the schema. +#' +#' @name neighbors_t +#' +#' @param x An object that can be passed to [data.table::setDT()] +#' +#' @return A data.table of class "neighbors_t" with columns: +#' - `id_coord_origin`: Foreign key to coords_t (origin coordinate) +#' - `id_coord_neighbor`: Foreign key to coords_t (neighbor coordinate) +#' - `distance`: Numeric distance between coordinates +#' - `distance_class`: Optional factor representing distance intervals +#' @export +as_neighbors_t <- function(x) { + if (missing(x)) { + x <- data.table::data.table( + id_coord_origin = integer(0), + id_coord_neighbor = integer(0), + distance = numeric(0) + ) + } + cast_dt_col(x, "id_coord_origin", "int") + cast_dt_col(x, "id_coord_neighbor", "int") + if ("distance_class" %in% names(x)) { + cast_dt_col(x, "distance_class", "factor") + } + new_evoland_table( + x, + "neighbors_t", + c("id_coord_origin", "id_coord_neighbor") + ) +} + +#' @describeIn neighbors_t Compute neighboring coordinates within specified distances. In +#' order to be computationally feasible, the coordinates' IDs are rasterized before +#' their actual Euclidean distance is calculated. If coordinates are so close that they +#' get rasterized to the same cell, the first one is used and a warning is emitted. If +#' this happens, try again using a lower resolution. +#' @param max_distance Maximum distance to search for neighbors (in same units as +#' coordinates and resolution) +#' @param distance_breaks Optional numeric vector defining distance class boundaries. +#' If NULL, no distance classification is performed. +#' If provided, must have at least 2 elements defining interval breaks. 
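+#'   Breaks are passed to [cut()] with `right = FALSE` and `include.lowest = TRUE`,
+#'   i.e. classes are left-closed intervals, with the outermost break included.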
+#' @param resolution Grid cell size for rasterization (default: 100.0, in same units as coordinates) +#' @return A data.table with columns: +#' - id_coord_origin: ID of the origin coordinate +#' - id_coord_neighbor: ID of the neighboring coordinate +#' - distance: Distance between origin and neighbor +#' - distance_class: Factor indicating distance class (if distance_breaks provided) +#' @export +create_neighbors_t <- function( + coords_t, + max_distance, + distance_breaks = NULL, + resolution = 100.0, + quiet = FALSE +) { + # Validate inputs + if (!inherits(coords_t, "coords_t")) { + stop("coords_t must be a coords_t object") + } + + if (!is.numeric(max_distance) || length(max_distance) != 1 || max_distance <= 0) { + stop("max_distance must be a positive scalar numeric") + } + + if (!is.null(distance_breaks)) { + if (!is.numeric(distance_breaks) || length(distance_breaks) < 2) { + stop("distance_breaks must be NULL or a numeric vector with at least 2 elements") + } + } + + # Call C++ function + dt <- distance_neighbors_cpp( + coords_t = coords_t, + max_distance = max_distance, + resolution = resolution, + quiet = quiet + ) + + data.table::setalloccol(dt) + + # Rename distance_approx to distance + data.table::setnames(dt, "distance_approx", "distance") + + # Add distance class if breaks provided + if (!is.null(distance_breaks)) { + dt[, + distance_class := cut( + distance, + breaks = distance_breaks, + right = FALSE, + include.lowest = TRUE + ) + ] + } + + as_neighbors_t(dt) +} + + +#' @describeIn neighbors_t Validate a neighbors_t object +#' @export +validate.neighbors_t <- function(x, ...) { + NextMethod() + + required_cols <- c("id_coord_origin", "id_coord_neighbor", "distance") + + data.table::setcolorder(x, required_cols) + + stopifnot( + is.integer(x[["id_coord_origin"]]), + is.integer(x[["id_coord_neighbor"]]), + is.numeric(x[["distance"]]) + ) + + return(x) +} + +#' @describeIn neighbors_t Print a neighbors_t object +#' @param nrow Maximum number of rows to print. See [data.table::print.data.table] +#' @param ... Passed to [data.table::print.data.table] +#' @export +print.neighbors_t <- function(x, nrow = 10, ...) { + if (nrow(x) > 0) { + total_pairs <- format( + nrow(x), + big.mark = "_", + scientific = FALSE + ) + + extra_info <- "" + if ("distance_class" %in% names(x)) { + extra_info <- paste( + "Distance classes:", + paste(levels(x[["distance_class"]]), collapse = ", ") + ) + } + + cat(glue::glue( + "Neighbors Table\n", + "Neighbor pairs: {total_pairs}\n", + "{extra_info}\n\n" + )) + } else { + cat("Neighbors Table (empty)\n") + } + NextMethod(nrow = nrow, ...) + invisible(x) +} diff --git a/R/parquet_duckdb.R b/R/parquet_duckdb.R new file mode 100644 index 0000000..e7c5538 --- /dev/null +++ b/R/parquet_duckdb.R @@ -0,0 +1,725 @@ +#' R6 Base Class for Parquet-Backed DuckDB Storage +#' +#' @description +#' A domain-agnostic R6 class that provides an interface to a folder-based data +#' storage system using DuckDB for in-memory SQL operations and parquet files +#' for efficient on-disk persistence. This class can be inherited by +#' domain-specific database classes. 
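+#'
+#' Each table is persisted as a single file (`<table_name>.parquet` or `.csv`)
+#' under `path`; data is read through DuckDB's `read_parquet`/`read_csv` and
+#' written back with `COPY ... TO`, so tables only occupy memory while attached.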
+#' +#' @export + +parquet_duckdb <- R6::R6Class( + classname = "parquet_duckdb", + + ## Public Methods ---- + public = list( + #' @field connection DBI connection object to an in-memory DuckDB database + connection = NULL, + + #' @field path Character string path to the data folder + path = NULL, + + #' @field default_format Default file format for new tables + default_format = NULL, + + #' @field writeopts Default write options for DuckDB + writeopts = NULL, + + #' @description + #' Initialize a new parquet_duckdb object + #' @param path Character string. Path to the data folder. + #' @param default_format Character. Default file format ("parquet" or "csv"). + #' Default is "parquet". + #' @param extensions Character vector of DuckDB extensions to load (e.g., "spatial") + #' + #' @return A new `parquet_duckdb` object + initialize = function( + path, + default_format = c("parquet", "csv"), + extensions = character(0) + ) { + # Create folder if it doesn't exist + self$path <- ensure_dir(path) + + # Set format / writeopts + self$default_format <- match.arg(default_format) + self$writeopts <- switch( + self$default_format, + parquet = "format parquet, compression zstd", + csv = "format csv", + stop(glue::glue("Unsupported format: {self$default_format}")) + ) + + # Create in-memory connection for SQL operations + self$connection <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") + + # load extensions + for (ext in extensions) { + self$execute(glue::glue("install {ext}; load {ext};")) + } + + invisible(self) + }, + + ### Core Database Methods ---- + + #' @description + #' Execute a SQL statement + #' @param statement A SQL statement + #' @return Number of rows affected by statement + execute = function(statement) { + DBI::dbExecute(self$connection, statement) + }, + + #' @description + #' Execute a SQL query and return results + #' @param statement A SQL query statement + #' @return A data.table with query results + get_query = function(statement) { + DBI::dbGetQuery(self$connection, statement) |> + data.table::as.data.table() + }, + + #' @description + #' Attach a table from parquet/CSV file as a temporary table in DuckDB + #' @param table_name Character. Name of table to attach. + #' @param columns Character vector. Optional SQL column selection, defaults to "*" + #' @param where Character. Optional SQL WHERE clause to subset the table. + #' @return Invisible NULL (called for side effects) + attach_table = function(table_name, columns = "*", where = NULL) { + file_info <- private$get_file_path(table_name) + + if (!file_info$exists) { + stop(glue::glue("Table '{table_name}' does not exist at path: {self$path}")) + } + + # Build SQL query + sql <- glue::glue( + "create temp table {table_name} as ", + "select {paste(columns, collapse = ', ')} ", + "from read_{file_info$format}('{file_info$path}')" + ) + + if (!is.null(where)) { + sql <- glue::glue("{sql} where {where}") + } + + self$execute(sql) + invisible(NULL) + }, + + #' @description + #' Detach a table from the in-memory database + #' @param table_name Character. Name of table to drop. + #' @return Invisible NULL (called for side effects) + detach_table = function(table_name) { + self$execute(paste0("drop table if exists ", table_name, ";")) + invisible(NULL) + }, + + #' @description + #' Get row count for a table + #' @param table_name Character string. Name of the table to query. 
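+    #'   A missing table yields a count of 0 rather than an error; existing
+    #'   tables are counted by scanning the backing file directly.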
+ #' @return Integer number of rows + row_count = function(table_name) { + file_info <- private$get_file_path(table_name) + + if (!file_info$exists) { + return(0L) + } + + self$get_query( + glue::glue("select count(*) as n from read_{file_info$format}('{file_info$path}')") + )[[1]] + }, + + #' @description + #' List all tables (files) in storage + #' @return Character vector of table names + list_tables = function() { + list.files(self$path, pattern = "\\.(parquet|csv)$", full.names = FALSE) |> + tools::file_path_sans_ext() |> + unique() |> + sort() + }, + + #' @description + #' Execute a function with specified tables attached, handling attach/detach automatically. + #' If a table is already attached in the DuckDB instance, it won't be re-attached or detached. + #' + #' @param tables Character vector of table names to attach + #' @param func Function to execute with tables attached + #' @param ... Additional arguments passed to func + #' @return Result of func + with_tables = function(tables, func, ...) { + # Track which tables we attach (so we know which to detach) + attached_tables <- character(0) + + # Check which tables are already attached + existing_tables <- DBI::dbListTables(self$connection) + + # Attach tables that aren't already present + for (table in tables) { + if (!table %in% existing_tables) { + self$attach_table(table) + attached_tables <- c(attached_tables, table) + } + } + + # Ensure cleanup on exit + on.exit( + { + for (table in attached_tables) { + self$detach_table(table) + } + }, + add = TRUE + ) + + # Execute the function + func(...) + }, + + #' @description + #' Fetch data from a table + #' @param table_name Character string. Name of the table to query. + #' @param where Character string. Optional WHERE clause for the SQL query. + #' @param limit Integer. Optional limit on number of rows to return. + #' + #' @return A data.table + fetch = function(table_name, where = NULL, limit = NULL) { + file_info <- private$get_file_path(table_name) + + if (!file_info$exists) { + return(data.table::data.table()) + } + + # build sql query + sql <- glue::glue("select * from read_{file_info$format}('{file_info$path}')") + + if (!is.null(where)) { + sql <- glue::glue("{sql} where {where}") + } + if (!is.null(limit)) { + sql <- glue::glue("{sql} limit {limit}") + } + + self$get_query(sql) + }, + + #' @description + #' Delete rows from a table + #' @param table_name Character string. Name of the table to delete from. + #' @param where Character string. Optional WHERE clause; if NULL, deletes all rows. + #' @return Number of rows deleted + delete_from = function(table_name, where = NULL) { + file_info <- private$get_file_path(table_name) + + if (!file_info$exists) { + return(0L) + } + + count_before <- self$row_count(table_name) + + if (is.null(where)) { + file.remove(file_info$path) + return(count_before) + } + + self$execute(glue::glue( + r"{ + copy ( + select * from read_{file_info$format}('{file_info$path}') + where not ({where}) + ) + to '{file_info$path}' ({self$writeopts}) + }" + )) + + count_after <- self$row_count(table_name) + + return(count_before - count_after) + }, + + #' @description + #' Commit data using overwrite, append, or upsert modes. Handles autoincrement, key + #' identity columns, and list-to-MAP conversion. + #' @param x Data frame to commit. If character, in-duckdb-memory table. + #' @param table_name Character string table name + #' @param key_cols Character vector of columns that define uniqueness. 
If missing, + #' use all columns starting with `id_` + #' @param autoincrement_cols Character vector of column names to auto-increment + #' @param map_cols Character vector of columns to convert to MAP format + #' @param method Character, one of "overwrite", "append", "upsert" (upsert being an + #' update for existing rows, and insert for new rows) + #' @return Invisible NULL (called for side effects) + commit = function( + x, + table_name, + key_cols, + autoincrement_cols = character(0), + map_cols = character(0), + method = c("overwrite", "append", "upsert") + ) { + method <- match.arg(method) + + private$register_new_data_v(x, map_cols) + on.exit(private$cleanup_new_data_v(), add = TRUE) + all_cols <- self$get_query( + "select column_name from (describe new_data_v)" + )[[1]] + + file_info <- private$get_file_path(table_name) + + if (method == "overwrite" || !file_info$exists) { + # in case overwrite explicitly required, or no previously existing data to + # append or upsert to; rest of logic can be skipped + return(private$commit_overwrite( + table_name = table_name, + all_cols = all_cols, + autoincrement_cols = autoincrement_cols, + file_info = file_info + )) + } + + self$attach_table(table_name) + on.exit(self$detach_table(table_name), add = TRUE) + private$set_autoincrement_vars(table_name, autoincrement_cols) + + if (missing(key_cols)) { + key_cols <- grep("^id_", all_cols, value = TRUE) + } + + if (method == "append" || length(key_cols) == 0L) { + # if there are no key columns to join on, upsert becomes append + private$commit_append( + table_name = table_name, + all_cols = all_cols, + autoincrement_cols = autoincrement_cols, + file_info = file_info + ) + } else { + private$commit_upsert( + table_name = table_name, + all_cols = all_cols, + key_cols = key_cols, + autoincrement_cols = autoincrement_cols, + file_info = file_info + ) + } + }, + + #' @description + #' Print method for parquet_duckdb + #' @param ... Not used + #' @return self (invisibly) + print = function(...) { + # gather data to be printed + classes <- class(self) + classes <- classes[classes != "R6"] + + all_names <- names(self) + methods <- character(0) + active_bindings <- character(0) + + names(self$.__enclos_env__$private) + if (!is.null(self$.__enclos_env__$super)) { + # exclude private super names + super_names <- setdiff( + ls(self$.__enclos_env__$super), + ls(self$.__enclos_env__$super$.__enclos_env__$private) + ) + } else { + super_names <- character(0) + } + nonsuper_names <- setdiff(all_names, super_names) + + for (name in nonsuper_names) { + # Check if it's an active binding first; subset2 would evaluate it + if (bindingIsActive(name, self$.__enclos_env__$self)) { + active_bindings <- c(active_bindings, name) + } else { + obj <- .subset2(self, name) + if (is.function(obj) && !name %in% c("initialize", "print", "clone")) { + methods <- c(methods, name) + } + } + } + + methods <- sort(methods) + active_bindings <- + active_bindings[!grepl("_t($|_)", active_bindings)] |> + sort() + + # actually start printing + if (length(classes) == 1) { + cat("<", classes[1], "> Object\n") + } else { + cat(classes[1], "Object. 
Inherits from", toString(classes[-1]), "\n") + } + + compression <- if (grepl("compression\\s+(\\w+)", self$writeopts)) { + sub(".*compression\\s+(\\w+).*", "\\1", self$writeopts) + } else { + "none" + } + + # Database info on one line + cat( + glue::glue( + "Database: {self$path} | Format: {self$default_format} | Compression: {compression}" + ), + "\n\n" + ) + + tables <- self$list_tables() + if (length(tables) > 0) { + cat("Tables present:\n ") + cat(strwrap(toString(tables), width = 80), sep = "\n ") + cat("\n") + } else { + cat("Tables present: (none)\n\n") + } + + if (length(super_names) > 0) { + cat("DB Methods:\n ") + cat(strwrap(toString(super_names), width = 80), sep = "\n ") + cat("\n") + } + + if (length(methods) > 0) { + cat("Public methods:\n ") + cat(strwrap(toString(methods), width = 80), sep = "\n ") + cat("\n") + } + + if (length(active_bindings) > 0) { + cat("Active bindings:\n ") + cat(strwrap(toString(active_bindings), width = 80), sep = "\n ") + } + + invisible(self) + } + ), + + ## Private Methods ---- + private = list( + # R6 hook called on gc(); Close the database connection + finalize = function() { + if (!is.null(self$connection)) { + DBI::dbDisconnect(self$connection) + self$connection <- NULL + } + }, + + ### Commit Methods ---- + #' param x Data frame to commit. If character, in-duckdb-memory table. + #' param table_name Character string table name + #' param autoincrement_cols Character vector of column names to auto-increment + #' param map_cols Character vector of columns to convert to MAP format + #' return Invisible NULL (called for side effects) + commit_overwrite = function( + table_name, + all_cols, + autoincrement_cols = character(0), + file_info + ) { + # Warn if overriding existing IDs + if (length(intersect(autoincrement_cols, all_cols)) > 0) { + warning(glue::glue( + "Overriding existing IDs ({toString(autoincrement_cols)}) with row numbers;\n", + "Assign these IDs manually and do not pass any autoincrement_cols to avoid this warning" + )) + } + + # Build SELECT expression + ordinary_cols <- setdiff(all_cols, autoincrement_cols) + select_expr <- glue::glue_collapse( + c(glue::glue("row_number() over () as {autoincrement_cols}"), ordinary_cols), + sep = ",\n " + ) + + self$execute(glue::glue( + r"{ + copy ( + select {select_expr} + from new_data_v + ) to '{file_info$path}' ({self$writeopts}) + }" + )) + }, + + #' param x Data frame to commit. If character, in-duckdb-memory table. + #' param table_name Character string table name + #' param autoincrement_cols Character vector of column names to auto-increment + #' param map_cols Character vector of columns to convert to MAP format + #' return Invisible NULL (called for side effects) + commit_append = function( + table_name, + all_cols, + autoincrement_cols = character(0), + file_info + ) { + ordinary_cols <- setdiff(all_cols, autoincrement_cols) + select_new <- glue::glue_collapse( + c( + glue::glue( + "row_number() over () + getvariable('max_{autoincrement_cols}') as {autoincrement_cols}" + ), + ordinary_cols + ), + sep = ",\n " + ) + + # Concatenation using UNION ALL; "by name" handles missing columns + self$execute(glue::glue( + r"{ + copy ( + select * from {table_name} + union all by name + select {select_new} + from new_data_v + ) + to '{file_info$path}' ({self$writeopts}) + }" + )) + }, + + #' param x Data frame to commit. If character, in-duckdb-memory table. + #' param table_name Character string table name + #' param key_cols Character vector of columns that define uniqueness. 
If missing, + #' use all columns starting with `id_` + #' param autoincrement_cols Character vector of column names to auto-increment + #' param map_cols Character vector of columns to convert to MAP format + #' return Invisible NULL (called for side effects) + commit_upsert = function( + table_name, + all_cols, + key_cols, + autoincrement_cols = character(0), + file_info + ) { + # Update existing data + ordinary_cols <- setdiff(all_cols, union(key_cols, autoincrement_cols)) + update_select_expr <- glue::glue_collapse( + glue::glue("{ordinary_cols} = new_data_v.{ordinary_cols}"), + sep = ",\n " + ) + update_join_condition <- glue::glue_collapse( + glue::glue("{table_name}.{key_cols} = new_data_v.{key_cols}"), + sep = "\nand " + ) + + self$execute(glue::glue( + r"{ + update {table_name} set + {update_select_expr} + from new_data_v + where + {update_join_condition}; + }" + )) + + # Insert new data + insert_select_expr <- glue::glue_collapse( + c( + glue::glue( + "row_number() over () + getvariable('max_{autoincrement_cols}') as {autoincrement_cols}" + ), + glue::glue("new_data_v.{setdiff(all_cols, autoincrement_cols)}") + ), + sep = ",\n " + ) + null_condition <- glue::glue_collapse( + glue::glue("{table_name}.{key_cols} is null"), + sep = "\nand " + ) + + self$execute(glue::glue( + r"{ + insert into {table_name} + select + {insert_select_expr} + from + new_data_v + left join + {table_name} + on + {update_join_condition} + where + {null_condition} + ; + }" + )) + + self$execute(glue::glue("copy {table_name} to '{file_info$path}' ({self$writeopts})")) + }, + + # Get file path and format for a table + # + # @param table_name Character string table name + # @return List with path, format, and exists flag + get_file_path = function(table_name) { + # Check for parquet first, then csv + parquet_path <- file.path(self$path, paste0(table_name, ".parquet")) + csv_path <- file.path(self$path, paste0(table_name, ".csv")) + + if (file.exists(parquet_path)) { + return(list(path = parquet_path, format = "parquet", exists = TRUE)) + } else if (file.exists(csv_path)) { + return(list(path = csv_path, format = "csv", exists = TRUE)) + } else { + # Return default format for new file + default_path <- file.path( + self$path, + paste0(table_name, ".", self$default_format) + ) + return(list( + path = default_path, + format = self$default_format, + exists = FALSE + )) + } + }, + + # Register new_data_v table, optionally converting MAP columns + # + # @param x Data to register + # @param map_cols Character vector of columns to convert to MAP format + # @return NULL (called for side effects) + register_new_data_v = function(x, map_cols = character(0)) { + if (is.character(x)) { + # TODO add tests + self$execute(glue::glue("create view new_data_v as from {x}")) + return(invisible(NULL)) + } + + if (length(map_cols) == 0) { + # No MAP conversion needed - register directly + duckdb::duckdb_register(self$connection, "new_data_v", x) + } else { + # Convert list columns to key-value dataframes + x <- + data.table::copy(x) |> + convert_list_cols(map_cols, list_to_kv_df) + + # Register as intermediate table + duckdb::duckdb_register(self$connection, "new_data_raw", x) + + # Build SELECT expression with map_from_entries for MAP columns + map_exprs <- glue::glue("map_from_entries({map_cols}) as {map_cols}") + other_cols <- setdiff(names(x), map_cols) + all_exprs <- c(other_cols, map_exprs) + select_expr <- glue::glue_collapse(all_exprs, sep = ", ") + + # Create new_data_v from new_data_raw + self$execute(glue::glue( + "create 
temp table new_data_v as select {select_expr} from new_data_raw" + )) + } + + invisible(NULL) + }, + + # Cleanup new_data_v and related tables + # + # @param map_cols Character vector indicating if MAP conversion was used + # @return NULL (called for side effects) + cleanup_new_data_v = function() { + try(duckdb::duckdb_unregister(self$connection, "new_data_v"), silent = TRUE) + try(duckdb::duckdb_unregister(self$connection, "new_data_raw"), silent = TRUE) + try(self$execute("drop table if exists new_data_v"), silent = TRUE) + try(self$execute("drop view if exists new_data_v"), silent = TRUE) + + invisible(NULL) + }, + + # Set DuckDB variables max_{colname} for autoincrement columns + # + # @param table_name Character string table name + # @param autoincrement_cols Character vector of column names + # @return NULL (called for side effects) + set_autoincrement_vars = function(table_name, autoincrement_cols) { + if (length(autoincrement_cols) == 0L) { + return(NULL) + } + + if (!DBI::dbExistsTable(self$connection, table_name)) { + # Attach/detach if not already attached + self$attach_table(table_name) + on.exit(self$detach_table(table_name)) + } + + existing_cols <- + glue::glue("select column_name from (describe {table_name})") |> + self$get_query() |> + (\(x) x[[1]])() |> + intersect(autoincrement_cols) + + missing_cols <- setdiff(autoincrement_cols, existing_cols) + + set_exprs <- glue::glue_collapse( + c( + glue::glue( + "set variable max_{existing_cols} = + (select coalesce(max({existing_cols}), 0) from {table_name});" + ), + glue::glue( + "set variable max_{missing_cols} = 0;" + ) + ), + sep = "\n" + ) + + self$execute(set_exprs) + invisible(NULL) + } + ) +) + + +# Helper functions for converting between list and data.frame formats for DuckDB MAPs +convert_list_cols <- function(x, cols, fn) { + for (col in cols) { + x[[col]] <- lapply(x[[col]], fn) + } + x +} + +list_to_kv_df <- function(x) { + if (is.null(x) || length(x) == 0) { + return(data.frame( + key = character(0), + value = character(0), + stringsAsFactors = FALSE + )) + } + data.frame( + key = names(x), + value = as.character(unlist(x)), + stringsAsFactors = FALSE + ) +} + +kv_df_to_list <- function(x) { + if (is.null(x) || nrow(x) == 0) { + return(NULL) + } + + out <- list() + + for (row in seq_len(nrow(x))) { + key <- x$key[row] + val <- x$value[row] + + # Try numeric conversion + num_val <- suppressWarnings(as.numeric(val)) + if (!is.na(num_val)) { + out[[key]] <- num_val + } else { + out[[key]] <- val + } + } + + out +} diff --git a/R/pred_data_t.R b/R/pred_data_t.R index d3c7972..45ee49e 100644 --- a/R/pred_data_t.R +++ b/R/pred_data_t.R @@ -18,26 +18,18 @@ as_pred_data_t <- function(x, type) { stopifnot(type %in% c("float", "int", "bool")) - # Create empty table with proper value type - coercion_fn <- switch( - type, - float = as.numeric, - int = as.integer, - bool = as.logical - ) - if (missing(x)) { x <- data.table::data.table( id_pred = integer(0), id_coord = integer(0), id_period = integer(0), - value = coercion_fn(integer(0)) + value = integer(0) ) } data.table::setDT(x, key = c("id_pred", "id_coord", "id_period")) |> - cast_dt_col("value", coercion_fn) |> - cast_dt_col("id_coord", as.integer) + cast_dt_col("value", type) |> + cast_dt_col("id_coord", "int") class_name <- paste0("pred_data_t_", type) new_evoland_table(x, c(class_name, "pred_data_t")) diff --git a/R/trans_meta_t.R b/R/trans_meta_t.R index 8613add..db81f8e 100644 --- a/R/trans_meta_t.R +++ b/R/trans_meta_t.R @@ -41,7 +41,8 @@ as_trans_meta_t <- 
function(x) {
   )
 }
 
-#' @describeIn trans_meta_t Calculate the transition metadata and mark for modelling feasibility
+#' @describeIn trans_meta_t Calculate the transition metadata and mark for modelling
+#'   feasibility. Does not attribute `id_trans`; this only makes sense as part of a DB.
 #' @export
 create_trans_meta_t <- function(
   transitions,
@@ -108,9 +109,6 @@ create_trans_meta_t <- function(
     }
   }
 
-  # Add id_trans
-  trans_summary[, id_trans := .I]
-
   as_trans_meta_t(trans_summary)
 }
 
diff --git a/R/trans_preds_t.R b/R/trans_preds_t.R
index 6d7c5dc..4fc51c9 100644
--- a/R/trans_preds_t.R
+++ b/R/trans_preds_t.R
@@ -5,6 +5,7 @@
 #' modelling each transition type.
 #'
 #' @name trans_preds_t
+#' @include evoland_db.R
 #'
 #' @param db An [evoland_db] instance with populated trans_meta_t and pred_meta_t tables
 #'
@@ -19,6 +20,8 @@ as_trans_preds_t <- function(x) {
       id_trans = integer(0)
     )
   }
+  cast_dt_col(x, "id_pred", "int")
+  cast_dt_col(x, "id_trans", "int")
   new_evoland_table(
     x,
     "trans_preds_t",
@@ -26,12 +29,139 @@ as_trans_preds_t <- function(x) {
   )
 }
 
-#' @describeIn trans_preds_t Create a transition-predictor relation, i.e. records the
-#'   result of a predictor selection step.
-#' @export
-create_trans_preds_t <- function() {
-  as_trans_preds_t()
-}
+# set an initial full set of transition / predictor relations
+evoland_db$set(
+  "public",
+  "set_full_trans_preds",
+  function(overwrite = FALSE) {
+    if (self$row_count("trans_preds_t") > 0 && !overwrite) {
+      stop("Set overwrite to TRUE to overwrite existing trans_preds_t")
+    }
+    p <- self$pred_meta_t
+    t <- self$trans_meta_t[is_viable == TRUE]
+
+    full <- expand.grid(id_pred = p[["id_pred"]], id_trans = t[["id_trans"]])
+    self$commit(as_trans_preds_t(full), "trans_preds_t", method = "overwrite")
+  }
+)
+
+#' describeIn trans_preds_t Create a transition-predictor relation, i.e. records the
+#'   result of a predictor selection step. Runs the filter function for each viable
+#'   transition and stores the selected predictors.
+#' param filter_fun Defaults to [covariance_filter()], but can be any function that
+#'   returns a character vector of selected predictor column names when called with
+#'   `data` and `result_col`, e.g. [grrf_filter()]
+#' param na_value Passed to db$trans_pred_data_v - if not NA, replace all NA predictor
+#'   values with this value
+#' param ... Additional arguments passed to filter_fun, e.g. `corcut` (the 0-1
+#'   correlation threshold) or rank_fun arguments for [covariance_filter()]
+evoland_db$set(
+  "public",
+  "prune_trans_preds",
+  function(
+    filter_fun = covariance_filter,
+    na_value = NA,
+    ...
+ ) { + viable_trans <- self$trans_meta_t[is_viable == TRUE] + pred_meta <- self$pred_meta_t + stopifnot( + "No viable transitions found in trans_meta_t" = nrow(viable_trans) > 0L + ) + if (self$row_count("trans_preds_t") == 0) { + self$set_full_trans_preds() + } + trans_preds_pre <- self$trans_preds_t + + results_list <- list() + + # Iterate over transitions (anterior/posterior pairs) + for (i in seq_len(nrow(viable_trans))) { + id_trans <- viable_trans$id_trans[i] + id_lulc_ant <- viable_trans$id_lulc_anterior[i] + id_lulc_post <- viable_trans$id_lulc_posterior[i] + id_preds <- trans_preds_pre$id_pred[ + trans_preds_pre$id_trans == id_trans + ] + + message(glue::glue( + "Processing transition {i}/{nrow(viable_trans)}: ", + "id_trans={id_trans} ({id_lulc_ant} -> {id_lulc_post})" + )) + + if (length(id_preds) == 0L) { + next + } + + # Get wide transition-predictor data + tryCatch( + { + trans_pred_data <- self$trans_pred_data_v(id_trans, id_preds, na_value) + + # Check if we have any data + if (nrow(trans_pred_data) == 0L) { + warning(glue::glue( + "No data for transition {id_trans}, skipping" + )) + next + } + + # Check if we have any predictor columns + pred_cols <- grep("^id_pred_", names(trans_pred_data), value = TRUE) + if (length(pred_cols) == 0L) { + warning(glue::glue( + "No predictor columns for transition {id_trans}, skipping" + )) + next + } + + # Return ranked + filtered predictor names as id_pred_{n} + filtered_preds <- filter_fun( + data = trans_pred_data, + result_col = "result", + ... + ) + + if (length(filtered_preds) > 0L) { + # Parse id_pred values from column names (e.g., "id_pred_1" -> 1) + selected_ids <- as.integer(sub("^id_pred_", "", filtered_preds)) + + # Create result rows + results_list[[id_trans]] <- data.table::data.table( + id_pred = selected_ids, + id_trans = id_trans + ) + + message(glue::glue( + " Selected {length(selected_ids)} predictor(s) for transition {id_trans}" + )) + } else { + message(glue::glue( + " No predictors selected for transition {id_trans}" + )) + } + }, + error = function(e) { + # do not prune on error + results_list[[id_trans]] <- data.table::data.table( + id_pred = id_preds, + id_trans = id_trans + ) + warning(glue::glue( + "Error processing transition {id_trans}: {e$message}" + )) + } + ) + } + + # Combine all results + if (length(results_list) == 0L) { + warning("No predictors selected for any transition") + return(invisible(NULL)) + } + + result <- data.table::rbindlist(results_list) + + self$commit(as_trans_preds_t(result), "trans_preds_t", method = "overwrite") + } +) #' @export validate.trans_preds_t <- function(x, ...) 
{ diff --git a/R/util.R b/R/util.R index ceebbcb..7b8e1c0 100644 --- a/R/util.R +++ b/R/util.R @@ -143,11 +143,29 @@ print_rowwise_yaml <- function(df) { } #' @describeIn util Cast a data.table column; invisibly returns x -cast_dt_col <- function(x, colname, castfun) { +cast_dt_col <- function(x, colname, type) { + predicate_fn <- switch( + type, + float = is.numeric, + int = is.integer, + bool = is.logical, + factor = is.factor + ) + if (predicate_fn(x[[colname]])) { + return(invisible(x)) + } + + coercion_fn <- switch( + type, + float = as.numeric, + int = as.integer, + bool = as.logical, + factor = as.factor + ) data.table::set( x = x, j = colname, - value = castfun(x[[colname]]) + value = coercion_fn(x[[colname]]) ) invisible(x) } diff --git a/R/util_terra.R b/R/util_terra.R index 00d8825..463c772 100644 --- a/R/util_terra.R +++ b/R/util_terra.R @@ -89,81 +89,5 @@ extract_using_coords_t.SpatVector <- function(x, coords_t, na_omit = TRUE) { out } -#' @describeIn util_terra Compute neighboring coordinates within specified distances. In -#' order to be computationally feasible, the coordinates' IDs are rasterized before -#' their actual Euclidean distance is calculated. If coordinates are so close that they -#' get rasterized to the same cell, the first one is used and a warning is emitted. If -#' this happens, try again using a lower resolution. -#' @param max_distance Maximum distance to search for neighbors (in same units as -#' coordinates and resolution) -#' @param distance_breaks Optional numeric vector defining distance class boundaries. -#' If NULL, no distance classification is performed. -#' If provided, must have at least 2 elements defining interval breaks. -#' @param resolution Grid cell size for rasterization (default: 100.0, in same units as coordinates) -#' @return A data.table with columns: -#' - id_coord_origin: ID of the origin coordinate -#' - id_coord_neighbor: ID of the neighboring coordinate -#' - distance: Distance between origin and neighbor -#' - distance_class: Factor indicating distance class (if distance_breaks provided) -#' @export -compute_neighbors <- function( - coords_t, - max_distance, - distance_breaks = NULL, - resolution = 100.0 -) { - # Validate inputs - if (!inherits(coords_t, "coords_t")) { - stop("coords_t must be a coords_t object") - } - - if (!is.numeric(max_distance) || length(max_distance) != 1 || max_distance <= 0) { - stop("max_distance must be a positive scalar numeric") - } - - if (!is.null(distance_breaks)) { - if (!is.numeric(distance_breaks) || length(distance_breaks) < 2) { - stop("distance_breaks must be NULL or a numeric vector with at least 2 elements") - } - } - - # Call C++ function - dt <- distance_neighbors_cpp( - coords_t = coords_t, - max_distance = max_distance, - resolution = resolution - ) - - # Set data.table allocation - dt <- data.table::setalloccol(dt) - - # Rename distance_approx to distance - data.table::setnames(dt, "distance_approx", "distance") - - # Add distance class if breaks provided - if (!is.null(distance_breaks)) { - dt[, - distance_class := cut( - distance, - breaks = distance_breaks, - right = FALSE, - include.lowest = TRUE - ) - ] - } - - # Reorder columns - if (!is.null(distance_breaks)) { - data.table::setcolorder( - dt, - c("id_coord_origin", "id_coord_neighbor", "distance_class", "distance") - ) - } else { - data.table::setcolorder( - dt, - c("id_coord_origin", "id_coord_neighbor", "distance") - ) - } - - return(dt) -} +# TODO move this to a neighbors_t.R file that includes a formal class definition 
+# for the return table, including validation, print, coercion diff --git a/inst/tinytest/test_evoland_db.R b/inst/tinytest/test_evoland_db.R index b84f060..c48f2df 100644 --- a/inst/tinytest/test_evoland_db.R +++ b/inst/tinytest/test_evoland_db.R @@ -1,10 +1,12 @@ -# Test that a new database can be set up using evoland_db$new() +# Test evoland_db domain-specific functionality +# Generic parquet_duckdb tests are in test_parquet_duckdb.R library(tinytest) # Create temporary directory for testing test_dir <- tempfile("evoland_test_") on.exit(unlink(test_dir, recursive = TRUE), add = TRUE) +# Test 1: evoland_db initialization with reporting expect_silent( db <- evoland_db$new( path = test_dir, @@ -13,13 +15,15 @@ expect_silent( ) ) expect_true(inherits(db, "evoland_db")) +expect_true(inherits(db, "parquet_duckdb")) # Should inherit from parent -# In folder-based storage, only reporting_t exists initially -# Other tables are created on demand -expected_tables_initial <- c("reporting_t") -expect_identical(db$list_tables(), expected_tables_initial) - +# Test 2: Reporting table is created and populated +expect_identical(db$list_tables(), "reporting_t") reporting1 <- db$fetch("reporting_t") +expect_true("report_name" %in% reporting1$key) +expect_true("tinytest" %in% reporting1$value) + +# Test 3: Persistence of reporting data rm(db) gc() db <- evoland_db$new( @@ -34,30 +38,9 @@ expect_equal( sort(reporting2$key) ) +# Test 4 removed: Fetching a missing table now produces an error -# Check that accessing non-existent tables returns empty data.tables -# (these tables don't appear in list_tables() until they have data) -empty_tables <- c( - "alloc_params_t", - "coords_t", - "intrv_masks_t", - "intrv_meta_t", - "lulc_data_t", - "lulc_meta_t", - "periods_t", - "pred_data_t_bool", - "pred_data_t_float", - "pred_data_t_int", - "pred_meta_t", - "trans_meta_t", - "trans_models_t", - "trans_preds_t" -) -for (table in empty_tables) { - expect_equal(nrow(db[[table]]), 0L) -} - -# Create synthetic, minimal test data +# Test 5: Create synthetic evoland-specific test data coords_t <- create_coords_t_square( epsg = 2056, extent = terra::ext(c( @@ -230,8 +213,7 @@ intrv_masks_t <- as_intrv_masks_t( ) ) -# Test DB roundtrips & integrity checks, repeated assignments (~upserts) -# TODO make this into a testing harness that iterates over a list of sample data +# Test 6: Active bindings - coords_t expect_silent(db$coords_t <- coords_t) expect_silent(db$coords_t <- coords_t) expect_identical(db$coords_t, coords_t) @@ -240,12 +222,15 @@ expect_identical( data.table::as.data.table(coords_t[, 1:3]) ) +# Test 7: Active bindings - lulc_meta_t expect_silent(db$lulc_meta_t <- lulc_meta_t) expect_identical(db$lulc_meta_t, lulc_meta_t) +# Test 8: Active bindings - periods_t expect_silent(db$periods_t <- periods_t) expect_identical(db$periods_t, periods_t) +# Test 9: Active bindings - pred_meta_t with auto-increment expect_silent(db$pred_meta_t <- pred_meta_t[1, ]) expect_silent(db$pred_meta_t <- pred_meta_t) # After committing, id_pred should be assigned (1:2 in this case) @@ -254,20 +239,24 @@ expect_equal(retrieved_pred_meta[["id_pred"]], 1:2) expect_equal(retrieved_pred_meta[["name"]], c("noise", "distance_to_lake")) db$delete_from("pred_meta_t") # clear up, we want to ensure this works with add_predictor +# Test 10: Active bindings - intrv_meta_t (with MAP columns) expect_silent(db$intrv_meta_t <- intrv_meta_t) expect_equal(db$intrv_meta_t, intrv_meta_t) +# Test 11: Active bindings - trans_meta_t expect_silent(db$trans_meta_t 
<- trans_meta_t) expect_equal(db$trans_meta_t[, c(-1)], trans_meta_t) +# Test 12: Active bindings - trans_models_t (with MAP and BLOB columns) expect_silent(db$trans_models_t <- trans_models_t) expect_equal(db$trans_models_t, trans_models_t) -# now that we have the trans_meta we shouldn't be violating the FK anymore +# Test 13: Active bindings - alloc_params_t (with MAP columns) expect_silent(db$alloc_params_t <- alloc_params_t) expect_equal(db$alloc_params_t, alloc_params_t) -# repeated upsert should be idempotent +# Test 14: add_predictor method - first predictor +# Test 15: add_predictor method - second predictor expect_silent( db$add_predictor( pred_spec = pred_spec["noise"], @@ -284,17 +273,19 @@ expect_silent( pred_type = "float" ) ) -# check that add_predictor works for metadata +# Test 16: add_predictor correctly updates metadata retrieved_pred_meta <- db$pred_meta_t expect_equal(retrieved_pred_meta[["id_pred"]], 1:2) expect_equal(retrieved_pred_meta[["name"]], c("noise", "distance_to_lake")) -# check that add_predictor works for data +# Test 17: add_predictor correctly updates data expect_equal(db$row_count("pred_data_t_float"), 48L) +# Test 18: Active bindings - pred_data_t_float upsert is idempotent expect_silent(db$pred_data_t_float <- pred_data_t) expect_equal(db$row_count("pred_data_t_float"), 48L) expect_silent(db$pred_data_t_float <- pred_data_t) expect_equal(db$row_count("pred_data_t_float"), 48L) +# Test 19: Active bindings enforce type validation expect_error( # should only be able to insert the correct class db$pred_data_t_float <- data.table::data.table( @@ -306,6 +297,7 @@ expect_error( r"(^inherits.* is not TRUE$)" ) +# Test 20: lulc_data_t requires proper type expect_error( db$lulc_data_t <- lulc_data_dt <- data.table::data.table( id_coord = c(1L, 2L, 1L), @@ -318,146 +310,77 @@ expect_silent( db$lulc_data_t <- as_lulc_data_t(lulc_data_dt) ) -# Test delete_from functionality -# Setup: we already have pred_data_t_float with 48 rows (id_pred 1:2, id_coord 3:50, id_period 1) -expect_equal(db$row_count("pred_data_t_float"), 48L) - -# Test 1: Delete with WHERE clause - delete specific predictor -deleted_count <- db$delete_from("pred_data_t_float", where = "id_pred = 1") -expect_equal(deleted_count, 24L) -expect_equal(db$row_count("pred_data_t_float"), 24L) - -# Verify only id_pred = 2 remains -remaining <- db$pred_data_t_float -expect_equal(unique(remaining$id_pred), 2L) -expect_equal(nrow(remaining), 24L) - -# Test 2: Delete with complex WHERE clause -# Add back some data first -db$pred_data_t_float <- pred_data_t -expect_equal(db$row_count("pred_data_t_float"), 48L) +# Test 21: Domain-specific view - coords_minimal +coords_minimal <- db$coords_minimal +expect_true(inherits(coords_minimal, "data.table")) +expect_equal(ncol(coords_minimal), 3L) +expect_true(all(c("id_coord", "lon", "lat") %in% names(coords_minimal))) +expect_equal(nrow(coords_minimal), nrow(coords_t)) + +# Test 22: Domain-specific view - extent +# Set up coords first +db$coords_t <- coords_t +extent <- db$extent +expect_true(inherits(extent, "SpatExtent")) + +# Test 23: Domain-specific view - lulc_meta_long_v +db$lulc_meta_t <- lulc_meta_t +lulc_long <- db$lulc_meta_long_v +expect_true(inherits(lulc_long, "data.table")) +expect_true("src_class" %in% names(lulc_long)) +# Should have one row per src_class +expect_true(nrow(lulc_long) > nrow(lulc_meta_t)) + +# Test 24: Domain-specific view - pred_sources_v +db$pred_meta_t <- pred_meta_t +sources <- db$pred_sources_v +expect_true(inherits(sources, 
"data.table")) +expect_true(all(c("url", "md5sum") %in% names(sources))) +expect_true(nrow(sources) > 0L) + +# Test 25: set_coords method +test_dir_coords <- tempfile("evoland_coords_") +on.exit(unlink(test_dir_coords, recursive = TRUE), add = TRUE) +db_coords <- evoland_db$new(test_dir_coords) -# Delete only specific coordinates -deleted_count <- db$delete_from("pred_data_t_float", where = "id_coord < 10") -expect_true(deleted_count > 0L) -remaining <- db$pred_data_t_float -expect_true(all(remaining$id_coord >= 10)) - -# Test 3: Delete all rows (NULL where clause) -count_before_delete <- db$row_count("pred_data_t_float") -deleted_count <- db$delete_from("pred_data_t_float", where = NULL) -expect_equal(deleted_count, count_before_delete) -expect_equal(db$row_count("pred_data_t_float"), 0L) - -# Test 4: Delete from non-existent table returns 0 -deleted_count <- db$delete_from("nonexistent_table", where = "id = 1") -expect_equal(deleted_count, 0L) - -# Test 5: Delete with WHERE that matches nothing -db$pred_data_t_float <- pred_data_t -initial_count <- db$row_count("pred_data_t_float") -deleted_count <- db$delete_from("pred_data_t_float", where = "id_pred = 999") -expect_equal(deleted_count, 0L) -expect_equal(db$row_count("pred_data_t_float"), initial_count) - -# Test auto-increment functionality -# Create a new test database for auto-increment tests -test_dir_autoinc <- tempfile("evoland_autoinc_") -on.exit(unlink(test_dir_autoinc, recursive = TRUE), add = TRUE) -db_autoinc <- evoland_db$new(test_dir_autoinc) - -# Test 1: Auto-increment on overwrite mode (new table) -test_data_1 <- data.table::data.table( - name = c("predictor_a", "predictor_b", "predictor_c"), - unit = c("m", "kg", "s") -) -db_autoinc$commit_overwrite( - test_data_1, - "test_autoinc_t", - autoincrement_cols = "id_test" -) -result_1 <- db_autoinc$fetch("test_autoinc_t") -expect_equal(result_1$id_test, 1:3) -expect_equal(result_1$name, c("predictor_a", "predictor_b", "predictor_c")) - -# Test 2: Auto-increment on append mode -test_data_2 <- data.table::data.table( - name = c("predictor_d", "predictor_e"), - unit = c("A", "V") -) -db_autoinc$commit_append( - test_data_2, - "test_autoinc_t", - autoincrement_cols = "id_test" -) -result_2 <- db_autoinc$fetch("test_autoinc_t") -expect_equal(nrow(result_2), 5L) -expect_equal(result_2$id_test, 1:5) -expect_equal(result_2$name[4:5], c("predictor_d", "predictor_e")) - -# Test 3: Auto-increment on upsert mode with new rows -test_data_3 <- data.table::data.table( - name = c("predictor_f", "predictor_g"), - unit = c("W", "J") -) -db_autoinc$commit_upsert( - test_data_3, - "test_autoinc_t", - autoincrement_cols = "id_test" -) -result_3 <- db_autoinc$fetch("test_autoinc_t") -expect_equal(nrow(result_3), 7L) -expect_equal(result_3$id_test, 1:7) - -# Test 4: Auto-increment preserves existing IDs in data -test_data_4 <- data.table::data.table( - id_test = c(NA, 100L, NA), - name = c("new_a", "existing", "new_b"), - unit = c("x", "y", "z") +expect_silent( + db_coords$set_coords( + type = "square", + epsg = 2056, + extent = terra::ext(c(xmin = 2697000, xmax = 2698000, ymin = 1252000, ymax = 1253000)), + resolution = 100 + ) ) +expect_true(db_coords$row_count("coords_t") > 0L) + +# Should refuse to overwrite expect_warning( - db_autoinc$commit_overwrite( - test_data_4, - "test_autoinc2_t", - autoincrement_cols = "id_test" + db_coords$set_coords( + type = "square", + epsg = 2056, + extent = terra::ext(c(xmin = 2697000, xmax = 2698000, ymin = 1252000, ymax = 1253000)), + resolution = 100 ), - 
"Overriding existing IDs" + "not empty" ) -result_4 <- db_autoinc$fetch("test_autoinc2_t") -expect_equal(result_4$id_test, 1:3) -# Test 5: Multiple auto-increment columns -test_data_5 <- data.table::data.table( - name = c("item1", "item2"), - value = c(10, 20) -) -db_autoinc$commit_overwrite( - test_data_5, - "test_multi_autoinc_t", - autoincrement_cols = c("id_a", "id_b") -) -result_5 <- db_autoinc$fetch("test_multi_autoinc_t") -expect_equal(result_5$id_a, 1:2) -expect_equal(result_5$id_b, 1:2) - -# Test 6: Auto-increment continues from max in append -test_data_6a <- data.table::data.table( - id_seq = c(5L, 10L, 15L), - value = c(100, 200, 300) -) -db_autoinc$commit_overwrite( - test_data_6a, - "test_continue_t" -) -test_data_6b <- data.table::data.table( - value = c(400, 500) +# Test 26: set_periods method +test_dir_periods <- tempfile("evoland_periods_") +on.exit(unlink(test_dir_periods, recursive = TRUE), add = TRUE) +db_periods <- evoland_db$new(test_dir_periods) + +expect_silent( + db_periods$set_periods( + period_length_str = "P10Y", + start_observed = "1985-01-01", + end_observed = "2020-01-01", + end_extrapolated = "2060-01-01" + ) ) -db_autoinc$commit_append( - test_data_6b, - "test_continue_t", - autoincrement_cols = "id_seq" +expect_true(db_periods$row_count("periods_t") > 0L) + +# Should refuse to overwrite +expect_warning( + db_periods$set_periods(), + "not empty" ) -result_6 <- db_autoinc$fetch("test_continue_t") -expect_equal(nrow(result_6), 5L) -expect_equal(result_6$id_seq[4:5], c(16L, 17L)) -expect_equal(result_6$value[4:5], c(400, 500)) diff --git a/inst/tinytest/test_parquet_duckdb.R b/inst/tinytest/test_parquet_duckdb.R new file mode 100644 index 0000000..5f1c3db --- /dev/null +++ b/inst/tinytest/test_parquet_duckdb.R @@ -0,0 +1,392 @@ +# Test generic parquet_duckdb functionality +library(tinytest) + +# Create temporary directory for testing +test_dir <- tempfile("parquet_duckdb_test_") +on.exit(unlink(test_dir, recursive = TRUE), add = TRUE) + +# Test 1: Initialization +expect_silent( + db <- parquet_duckdb$new( + path = test_dir, + default_format = "parquet" + ) +) +expect_true(inherits(db, "parquet_duckdb")) +expect_true(dir.exists(test_dir)) +expect_equal(db$default_format, "parquet") +expect_true(!is.null(db$connection)) +expect_true(inherits(db$connection, "duckdb_connection")) + +# Test 2: Initial state - no tables +expect_identical(db$list_tables(), character(0)) + +# Test 3: Fetch from non-existent table returns empty data.table +result <- db$fetch("nonexistent_table") +expect_true(inherits(result, "data.table")) +expect_equal(nrow(result), 0L) + +# Test 4: Row count for non-existent table +expect_equal(db$row_count("nonexistent_table"), 0L) + +# Test 5: commit overwrite creates new table +test_data_1 <- data.table::data.table( + id = 1:5, + name = letters[1:5], + value = c(10.1, 20.2, 30.3, 40.4, 50.5) +) +expect_silent( + db$commit( + method = "overwrite", + test_data_1, + "test_table_1" + ) +) +expect_true("test_table_1" %in% db$list_tables()) +expect_equal(db$row_count("test_table_1"), 5L) + +# Test 6: Fetch retrieves committed data +retrieved <- db$fetch("test_table_1") +expect_equal(retrieved, test_data_1) + +# Test 7: commit overwrite replaces existing data +test_data_1b <- data.table::data.table( + id = 10:12, + name = letters[24:26], + value = c(100.1, 200.2, 300.3) +) +expect_silent( + db$commit( + method = "overwrite", + test_data_1b, + "test_table_1" + ) +) +expect_equal(db$row_count("test_table_1"), 3L) +retrieved <- db$fetch("test_table_1") 
+expect_equal(retrieved, test_data_1b) + +# Test 8: commit append adds to existing data +test_data_1c <- data.table::data.table( + id = 13:15, + name = letters[1:3], + value = c(111.1, 222.2, 333.3) +) +expect_silent( + db$commit(test_data_1c, "test_table_1", method = "append") +) +expect_equal(db$row_count("test_table_1"), 6L) +retrieved <- db$fetch("test_table_1") +expect_equal(nrow(retrieved), 6L) +expect_true(all(c(10:15) %in% retrieved$id)) + +# Test 9: commit append on non-existent table creates it +expect_silent( + db$commit(test_data_1, "test_table_2", method = "append") +) +expect_true("test_table_2" %in% db$list_tables()) +expect_equal(db$row_count("test_table_2"), 5L) + +# Test 10: commit w/ upsert on non-existent table creates it +test_data_3 <- data.table::data.table( + id_key = 1:3, + name = c("a", "b", "c"), + value = c(1.1, 2.2, 3.3) +) +expect_silent( + db$commit(test_data_3, "test_table_3", key_cols = "id_key", method = "upsert") +) +expect_true("test_table_3" %in% db$list_tables()) +expect_equal(db$row_count("test_table_3"), 3L) + +# Test 11: commit w/ upsert updates existing rows and inserts new ones +test_data_3b <- data.table::data.table( + id_key = c(2L, 3L, 4L), + name = c("b_updated", "c_updated", "d"), + value = c(22.2, 33.3, 44.4) +) +expect_silent( + db$commit(test_data_3b, "test_table_3", key_cols = "id_key", method = "upsert") +) +expect_equal(db$row_count("test_table_3"), 4L) +retrieved <- db$fetch("test_table_3") +expect_equal(nrow(retrieved), 4L) +expect_equal(retrieved[id_key == 2]$name, "b_updated") +expect_equal(retrieved[id_key == 3]$value, 33.3) +expect_equal(retrieved[id_key == 4]$name, "d") +expect_equal(retrieved[id_key == 1]$name, "a") # unchanged + +# Test 12: Fetch with WHERE clause +result <- db$fetch("test_table_3", where = "id_key >= 3") +expect_equal(nrow(result), 2L) +expect_true(all(result$id_key >= 3)) + +# Test 13: Fetch with LIMIT +result <- db$fetch("test_table_3", limit = 2) +expect_equal(nrow(result), 2L) + +# Test 14: Fetch with WHERE and LIMIT +result <- db$fetch("test_table_3", where = "value > 20", limit = 1) +expect_equal(nrow(result), 1L) +expect_true(result$value > 20) + +# Test 15: delete_from with WHERE clause +deleted_count <- db$delete_from("test_table_3", where = "id_key = 1") +expect_equal(deleted_count, 1L) +expect_equal(db$row_count("test_table_3"), 3L) +retrieved <- db$fetch("test_table_3") +expect_false(1L %in% retrieved$id_key) + +# Test 16: delete_from with complex WHERE +db$commit( + method = "overwrite", + test_data_1, + "test_table_4" +) +deleted_count <- db$delete_from("test_table_4", where = "id < 3") +expect_equal(deleted_count, 2L) +retrieved <- db$fetch("test_table_4") +expect_true(all(retrieved$id >= 3)) + +# Test 17: delete_from with NULL (delete all) +count_before <- db$row_count("test_table_4") +deleted_count <- db$delete_from("test_table_4", where = NULL) +expect_equal(deleted_count, count_before) +expect_equal(db$row_count("test_table_4"), 0L) + +# Test 18: delete_from on non-existent table +deleted_count <- db$delete_from("nonexistent", where = "id = 1") +expect_equal(deleted_count, 0L) + +# Test 19: delete_from with WHERE that matches nothing +db$commit( + method = "overwrite", + test_data_1, + "test_table_5" +) +initial_count <- db$row_count("test_table_5") +deleted_count <- db$delete_from("test_table_5", where = "id = 999") +expect_equal(deleted_count, 0L) +expect_equal(db$row_count("test_table_5"), initial_count) + +# Test 20: Auto-increment on overwrite (new table) +test_autoinc_1 <- 
data.table::data.table( + name = c("item_a", "item_b", "item_c"), + value = c(10, 20, 30) +) +db$commit( + method = "overwrite", + test_autoinc_1, + "test_autoinc_1", + autoincrement_cols = "id" +) +result <- db$fetch("test_autoinc_1") +expect_equal(result$id, 1:3) +expect_equal(result$name, c("item_a", "item_b", "item_c")) + +# Test 21: Auto-increment on append +test_autoinc_2 <- data.table::data.table( + name = c("item_d", "item_e"), + value = c(40, 50) +) +db$commit( + test_autoinc_2, + "test_autoinc_1", + autoincrement_cols = "id", + method = "append" +) +result <- db$fetch("test_autoinc_1") +expect_equal(nrow(result), 5L) +expect_equal(result$id, 1:5) +expect_equal(result$name[4:5], c("item_d", "item_e")) + +# Test 22: Auto-increment on upsert with new rows +test_autoinc_3 <- data.table::data.table( + name = c("item_f", "item_g"), + value = c(60, 70) +) +db$commit( + test_autoinc_3, + "test_autoinc_1", + autoincrement_cols = "id", + method = "upsert" +) +result <- db$fetch("test_autoinc_1") +expect_equal(nrow(result), 7L) +expect_equal(result$id, 1:7) + +# Test 23: Auto-increment warning when overriding existing IDs +test_autoinc_with_ids <- data.table::data.table( + id = c(NA, 100L, NA), + name = c("new_a", "existing", "new_b"), + value = c(1, 2, 3) +) +expect_warning( + db$commit( + method = "overwrite", + test_autoinc_with_ids, + "test_autoinc_2", + autoincrement_cols = "id" + ), + "Overriding existing IDs" +) +result <- db$fetch("test_autoinc_2") +expect_equal(result$id, 1:3) + +# Test 24: Multiple auto-increment columns +test_multi_autoinc <- data.table::data.table( + name = c("item1", "item2"), + value = c(10, 20) +) +db$commit( + method = "overwrite", + test_multi_autoinc, + "test_multi_autoinc", + autoincrement_cols = c("id_a", "id_b") +) +result <- db$fetch("test_multi_autoinc") +expect_equal(result$id_a, 1:2) +expect_equal(result$id_b, 1:2) + +# Test 25: Auto-increment continues from max +test_continue_a <- data.table::data.table( + id_seq = c(5L, 10L, 15L), + value = c(100, 200, 300) +) +db$commit( + method = "overwrite", + test_continue_a, + "test_continue" +) + +test_continue_b <- data.table::data.table( + value = c(400, 500) +) +db$commit( + test_continue_b, + "test_continue", + autoincrement_cols = "id_seq", + method = "append" +) +result <- db$fetch("test_continue") +expect_equal(nrow(result), 5L) +expect_equal(result$id_seq[4:5], c(16L, 17L)) +expect_equal(result$value[4:5], c(400, 500)) + +# Test 26: attach_table and detach_table +db$commit( + method = "overwrite", + test_data_1, + "test_attach" +) +expect_silent(db$attach_table("test_attach")) +# Verify table is attached by querying it directly +result <- db$get_query("SELECT COUNT(*) as n FROM test_attach") +expect_equal(result$n, 5L) +expect_silent(db$detach_table("test_attach")) +# After detach, table should not be accessible +expect_error( + db$get_query("SELECT COUNT(*) as n FROM test_attach"), + "test_attach" +) + +# Test 27: attach_table with column selection +db$attach_table("test_attach", columns = c("id", "name")) +result <- db$get_query("SELECT * FROM test_attach") +expect_equal(ncol(result), 2L) +expect_true(all(c("id", "name") %in% names(result))) +expect_false("value" %in% names(result)) +db$detach_table("test_attach") + +# Test 28: attach_table with WHERE clause +db$attach_table("test_attach", where = "id > 3") +result <- db$get_query("SELECT * FROM test_attach") +expect_equal(nrow(result), 2L) +expect_true(all(result$id > 3)) +db$detach_table("test_attach") + +# Test 29: execute() method 
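+# execute() wraps DBI::dbExecute and returns the number of rows affected,
+# while get_query() wraps DBI::dbGetQuery and returns a data.table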
+db$attach_table("test_attach") +rows_affected <- db$execute("DELETE FROM test_attach WHERE id = 1") +expect_true(rows_affected >= 0) # DuckDB returns number of affected rows +db$detach_table("test_attach") + +# Test 30: get_query() method +db$attach_table("test_attach") +result <- db$get_query("SELECT MAX(id) as max_id FROM test_attach") +expect_true(inherits(result, "data.table")) +expect_true("max_id" %in% names(result)) +db$detach_table("test_attach") + +# Test 31: CSV format support +test_dir_csv <- tempfile("parquet_duckdb_csv_") +on.exit(unlink(test_dir_csv, recursive = TRUE), add = TRUE) + +db_csv <- parquet_duckdb$new( + path = test_dir_csv, + default_format = "csv" +) +expect_equal(db_csv$default_format, "csv") + +test_csv_data <- data.table::data.table( + id = 1:3, + name = c("a", "b", "c") +) +db_csv$commit(test_csv_data, "csv_table", method = "overwrite") +expect_true("csv_table" %in% db_csv$list_tables()) +retrieved <- db_csv$fetch("csv_table") +expect_equal(retrieved, test_csv_data) + +# Test 32: Extension loading +test_dir_ext <- tempfile("parquet_duckdb_ext_") +on.exit(unlink(test_dir_ext, recursive = TRUE), add = TRUE) + +db_ext <- parquet_duckdb$new( + path = test_dir_ext, + extensions = "spatial" +) +# Verify spatial extension is loaded by using a spatial function +expect_silent( + db_ext$get_query("SELECT ST_Point(0, 0) as geom") +) + +# Test 33: Persistence across connections +db$commit( + method = "overwrite", + test_data_1, + "persist_test" +) +rm(db) +gc() + +# Reconnect to same path +db <- parquet_duckdb$new(path = test_dir) +expect_true("persist_test" %in% db$list_tables()) +retrieved <- db$fetch("persist_test") +expect_equal(retrieved, test_data_1) + +# Test 34: commit with no key_cols defaults to append +test_no_keys <- data.table::data.table( + name = c("x", "y"), + value = c(1, 2) +) +db$commit( + method = "overwrite", + test_no_keys, + "no_keys_test" +) +expect_equal(db$row_count("no_keys_test"), 2L) + +db$commit( + test_no_keys, + "no_keys_test", + key_cols = character(0), + method = "upsert" +) +expect_equal(db$row_count("no_keys_test"), 4L) # Should append + +# Test 35: Print method +expect_stdout( + print(db), + "Public methods:|Active bindings:|Format|Compression" +) diff --git a/inst/tinytest/test_trans_preds_t.R b/inst/tinytest/test_trans_preds_t.R index 866c455..d9c9da2 100644 --- a/inst/tinytest/test_trans_preds_t.R +++ b/inst/tinytest/test_trans_preds_t.R @@ -2,6 +2,206 @@ library(tinytest) # Test creation and validation -trans_preds_t <- create_trans_preds_t() +trans_preds_t <- as_trans_preds_t() expect_silent(print(trans_preds_t)) expect_true(nrow(trans_preds_t) >= 0L) + + +#' Actually running the predictor selection logic is heavily dependent on previous +#' steps. Hence, this is a more heavy-handed test verging on integration testing. 
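The integration test below ultimately exercises `prune_trans_preds()`, whose default `filter_fun` is `covariance_filter()` (documented in `man/covariance_filter.Rd` in this changeset). As a minimal standalone sketch of that filter's contract (toy data and invented column names, not part of the test suite):

```r
library(data.table)
set.seed(1)
toy <- data.table(
  result = rbinom(200, 1, 0.3),          # 0: no transition, 1: transition
  elevation = runif(200, 400, 800),
  slope = runif(200, 0, 30)
)
toy[, slope_noisy := slope + rnorm(200)] # near-duplicate of `slope`

# Candidates are ranked by the default polynomial-GLM p-value, then the
# correlation step drops anything with |Pearson r| > corcut against an
# already-kept covariate, so one of slope/slope_noisy should be discarded.
covariance_filter(toy, result_col = "result", corcut = 0.7)
```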
+test_dir_trans_preds <- tempfile("evoland_trans_preds_") +on.exit(unlink(test_dir_trans_preds, recursive = TRUE), add = TRUE) +db_tps <- evoland_db$new(test_dir_trans_preds) + +# Set up minimal coords and periods +db_tps$coords_t <- create_coords_t_square( + epsg = 2056, + extent = terra::ext(c(xmin = 2697000, xmax = 2697500, ymin = 1252000, ymax = 1252500)), + resolution = 100 +) + +db_tps$periods_t <- create_periods_t( + period_length_str = "P10Y", + start_observed = "1985-01-01", + end_observed = "2005-01-01", + end_extrapolated = "2015-01-01" +) + +# Set up LULC classes +db_tps$lulc_meta_t <- create_lulc_meta_t(list( + forest = list(pretty_name = "Forest", src_classes = 1L), + urban = list(pretty_name = "Urban", src_classes = 2L), + agriculture = list(pretty_name = "Agriculture", src_classes = 3L) +)) + +# Create synthetic LULC data with known transitions +set.seed(42) +n_coords <- nrow(db_tps$coords_t) +lulc_data <- data.table::rbindlist(list( + data.table::data.table( + id_coord = 1:n_coords, + id_lulc = sample(1:2, n_coords, replace = TRUE, prob = c(0.7, 0.3)), + id_period = 1L + ), + data.table::data.table( + id_coord = 1:n_coords, + id_lulc = sample(1:2, n_coords, replace = TRUE, prob = c(0.5, 0.5)), + id_period = 2L + ), + data.table::data.table( + id_coord = 1:n_coords, + id_lulc = sample(1:2, n_coords, replace = TRUE, prob = c(0.4, 0.6)), + id_period = 3L + ) +)) +db_tps$lulc_data_t <- as_lulc_data_t(lulc_data) + +# Create transition metadata +transitions <- db_tps$fetch("transitions_v") +db_tps$trans_meta_t <- create_trans_meta_t( + transitions, + min_cardinality_abs = 5L +) + +# Add predictor metadata with multiple predictors +pred_spec_tps <- list( + elevation = list( + unit = "m", + pretty_name = "Elevation", + description = "Elevation above sea level", + sources = list(list(url = "https://example.com/elevation.tif", md5sum = "abc123")) + ), + slope = list( + unit = "degrees", + pretty_name = "Slope", + description = "Terrain slope", + sources = list(list(url = "https://example.com/slope.tif", md5sum = "def456")) + ), + distance_to_road = list( + unit = "m", + pretty_name = "Distance to road", + description = "Distance to nearest road", + sources = list(list(url = "https://example.com/roads.gpkg", md5sum = "ghi789")) + ), + aspect = list( + unit = "degrees", + pretty_name = "Aspect", + description = "Terrain aspect", + sources = list(list(url = "https://example.com/aspect.tif", md5sum = "jkl012")) + ), + soil_type = list( + pretty_name = "Soil type", + description = "Soil classification", + sources = list(list(url = "https://example.com/soil.tif", md5sum = "mno345")) + ) +) +db_tps$pred_meta_t <- create_pred_meta_t(pred_spec_tps) + +# Add predictor data - mix of static and time-varying +set.seed(43) +pred_data_static <- data.table::rbindlist(list( + # Elevation (static, period 0) + data.table::data.table( + id_pred = 1L, + id_coord = 1:n_coords, + id_period = 0L, + value = runif(n_coords, 400, 800) + ), + # Slope (static, period 0) + data.table::data.table( + id_pred = 2L, + id_coord = 1:n_coords, + id_period = 0L, + value = runif(n_coords, 0, 30) + ), + # Aspect (static, period 0) + data.table::data.table( + id_pred = 4L, + id_coord = 1:n_coords, + id_period = 0L, + value = runif(n_coords, 0, 360) + ) +)) + +# Time-varying predictor +pred_data_varying <- data.table::rbindlist(lapply(1:3, function(period) { + data.table::data.table( + id_pred = 3L, # distance_to_road + id_coord = 1:n_coords, + id_period = period, + value = runif(n_coords, 0, 5000) + ) +})) + 
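All predictor blocks in this fixture, including the integer one added just below, share the long layout (id_pred, id_coord, id_period, value); `trans_pred_data_v()` later pivots this into the wide, one-column-per-predictor table that the filters consume. A rough sketch of that shape, for orientation only (the real view runs in DuckDB and also joins in a binary `result` column):

```r
# One row per (id_coord, id_period), one column per id_pred. Static
# predictors (id_period == 0) come out as NA for periods 1-3 here; the
# actual view presumably carries them over as period-constant values.
wide_sketch <- data.table::dcast(
  rbind(pred_data_static, pred_data_varying),
  id_coord + id_period ~ id_pred,
  value.var = "value"
)
```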
+db_tps$pred_data_t_float <- as_pred_data_t( + rbind(pred_data_static, pred_data_varying), + type = "float" +) + +# Add integer predictor (soil_type) +pred_data_int <- data.table::data.table( + id_pred = 5L, + id_coord = 1:n_coords, + id_period = 0L, + value = sample(1:5, n_coords, replace = TRUE) +) +db_tps$pred_data_t_int <- as_pred_data_t(pred_data_int, type = "int") + +# Test pruning +expect_message( + db_tps$prune_trans_preds( + corcut = 0.2 + ), + "Processing transition 1/2" +) +trans_preds_result <- db_tps$trans_preds_t +expect_true(inherits(trans_preds_result, "trans_preds_t")) +expect_equal(nrow(trans_preds_result), 4L) + +# Verify that all id_trans in result are viable +viable_trans_ids <- db_tps$trans_meta_t[is_viable == TRUE]$id_trans +expect_true(all(trans_preds_result$id_trans %in% viable_trans_ids)) + +# reset to full set of trans - preds +expect_silent(db_tps$set_full_trans_preds(overwrite = TRUE)) +expect_message( + db_tps$prune_trans_preds( + filter_fun = grrf_filter, + num.trees = 10, + gamma = 0.9 + ), + "Selected 5 predictor\\(s\\) for transition" +) + +# Test error handling - empty database +test_dir_empty <- tempfile("evoland_empty_") +on.exit(unlink(test_dir_empty, recursive = TRUE), add = TRUE) +db_empty <- evoland_db$new(test_dir_empty) + +expect_error( + db_empty$prune_trans_preds(), + "Table `trans_meta_t` does not exist" +) + +# Test with no predictors +test_dir_no_pred <- tempfile("evoland_no_pred_") +on.exit(unlink(test_dir_no_pred, recursive = TRUE), add = TRUE) +db_no_pred <- evoland_db$new(test_dir_no_pred) +db_no_pred$coords_t <- db_tps$coords_t +db_no_pred$periods_t <- db_tps$periods_t +db_no_pred$lulc_meta_t <- db_tps$lulc_meta_t +db_no_pred$lulc_data_t <- db_tps$lulc_data_t +expect_warning( + db_no_pred$trans_meta_t <- db_tps$trans_meta_t, + "Overriding existing IDs" +) +expect_error( + db_no_pred$prune_trans_preds(), + "Table `pred_meta_t` does not exist" +) + +# Test print method +expect_stdout( + print(trans_preds_result), + "Transition-Predictor|Total relationships" +) diff --git a/inst/tinytest/test_util_terra.R b/inst/tinytest/test_util_terra.R index b3089d3..35a0bab 100644 --- a/inst/tinytest/test_util_terra.R +++ b/inst/tinytest/test_util_terra.R @@ -140,7 +140,7 @@ expect_true(!anyNA(vector_na_omit_true$value)) vector_na_omit_false <- extract_using_coords_t(vect_with_na, coords_t, na_omit = FALSE) expect_true(anyNA(vector_na_omit_false$value)) -# Test compute_neighbors +# Test create_neighbors_t # Create a simple test coords_t with known distances test_coords <- data.table::data.table( id_coord = 1L:5L, @@ -152,7 +152,10 @@ test_coords <- data.table::data.table( test_coords <- as_coords_t(test_coords) # Test basic neighbor computation with max_distance = 150 -neighbors <- compute_neighbors(test_coords, max_distance = 150) +expect_stdout( + neighbors <- create_neighbors_t(test_coords, max_distance = 150), + "Progress" +) # Check structure expect_true(data.table::is.data.table(neighbors)) @@ -204,10 +207,13 @@ expect_equal( ) # Test with distance_breaks -neighbors_classified <- compute_neighbors( - test_coords, - max_distance = 150, - distance_breaks = c(0, 100, 150) +expect_stdout( + neighbors_classified <- create_neighbors_t( + test_coords, + max_distance = 150, + distance_breaks = c(0, 100, 150) + ), + "Progress" ) # Check that distance_class is populated @@ -234,7 +240,10 @@ expect_equal( ) # Test with smaller max_distance -neighbors_small <- compute_neighbors(test_coords, max_distance = 110) +expect_stdout( + neighbors_small <- 
create_neighbors_t(test_coords, max_distance = 110), + "Progress" +) # With max_distance = 110, point 1 should only have neighbors 2 and 4 (distance 100) # but not 5 (distance ~141.4) @@ -246,27 +255,27 @@ expect_false(5L %in% neighbors_from_1_small$id_coord_neighbor) # Test error handling expect_error( - compute_neighbors("not_coords_t", max_distance = 100), + create_neighbors_t("not_coords_t", max_distance = 100), "coords_t must be a coords_t object" ) expect_error( - compute_neighbors(test_coords, max_distance = -10), + create_neighbors_t(test_coords, max_distance = -10), "max_distance must be a positive scalar numeric" ) expect_error( - compute_neighbors(test_coords, max_distance = 100, distance_breaks = c(1)), + create_neighbors_t(test_coords, max_distance = 100, distance_breaks = c(1)), "distance_breaks must be NULL or a numeric vector with at least 2 elements" ) expect_error( - compute_neighbors(test_coords, max_distance = 100, distance_breaks = "invalid"), + create_neighbors_t(test_coords, max_distance = 100, distance_breaks = "invalid"), "distance_breaks must be NULL or a numeric vector with at least 2 elements" ) # Test with real coords_t from earlier in the test file -real_neighbors <- compute_neighbors(coords_t, max_distance = 300) +expect_stdout(real_neighbors <- create_neighbors_t(coords_t, max_distance = 300), "Progress") # Each point in a regular 100m grid should have neighbors # Interior points should have 8 neighbors within 300m (8-connectivity) @@ -291,10 +300,13 @@ for (i in seq_len( } # Test with distance classification on real data -real_neighbors_class <- compute_neighbors( - coords_t, - max_distance = 300, - distance_breaks = c(0, 150, 300) +expect_silent( + real_neighbors_class <- create_neighbors_t( + coords_t, + max_distance = 300, + distance_breaks = c(0, 150, 300), + quiet = TRUE + ) ) expect_true(all(!is.na(real_neighbors_class$distance_class))) diff --git a/man/covariance_filter.Rd b/man/covariance_filter.Rd new file mode 100644 index 0000000..0693652 --- /dev/null +++ b/man/covariance_filter.Rd @@ -0,0 +1,86 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/covariance_filter.R +\name{covariance_filter} +\alias{covariance_filter} +\alias{rank_poly_glm} +\alias{compute_balanced_weights} +\alias{select_by_correlation} +\title{Two stage covariate filtering} +\usage{ +covariance_filter( + data, + result_col = "result", + rank_fun = rank_poly_glm, + weights = compute_balanced_weights(data[[result_col]]), + corcut = 0.7, + ... +) + +rank_poly_glm(x, y, weights = NULL, ...) + +compute_balanced_weights(trans_result, legacy = FALSE) + +select_by_correlation(cor_mat, corcut) +} +\arguments{ +\item{data}{A data.table of target variable and candidate covariates to be filtered; +wide format with one predictor per column.} + +\item{result_col}{Name of the column representing the transition results (0: no +trans, 1: trans)} + +\item{rank_fun}{Optional function to compute ranking scores for each covariate. +Should take arguments (x, y, weights, ...) and return a single numeric value +(lower = better). 
Defaults to polynomial GLM p-value ranking.} + +\item{weights}{Optional weights vector} + +\item{corcut}{Correlation cutoff threshold} + +\item{...}{Additional arguments passed to rank_fun.} + +\item{x}{A numeric vector representing a single covariate} + +\item{y}{A binary outcome vector (0/1)} + +\item{trans_result}{Binary outcome vector (0/1)} + +\item{legacy}{Bool, use legacy weighting?} + +\item{cor_mat}{Absolute correlation matrix} +} +\value{ +A set of column names (covariates) to retain +} +\description{ +The \code{covariance_filter} returns a set of covariates for land use land cover change +(LULCC) models based on a two-stage variable selection: a first statistical fit +estimates a covariate's quality for a given prediction task. A second step selects +all variables below a given correlation threshold: We iterate over a correlation +matrix ordered in the first step. Starting within the leftmost column, all rows (i.e. +candidates) greater than the given threshold are dropped from the full set of +candidates. This candidate selection is retained and used to select the next column, +until no further columns are left to investigate. The columns that were iterated over +are those returned as a character vector of selected variable names. +} +\details{ +The function first ranks covariates using the provided ranking function (default: +quasibinomial polynomial GLM). Then, it iteratively removes highly (Pearson) +correlated variables based on the correlation cutoff threshold, preserving variables +in order of their ranking. See +\url{https://github.com/ethzplus/evoland-plus-legacy/blob/main/R/lulcc.covfilter.r} for +where the concept came from. The original author was Antoine Adde, with edits by +Benjamin Black. A similar mechanism is found in \url{https://github.com/antadde/covsel/}. +} +\section{Functions}{ +\itemize{ +\item \code{rank_poly_glm()}: Default ranking function using polynomial GLM. Returns +the lower p value for each of the polynomial terms + +\item \code{compute_balanced_weights()}: Compute class-balanced weights for imbalanced binary +outcomes; returns a numeric vector + +\item \code{select_by_correlation()}: Implements the iterative selection procedure. + +}} +\keyword{internal} diff --git a/man/evoland_db.Rd b/man/evoland_db.Rd index e555300..c3775f3 100644 --- a/man/evoland_db.Rd +++ b/man/evoland_db.Rd @@ -8,219 +8,115 @@ An R6 class that provides an interface to a folder-based data storage system for the evoland package. Each table is stored as a parquet (or CSV) file. This class uses DuckDB for in-memory SQL operations while persisting data to disk in parquet format for better compression. -} -\section{Public fields}{ -\if{html}{\out{
}} -\describe{ -\item{\code{connection}}{DBI connection object to an in-memory DuckDB database} - -\item{\code{path}}{Character string path to the data folder} -\item{\code{default_format}}{Default file format for new tables} - -\item{\code{writeopts}}{Default write options for DuckDB, see} +Inherits from \link{parquet_duckdb} for generic database operations. } -\if{html}{\out{
}} +\seealso{ +Additional methods and active bindings are added to this class in separate files: +\itemize{ +\item \link{evoland_db_tables} - Table active bindings (coords_t, lulc_data_t, etc.) +\item \link{evoland_db_views} - View active bindings (lulc_meta_long_v, etc.) and methods +\item \link{evoland_db_neighbors} - Neighbor analysis methods } -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ -\item{\code{coords_t}}{A \code{coords_t} instance; see \code{\link[=create_coords_t]{create_coords_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{periods_t}}{A \code{periods_t} instance; see \code{\link[=create_periods_t]{create_periods_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{lulc_meta_t}}{A \code{lulc_meta_t} instance; see \code{\link[=create_lulc_meta_t]{create_lulc_meta_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{lulc_meta_long_v}}{Return a \code{lulc_meta_long_v} instance, i.e. unrolled \code{lulc_meta_t}.} - -\item{\code{lulc_data_t}}{A \code{lulc_data_t} instance; see \code{\link[=as_lulc_data_t]{as_lulc_data_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{pred_data_t_float}}{A \code{pred_data_t_float} instance; see -\code{\link[=create_pred_data_t]{create_pred_data_t()}} for the type of object to assign. Assigning is an -upsert operation.} - -\item{\code{pred_data_t_int}}{A \code{pred_data_t_int} instance; see -\code{\link[=create_pred_data_t]{create_pred_data_t()}} for the type of object to assign. Assigning is an -upsert operation.} - -\item{\code{pred_data_t_bool}}{A \code{pred_data_t_bool} instance; see -\code{\link[=create_pred_data_t]{create_pred_data_t()}} for the type of object to assign. Assigning is an -upsert operation.} - -\item{\code{extent}}{Return a terra SpatExtent based on coords_t} - -\item{\code{coords_minimal}}{data.table with only (id_coord, lon, lat)} - -\item{\code{pred_meta_t}}{A \code{pred_meta_t} instance; see \code{\link[=create_pred_meta_t]{create_pred_meta_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{pred_sources_v}}{Retrieve a table of distinct predictor urls and their -md5sum} - -\item{\code{trans_meta_t}}{A \code{trans_meta_t} instance; see \code{\link[=create_trans_meta_t]{create_trans_meta_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{trans_preds_t}}{A \code{trans_preds_t} instance; see \code{\link[=create_trans_preds_t]{create_trans_preds_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{intrv_meta_t}}{A \code{intrv_meta_t} instance; see \code{\link[=create_intrv_meta_t]{create_intrv_meta_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{intrv_masks_t}}{A \code{intrv_masks_t} instance; see \code{\link[=as_intrv_masks_t]{as_intrv_masks_t()}} for the type of -object to assign. Assigning is an upsert operation.} - -\item{\code{trans_models_t}}{A \code{trans_models_t} instance; see \code{\link[=create_trans_models_t]{create_trans_models_t()}} for the type -of object to assign. Assigning is an upsert operation.} - -\item{\code{alloc_params_t}}{A \code{alloc_params_t} instance; see \code{\link[=as_alloc_params_t]{as_alloc_params_t()}} for the type -of object to assign. Assigning is an upsert operation.} } -\if{html}{\out{
}} +\section{Super class}{ +\code{\link[evoland:parquet_duckdb]{evoland::parquet_duckdb}} -> \code{evoland_db} } \section{Methods}{ \subsection{Public methods}{ \itemize{ +\item \href{#method-evoland_db-set_neighbors}{\code{evoland_db$set_neighbors()}} +\item \href{#method-evoland_db-generate_neighbor_predictors}{\code{evoland_db$generate_neighbor_predictors()}} +\item \href{#method-evoland_db-trans_pred_data_v}{\code{evoland_db$trans_pred_data_v()}} \item \href{#method-evoland_db-new}{\code{evoland_db$new()}} -\item \href{#method-evoland_db-commit_overwrite}{\code{evoland_db$commit_overwrite()}} -\item \href{#method-evoland_db-commit_append}{\code{evoland_db$commit_append()}} -\item \href{#method-evoland_db-commit_upsert}{\code{evoland_db$commit_upsert()}} \item \href{#method-evoland_db-fetch}{\code{evoland_db$fetch()}} -\item \href{#method-evoland_db-list_tables}{\code{evoland_db$list_tables()}} -\item \href{#method-evoland_db-execute}{\code{evoland_db$execute()}} -\item \href{#method-evoland_db-get_query}{\code{evoland_db$get_query()}} -\item \href{#method-evoland_db-attach_table}{\code{evoland_db$attach_table()}} -\item \href{#method-evoland_db-detach_table}{\code{evoland_db$detach_table()}} -\item \href{#method-evoland_db-row_count}{\code{evoland_db$row_count()}} -\item \href{#method-evoland_db-delete_from}{\code{evoland_db$delete_from()}} \item \href{#method-evoland_db-set_report}{\code{evoland_db$set_report()}} \item \href{#method-evoland_db-set_coords}{\code{evoland_db$set_coords()}} \item \href{#method-evoland_db-set_periods}{\code{evoland_db$set_periods()}} \item \href{#method-evoland_db-add_predictor}{\code{evoland_db$add_predictor()}} +\item \href{#method-evoland_db-set_full_trans_preds}{\code{evoland_db$set_full_trans_preds()}} +\item \href{#method-evoland_db-prune_trans_preds}{\code{evoland_db$prune_trans_preds()}} \item \href{#method-evoland_db-clone}{\code{evoland_db$clone()}} } } +\if{html}{\out{ +
Inherited methods + +
+}} \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-new}{}}} -\subsection{Method \code{new()}}{ -Initialize a new evoland_db object +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-evoland_db-set_neighbors}{}}} +\subsection{Method \code{set_neighbors()}}{ \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$new(path, default_format = c("parquet", "csv"), ...)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{evoland_db$set_neighbors( + max_distance = 1000, + distance_breaks = c(0, 100, 500, 1000), + resolution = 100, + overwrite = FALSE +)}\if{html}{\out{
}} } -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{path}}{Character string. Path to the data folder.} - -\item{\code{default_format}}{Character. Default file format ("parquet" or "csv"). -Default is "parquet".} - -\item{\code{...}}{passed on to \code{set_report}} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -A new \code{evoland_db} object -} } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-commit_overwrite}{}}} -\subsection{Method \code{commit_overwrite()}}{ -Commit data in overwrite mode +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-evoland_db-generate_neighbor_predictors}{}}} +\subsection{Method \code{generate_neighbor_predictors()}}{ \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$commit_overwrite( - x, - table_name, - autoincrement_cols = character(0), - map_cols = character(0) -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{evoland_db$generate_neighbor_predictors()}\if{html}{\out{
}} } -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{Data frame to commit} - -\item{\code{table_name}}{Character string table name} - -\item{\code{autoincrement_cols}}{Character vector of column names to auto-increment} - -\item{\code{map_cols}}{Character vector of columns to convert to MAP format} -} -\if{html}{\out{
}} -} } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-commit_append}{}}} -\subsection{Method \code{commit_append()}}{ -Commit data in append mode +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-evoland_db-trans_pred_data_v}{}}} +\subsection{Method \code{trans_pred_data_v()}}{ \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$commit_append( - x, - table_name, - autoincrement_cols = character(0), - map_cols = character(0) -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{evoland_db$trans_pred_data_v(id_trans, id_pred = NULL, na_value = NA)}\if{html}{\out{
}} } -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{Data frame to commit} - -\item{\code{table_name}}{Character string table name} - -\item{\code{autoincrement_cols}}{Character vector of column names to auto-increment} - -\item{\code{map_cols}}{Character vector of columns to convert to MAP format} -} -\if{html}{\out{
}} -} } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-commit_upsert}{}}} -\subsection{Method \code{commit_upsert()}}{ -Commit data in upsert mode +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-evoland_db-new}{}}} +\subsection{Method \code{new()}}{ +Initialize a new evoland_db object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$commit_upsert( - x, - table_name, - key_cols = grep("^id_", names(x), value = TRUE), - autoincrement_cols = character(0), - map_cols = character(0) -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{evoland_db$new(path, default_format = c("parquet", "csv"), ...)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{x}}{Data frame to commit} - -\item{\code{table_name}}{Character string table name} - -\item{\code{key_cols}}{Identify unique columns - heuristic: if prefixed with -id_, the set of all columns designates a uniqueness condition} +\item{\code{path}}{Character string. Path to the data folder.} -\item{\code{autoincrement_cols}}{Character vector of column names to auto-increment} +\item{\code{default_format}}{Character. Default file format ("parquet" or "csv"). +Default is "parquet".} -\item{\code{map_cols}}{Character vector of columns to convert to MAP format} +\item{\code{...}}{passed on to \code{set_report}} } \if{html}{\out{
}} } +\subsection{Returns}{ +A new \code{evoland_db} object +} } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-evoland_db-fetch}{}}} \subsection{Method \code{fetch()}}{ -Fetch data from storage +Fetch data from storage with evoland-specific view support \subsection{Usage}{ \if{html}{\out{
}}\preformatted{evoland_db$fetch(table_name, where = NULL, limit = NULL)}\if{html}{\out{
}} } @@ -241,139 +137,6 @@ A data.table } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-list_tables}{}}} -\subsection{Method \code{list_tables()}}{ -List all tables (files) in storage -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$list_tables()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -Character vector of table names -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-execute}{}}} -\subsection{Method \code{execute()}}{ -Execute statement -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$execute(statement)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{statement}}{A SQL statement} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -No. of rows affected by statement -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-get_query}{}}} -\subsection{Method \code{get_query()}}{ -Get Query -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$get_query(statement)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{statement}}{A SQL statement} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -No. of rows affected by statement -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-attach_table}{}}} -\subsection{Method \code{attach_table()}}{ -Attach one or more tables from the database folder as temporary tables in DuckDB. -This is useful for working with multiple tables in SQL queries without loading -them into R memory. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$attach_table(table_name, columns = "*")}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{table_name}}{Character vector. Names of table to attach.} - -\item{\code{columns}}{Character vector. Optional sql column selection, defaults to "*"} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-detach_table}{}}} -\subsection{Method \code{detach_table()}}{ -Detach one or more tables from the database. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$detach_table(table_name)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{table_name}}{Character. Name of table to drop.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-row_count}{}}} -\subsection{Method \code{row_count()}}{ -Get table row count -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$row_count(table_name)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{table_name}}{Character string. Name of the table to query.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -No. of rows -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-evoland_db-delete_from}{}}} -\subsection{Method \code{delete_from()}}{ -Delete rows from a table -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$delete_from(table_name, where = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{table_name}}{Character string. Name of the table to delete from.} - -\item{\code{where}}{Character string, defaults to NULL: delete everything in table.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -No. of rows affected -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-evoland_db-set_report}{}}} \subsection{Method \code{set_report()}}{ @@ -460,6 +223,28 @@ Add a predictor to the database } \if{html}{\out{}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-evoland_db-set_full_trans_preds}{}}} +\subsection{Method \code{set_full_trans_preds()}}{ +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{evoland_db$set_full_trans_preds(overwrite = FALSE)}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-evoland_db-prune_trans_preds}{}}} +\subsection{Method \code{prune_trans_preds()}}{ +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{evoland_db$prune_trans_preds( + filter_fun = covariance_filter, + na_value = NA, + ... +)}\if{html}{\out{
}} +} + } \if{html}{\out{
}}
\if{html}{\out{}}
diff --git a/man/evoland_db_neighbors.Rd b/man/evoland_db_neighbors.Rd
new file mode 100644
index 0000000..91db904
--- /dev/null
+++ b/man/evoland_db_neighbors.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/evoland_db_neighbors.R
+\name{evoland_db_neighbors}
+\alias{evoland_db_neighbors}
+\title{Neighbor analysis methods for evoland_db}
+\description{
+This file adds neighbor analysis methods to the \code{evoland_db} class using R6's \verb{$set()} method.
+These methods compute neighbor relationships and generate neighbor-based predictors.
+}
+\section{Methods Added}{
+
+\itemize{
+\item \code{set_neighbors(max_distance, distance_breaks, resolution, overwrite)} -
+Computes neighbor relationships between coordinates.
+\itemize{
+\item \code{max_distance}: Maximum distance for neighbors (default: 1000)
+\item \code{distance_breaks}: Vector of breaks for distance classes (default: c(0, 100, 500, 1000))
+\item \code{resolution}: Grid resolution for distance calculations (default: 100)
+\item \code{overwrite}: Whether to overwrite existing neighbors_t (default: FALSE)
+}
+\item \code{generate_neighbor_predictors()} - Generates predictor variables based on neighbor
+land use counts by distance class. Requires neighbors_t with distance_class column
+(set_neighbors called with distance_breaks).
+}
+}
+
diff --git a/man/evoland_db_tables.Rd b/man/evoland_db_tables.Rd
new file mode 100644
index 0000000..59cc49d
--- /dev/null
+++ b/man/evoland_db_tables.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/evoland_db_tables.R
+\name{evoland_db_tables}
+\alias{evoland_db_tables}
+\title{Table active bindings for evoland_db}
+\description{
+This file adds table active bindings to the \code{evoland_db} class using R6's \verb{$set()} method.
+These bindings provide read/write access to database tables with automatic validation.
+}
+\section{Active Bindings Added}{
+
+\itemize{
+\item \code{coords_t} - Coordinates table. See \code{\link[=as_coords_t]{as_coords_t()}}
+\item \code{periods_t} - Time periods table. See \code{\link[=as_periods_t]{as_periods_t()}}
+\item \code{lulc_meta_t} - Land use/land cover metadata. See \code{\link[=as_lulc_meta_t]{as_lulc_meta_t()}}
+\item \code{lulc_data_t} - Land use/land cover data. See \code{\link[=as_lulc_data_t]{as_lulc_data_t()}}
+\item \code{pred_data_t_float} - Float predictor data. See \code{\link[=as_pred_data_t]{as_pred_data_t()}}
+\item \code{pred_data_t_int} - Integer predictor data. See \code{\link[=as_pred_data_t]{as_pred_data_t()}}
+\item \code{pred_data_t_bool} - Boolean predictor data. See \code{\link[=as_pred_data_t]{as_pred_data_t()}}
+\item \code{pred_meta_t} - Predictor metadata. See \code{\link[=as_pred_meta_t]{as_pred_meta_t()}}
+\item \code{trans_meta_t} - Transition metadata. See \code{\link[=as_trans_meta_t]{as_trans_meta_t()}}
+\item \code{trans_preds_t} - Transition-predictor relationships. See \code{\link[=as_trans_preds_t]{as_trans_preds_t()}}
+\item \code{intrv_meta_t} - Intervention metadata. See \code{\link[=as_intrv_meta_t]{as_intrv_meta_t()}}
+\item \code{intrv_masks_t} - Intervention masks. See \code{\link[=as_intrv_masks_t]{as_intrv_masks_t()}}
+\item \code{trans_models_t} - Transition models. See \code{\link[=as_trans_models_t]{as_trans_models_t()}}
+\item \code{alloc_params_t} - Allocation parameters. See \code{\link[=as_alloc_params_t]{as_alloc_params_t()}}
+\item \code{neighbors_t} - Neighbor relationships. 
See \code{\link[=as_neighbors_t]{as_neighbors_t()}} +} +} + diff --git a/man/evoland_db_views.Rd b/man/evoland_db_views.Rd index 68d5b66..d0a7c93 100644 --- a/man/evoland_db_views.Rd +++ b/man/evoland_db_views.Rd @@ -2,27 +2,26 @@ % Please edit documentation in R/evoland_db_views.R \name{evoland_db_views} \alias{evoland_db_views} -\alias{make_pred_sources_v} -\alias{make_lulc_meta_long_v} -\alias{make_coords_minimal} \title{Views on the evoland-plus data model} -\usage{ -make_pred_sources_v(self, private) - -make_lulc_meta_long_v(self, private) - -make_coords_minimal(self, private) -} \description{ -Functions to generate views on the database +This file adds view active bindings and methods to the \code{evoland_db} class using R6's \verb{$set()} method. +These provide computed views on the database without storing additional data. } -\section{Functions}{ +\section{Active Bindings Added}{ + \itemize{ -\item \code{make_pred_sources_v()}: Retrieve a table of distinct predictor urls and their -md5sum +\item \code{lulc_meta_long_v} - Unrolled LULC metadata with one row per source class +\item \code{pred_sources_v} - Distinct predictor URLs and their MD5 checksums +\item \code{transitions_v} - Land use transitions derived from lulc_data_t +\item \code{extent} - Spatial extent of coords_t as terra::SpatExtent +\item \code{coords_minimal} - Minimal coordinate representation (id_coord, lon, lat) +} +} -\item \code{make_lulc_meta_long_v()}: Return a \code{lulc_meta_long_v} instance, i.e. unrolled \code{lulc_meta_t}. +\section{Methods Added}{ -\item \code{make_coords_minimal()}: Minimal coordinate representation (id_coord, lon, lat) +\itemize{ +\item \code{trans_pred_data_v(id_trans)} - Returns wide table of transition results and predictor data for a specific transition. Used as input to covariance filtering. +} +} -}} diff --git a/man/grrf_filter.Rd b/man/grrf_filter.Rd new file mode 100644 index 0000000..a3bbb93 --- /dev/null +++ b/man/grrf_filter.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/grrf_filter.r +\name{grrf_filter} +\alias{grrf_filter} +\title{Guided Regularized Random Forest Feature Selection} +\usage{ +grrf_filter( + data, + result_col = "result", + weights = compute_balanced_weights(data[[result_col]]), + gamma = 0.5, + num.trees = 500, + max.depth = 100, + ... +) +} +\arguments{ +\item{data}{A data.table of target variable and candidate covariates to be filtered; wide format +with one predictor per column.} + +\item{result_col}{Name of the column representing the transition results (0: no trans, 1: trans)} + +\item{weights}{Optional named vector of class weights. If NULL, class-balanced weights +are computed automatically using compute_grrf_weights().} + +\item{gamma}{Numeric between 0-1 controlling the weight of the normalized importance +score (the "importance coefficient"). When gamma = 0, we perform unguided +regularized random forest (no guiding effect). When gamma = 1, we apply the +strongest guiding effect, leading to the most penalization of redundant +features and the most concise feature sets. Default is 0.5.} + +\item{num.trees}{Number of trees to grow in each random forest. 
Default is 500.}
+
+\item{max.depth}{Maximum depth of each tree, passed on to \code{ranger::ranger()}. Default is 100.}
+
+\item{...}{Additional arguments passed to ranger::ranger().}
+}
+\value{
+A character vector of column names (covariates) to retain, ordered by
+importance (most important first)
+}
+\description{
+The \code{grrf_filter} returns a set of covariates for land use land cover change (LULCC) models based
+on feature selection with Guided Regularized Random Forests. This is a two-stage random forest
+approach: a first unregularized random forest estimates variable importance scores. These scores
+are then used to guide a second regularized random forest that penalizes less important features,
+resulting in a more parsimonious feature set.
+}
+\details{
+The Guided Regularized Random Forest (GRRF) algorithm works as follows:
+\enumerate{
+\item Fit an initial unregularized random forest to obtain variable importance scores
+\item Normalize these importance scores and use them to compute regularization
+coefficients: coefReg = (1 - gamma) + gamma * normalized_importance
+\item Fit a regularized random forest using these coefficients to penalize splits
+on less important variables
+\item Return variables with positive importance in the regularized model
+}
+
+Class weights are used to handle class imbalance. Variables in terminal nodes are
+weighted by class, and splits are evaluated using weighted Gini impurity.
+
+The ranger implementation uses the \code{split.select.weights} parameter to apply
+regularization penalties, approximating the RRF regularization approach.
+}
+\references{
+Deng, H., & Runger, G. (2013). Gene selection with guided regularized random forest.
+Pattern Recognition, 46(12), 3483-3489. https://arxiv.org/pdf/1306.0237.pdf
+
+Original implementation by Antoine Adde, edited by Ben Black and adapted for
+ranger by the evoland-plus team.
+}
diff --git a/man/neighbors_t.Rd b/man/neighbors_t.Rd
new file mode 100644
index 0000000..55d0260
--- /dev/null
+++ b/man/neighbors_t.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/neighbors_t.R
+\name{neighbors_t}
+\alias{neighbors_t}
+\alias{as_neighbors_t}
+\alias{create_neighbors_t}
+\alias{validate.neighbors_t}
+\alias{print.neighbors_t}
+\title{Create Neighbors Table}
+\usage{
+as_neighbors_t(x)
+
+create_neighbors_t(
+  coords_t,
+  max_distance,
+  distance_breaks = NULL,
+  resolution = 100,
+  quiet = FALSE
+)
+
+\method{validate}{neighbors_t}(x, ...)
+
+\method{print}{neighbors_t}(x, nrow = 10, ...)
+}
+\arguments{
+\item{x}{An object that can be passed to \code{\link[data.table:setDT]{data.table::setDT()}}}
+
+\item{coords_t}{A coords_t object containing coordinate points}
+
+\item{max_distance}{Maximum distance to search for neighbors (in same units as
+coordinates and resolution)}
+
+\item{distance_breaks}{Optional numeric vector defining distance class boundaries.
+If NULL, no distance classification is performed.
+If provided, must have at least 2 elements defining interval breaks.}
+
+\item{resolution}{Grid cell size for rasterization (default: 100.0, in same units as coordinates)}
+
+\item{quiet}{Logical; if TRUE, suppress progress output (default: FALSE)}
+
+\item{...}{Passed to \link[data.table:print.data.table]{data.table::print.data.table}}
+
+\item{nrow}{Maximum number of rows to print. 
See \link[data.table:print.data.table]{data.table::print.data.table}} +} +\value{ +A data.table of class "neighbors_t" with columns: +\itemize{ +\item \code{id_coord_origin}: Foreign key to coords_t (origin coordinate) +\item \code{id_coord_neighbor}: Foreign key to coords_t (neighbor coordinate) +\item \code{distance}: Numeric distance between coordinates +\item \code{distance_class}: Optional factor representing distance intervals +} + +A data.table with columns: +\itemize{ +\item id_coord_origin: ID of the origin coordinate +\item id_coord_neighbor: ID of the neighboring coordinate +\item distance: Distance between origin and neighbor +\item distance_class: Factor indicating distance class (if distance_breaks provided) +} +} +\description{ +Creates a \code{neighbors_t} table and validates that it matches the schema. +} +\section{Methods (by generic)}{ +\itemize{ +\item \code{validate(neighbors_t)}: Validate a neighbors_t object + +\item \code{print(neighbors_t)}: Print a neighbors_t object + +}} +\section{Functions}{ +\itemize{ +\item \code{create_neighbors_t()}: Compute neighboring coordinates within specified distances. In +order to be computationally feasible, the coordinates' IDs are rasterized before +their actual Euclidean distance is calculated. If coordinates are so close that they +get rasterized to the same cell, the first one is used and a warning is emitted. If +this happens, try again using a lower resolution. + +}} diff --git a/man/parquet_duckdb.Rd b/man/parquet_duckdb.Rd new file mode 100644 index 0000000..cb41afa --- /dev/null +++ b/man/parquet_duckdb.Rd @@ -0,0 +1,354 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parquet_duckdb.R +\name{parquet_duckdb} +\alias{parquet_duckdb} +\title{R6 Base Class for Parquet-Backed DuckDB Storage} +\description{ +A domain-agnostic R6 class that provides an interface to a folder-based data +storage system using DuckDB for in-memory SQL operations and parquet files +for efficient on-disk persistence. This class can be inherited by +domain-specific database classes. +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{connection}}{DBI connection object to an in-memory DuckDB database} + +\item{\code{path}}{Character string path to the data folder} + +\item{\code{default_format}}{Default file format for new tables} + +\item{\code{writeopts}}{Default write options for DuckDB} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-parquet_duckdb-new}{\code{parquet_duckdb$new()}} +\item \href{#method-parquet_duckdb-execute}{\code{parquet_duckdb$execute()}} +\item \href{#method-parquet_duckdb-get_query}{\code{parquet_duckdb$get_query()}} +\item \href{#method-parquet_duckdb-attach_table}{\code{parquet_duckdb$attach_table()}} +\item \href{#method-parquet_duckdb-detach_table}{\code{parquet_duckdb$detach_table()}} +\item \href{#method-parquet_duckdb-row_count}{\code{parquet_duckdb$row_count()}} +\item \href{#method-parquet_duckdb-list_tables}{\code{parquet_duckdb$list_tables()}} +\item \href{#method-parquet_duckdb-with_tables}{\code{parquet_duckdb$with_tables()}} +\item \href{#method-parquet_duckdb-fetch}{\code{parquet_duckdb$fetch()}} +\item \href{#method-parquet_duckdb-delete_from}{\code{parquet_duckdb$delete_from()}} +\item \href{#method-parquet_duckdb-commit}{\code{parquet_duckdb$commit()}} +\item \href{#method-parquet_duckdb-print}{\code{parquet_duckdb$print()}} +\item \href{#method-parquet_duckdb-clone}{\code{parquet_duckdb$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-new}{}}} +\subsection{Method \code{new()}}{ +Initialize a new parquet_duckdb object +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$new( + path, + default_format = c("parquet", "csv"), + extensions = character(0) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Character string. Path to the data folder.} + +\item{\code{default_format}}{Character. Default file format ("parquet" or "csv"). +Default is "parquet".} + +\item{\code{extensions}}{Character vector of DuckDB extensions to load (e.g., "spatial")} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +A new \code{parquet_duckdb} object +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-execute}{}}} +\subsection{Method \code{execute()}}{ +Execute a SQL statement +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$execute(statement)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{statement}}{A SQL statement} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Number of rows affected by statement +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-get_query}{}}} +\subsection{Method \code{get_query()}}{ +Execute a SQL query and return results +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$get_query(statement)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{statement}}{A SQL query statement} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +A data.table with query results +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-attach_table}{}}} +\subsection{Method \code{attach_table()}}{ +Attach a table from parquet/CSV file as a temporary table in DuckDB +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$attach_table(table_name, columns = "*", where = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{table_name}}{Character. Name of table to attach.} + +\item{\code{columns}}{Character vector. Optional SQL column selection, defaults to "*"} + +\item{\code{where}}{Character. Optional SQL WHERE clause to subset the table.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible NULL (called for side effects) +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-detach_table}{}}} +\subsection{Method \code{detach_table()}}{ +Detach a table from the in-memory database +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$detach_table(table_name)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{table_name}}{Character. Name of table to drop.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible NULL (called for side effects) +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-row_count}{}}} +\subsection{Method \code{row_count()}}{ +Get row count for a table +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$row_count(table_name)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{table_name}}{Character string. Name of the table to query.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Integer number of rows +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-list_tables}{}}} +\subsection{Method \code{list_tables()}}{ +List all tables (files) in storage +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$list_tables()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Character vector of table names +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-with_tables}{}}} +\subsection{Method \code{with_tables()}}{ +Execute a function with specified tables attached, handling attach/detach automatically. +If a table is already attached in the DuckDB instance, it won't be re-attached or detached. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$with_tables(tables, func, ...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{tables}}{Character vector of table names to attach} + +\item{\code{func}}{Function to execute with tables attached} + +\item{\code{...}}{Additional arguments passed to func} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Result of func +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-fetch}{}}} +\subsection{Method \code{fetch()}}{ +Fetch data from a table +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$fetch(table_name, where = NULL, limit = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{table_name}}{Character string. Name of the table to query.} + +\item{\code{where}}{Character string. Optional WHERE clause for the SQL query.} + +\item{\code{limit}}{Integer. Optional limit on number of rows to return.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +A data.table +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-delete_from}{}}} +\subsection{Method \code{delete_from()}}{ +Delete rows from a table +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$delete_from(table_name, where = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{table_name}}{Character string. Name of the table to delete from.} + +\item{\code{where}}{Character string. Optional WHERE clause; if NULL, deletes all rows.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Number of rows deleted +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-commit}{}}} +\subsection{Method \code{commit()}}{ +Commit data using overwrite, append, or upsert modes. Handles autoincrement, key +identity columns, and list-to-MAP conversion. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$commit( + x, + table_name, + key_cols, + autoincrement_cols = character(0), + map_cols = character(0), + method = c("overwrite", "append", "upsert") +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Data frame to commit. If character, in-duckdb-memory table.} + +\item{\code{table_name}}{Character string table name} + +\item{\code{key_cols}}{Character vector of columns that define uniqueness. If missing, +use all columns starting with \code{id_}} + +\item{\code{autoincrement_cols}}{Character vector of column names to auto-increment} + +\item{\code{map_cols}}{Character vector of columns to convert to MAP format} + +\item{\code{method}}{Character, one of "overwrite", "append", "upsert" (upsert being an +update for existing rows, and insert for new rows)} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible NULL (called for side effects) +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-print}{}}} +\subsection{Method \code{print()}}{ +Print method for parquet_duckdb +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used} +} +\if{html}{\out{
}}
+}
+\subsection{Returns}{
+self (invisibly)
+}
+}
+\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-parquet_duckdb-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{parquet_duckdb$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/trans_meta_t.Rd b/man/trans_meta_t.Rd index 8c1d23b..9577213 100644 --- a/man/trans_meta_t.Rd +++ b/man/trans_meta_t.Rd @@ -10,7 +10,7 @@ as_trans_meta_t(x) create_trans_meta_t( - lulc_data, + transitions, min_cardinality_abs = NULL, min_frequency_rel = NULL, exclude_anterior = NULL, @@ -20,7 +20,8 @@ create_trans_meta_t( \method{print}{trans_meta_t}(x, nrow = 10, ...) } \arguments{ -\item{lulc_data}{A \link{lulc_data_t} table with land use observations} +\item{transitions}{A transitions_v table, with columns id_coord, id_lulc_anterior, +id_lulc_posterior, id_period} \item{min_cardinality_abs}{Minimum absolute number of transitions for viability (optional)} @@ -58,6 +59,7 @@ entries for each viable transition type. }} \section{Functions}{ \itemize{ -\item \code{create_trans_meta_t()}: Calculate the transition metadata and mark for modelling feasibility +\item \code{create_trans_meta_t()}: Calculate the transition metadata and mark for modelling +feasibility. Does not attribute \code{id_trans}; this only makes sense as part of a DB. }} diff --git a/man/trans_preds_t.Rd b/man/trans_preds_t.Rd index 545381f..202028e 100644 --- a/man/trans_preds_t.Rd +++ b/man/trans_preds_t.Rd @@ -3,14 +3,11 @@ \name{trans_preds_t} \alias{trans_preds_t} \alias{as_trans_preds_t} -\alias{create_trans_preds_t} \alias{print.trans_preds_t} \title{Create Transition-Predictor Relationship Table} \usage{ as_trans_preds_t(x) -create_trans_preds_t() - \method{print}{trans_preds_t}(x, nrow = 10, ...) } \arguments{ @@ -37,9 +34,3 @@ modelling each transition type. \item \code{print(trans_preds_t)}: Print a trans_preds_t object, passing params to data.table print }} -\section{Functions}{ -\itemize{ -\item \code{create_trans_preds_t()}: Create a transition-predictor relation, i.e. records the -result of a predictor selection step. - -}} diff --git a/man/util.Rd b/man/util.Rd index 575d7f8..a83b6fc 100644 --- a/man/util.Rd +++ b/man/util.Rd @@ -26,7 +26,7 @@ ensure_dir(dir) print_rowwise_yaml(df) -cast_dt_col(x, colname, castfun) +cast_dt_col(x, colname, type) } \arguments{ \item{x}{Left-hand side value} diff --git a/man/util_terra.Rd b/man/util_terra.Rd index d46c499..e429d0a 100644 --- a/man/util_terra.Rd +++ b/man/util_terra.Rd @@ -3,43 +3,18 @@ \name{util_terra} \alias{util_terra} \alias{extract_using_coords_t} -\alias{compute_neighbors} \title{evoland utility functions to work with terra objects} \usage{ extract_using_coords_t(x, coords_t, na_omit = TRUE) - -compute_neighbors( - coords_t, - max_distance, - distance_breaks = NULL, - resolution = 100 -) } \arguments{ \item{x}{The object to extract from; use "simple" extraction for rasters, i.e. no resampling is done.} \item{coords_t}{A coords_t object containing coordinate points} - -\item{max_distance}{Maximum distance to search for neighbors (in same units as -coordinates and resolution)} - -\item{distance_breaks}{Optional numeric vector defining distance class boundaries. -If NULL, no distance classification is performed. -If provided, must have at least 2 elements defining interval breaks.} - -\item{resolution}{Grid cell size for rasterization (default: 100.0, in same units as coordinates)} } \value{ A long data.table with \code{id_coord}, a \code{layer}/\code{attribute} column, and a \code{value} column. 
NAs are omitted - -A data.table with columns: -\itemize{ -\item id_coord_origin: ID of the origin coordinate -\item id_coord_neighbor: ID of the neighboring coordinate -\item distance: Distance between origin and neighbor -\item distance_class: Factor indicating distance class (if distance_breaks provided) -} } \description{ Useful to coax terra raster and vector data into evoland tabular form. @@ -48,10 +23,4 @@ Useful to coax terra raster and vector data into evoland tabular form. \itemize{ \item \code{extract_using_coords_t()}: Extract values from a SpatRaster or SpatVector object using a (minimal) \code{coords_t} -\item \code{compute_neighbors()}: Compute neighboring coordinates within specified distances. In -order to be computationally feasible, the coordinates' IDs are rasterized before -their actual Euclidean distance is calculated. If coordinates are so close that they -get rasterized to the same cell, the first one is used and a warning is emitted. If -this happens, try again using a lower resolution. - }} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index c2f3be0..13f1a2b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,21 +11,22 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // distance_neighbors_cpp -List distance_neighbors_cpp(DataFrame coords_t, double max_distance, double resolution); -RcppExport SEXP _evoland_distance_neighbors_cpp(SEXP coords_tSEXP, SEXP max_distanceSEXP, SEXP resolutionSEXP) { +List distance_neighbors_cpp(DataFrame coords_t, double max_distance, double resolution, bool quiet); +RcppExport SEXP _evoland_distance_neighbors_cpp(SEXP coords_tSEXP, SEXP max_distanceSEXP, SEXP resolutionSEXP, SEXP quietSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< DataFrame >::type coords_t(coords_tSEXP); Rcpp::traits::input_parameter< double >::type max_distance(max_distanceSEXP); Rcpp::traits::input_parameter< double >::type resolution(resolutionSEXP); - rcpp_result_gen = Rcpp::wrap(distance_neighbors_cpp(coords_t, max_distance, resolution)); + Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); + rcpp_result_gen = Rcpp::wrap(distance_neighbors_cpp(coords_t, max_distance, resolution, quiet)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_evoland_distance_neighbors_cpp", (DL_FUNC) &_evoland_distance_neighbors_cpp, 3}, + {"_evoland_distance_neighbors_cpp", (DL_FUNC) &_evoland_distance_neighbors_cpp, 4}, {NULL, NULL, 0} }; diff --git a/src/distances.cpp b/src/distances.cpp index cd41ffb..2e23a21 100644 --- a/src/distances.cpp +++ b/src/distances.cpp @@ -24,17 +24,16 @@ using namespace Rcpp; * - id_coord_neighbor: ID of the neighboring coordinate * - distance_approx: Approximate distance between origin and neighbor * - * @note The function uses an approximate distance calculation based on grid cells. - * If multiple points fall into the same cell, a warning is issued and only the first - * point is retained for that cell. + * @note The function uses an approximate distance calculation based on grid + * cells. If multiple points fall into the same cell, a warning is issued and + * only the first point is retained for that cell. * * @warning Distance calculations are approximate and based on Euclidean distance in * coordinate space. For geographic coordinates, this may not reflect true geodesic distance. 
*/ // [[Rcpp::export]] -List distance_neighbors_cpp(DataFrame coords_t, - double max_distance, - double resolution = 100.0) { +List distance_neighbors_cpp(DataFrame coords_t, double max_distance, + double resolution = 100.0, bool quiet = false) { // Extract columns IntegerVector id_coord = coords_t["id_coord"]; @@ -112,7 +111,25 @@ List distance_neighbors_cpp(DataFrame coords_t, // 5. Find neighbors within max_distance + // Progress reporting setup + int progress_interval = std::max(1000, n_points / 20); // Report every 5% + + if (!quiet) { + Rcpp::Rcout << "\rProgress: 0% (0/" << n_points << " points)" << std::flush; + } + for (int pt_idx = 0; pt_idx < n_points; pt_idx++) { + // Check for user interrupt periodically + if (pt_idx % 1000 == 0) { + Rcpp::checkUserInterrupt(); + } + + // Report progress + if (!quiet && pt_idx > 0 && pt_idx % progress_interval == 0) { + int pct = (int)(100.0 * pt_idx / n_points); + Rcpp::Rcout << "\rProgress: " << pct << "% (" << pt_idx << "/" << n_points + << " points)" << std::flush; + } int origin_id = id_coord[pt_idx]; int origin_row = row_indices[pt_idx]; int origin_col = col_indices[pt_idx]; @@ -160,6 +177,10 @@ List distance_neighbors_cpp(DataFrame coords_t, Named("id_coord_neighbor") = neighbor_ids, Named("distance_approx") = distances_approx ); + if (!quiet) { + Rcpp::Rcout << "\rProgress: 100% (" << n_points << "/" << n_points + << " points)" << std::endl; + } res.attr("class") = CharacterVector::create("data.table", "data.frame"); diff --git a/vignettes/evoland.qmd b/vignettes/evoland.qmd index f06bef8..37da973 100644 --- a/vignettes/evoland.qmd +++ b/vignettes/evoland.qmd @@ -108,7 +108,7 @@ db$commit( value = c(lulc_files$url, lulc_files$md5sum, "BFS Arealstatistik") ), table_name = "reporting_t", - mode = "append" + method = "append" ) zippath <- file.path( @@ -322,7 +322,7 @@ id_coord_keep <- lulc_data_t[, id_coord] db$commit( x = db$coords_t[id_coord %in% id_coord_keep], table_name = "coords_t", - mode = "overwrite" + method = "overwrite" ) ```
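As a closing reference, the `mode` to `method` renames in the vignette hunks above follow the consolidated `commit()` signature documented in `man/parquet_duckdb.Rd`. A compact sketch of the three methods against a throwaway store (path, table, and column names invented here):

```r
library(data.table)

db <- parquet_duckdb$new(tempfile("pq_demo_"))
db$commit(data.table(id_key = 1:2, v = c("a", "b")),
          "demo", method = "overwrite") # replace the table wholesale
db$commit(data.table(id_key = 3L, v = "c"),
          "demo", method = "append")    # stack new rows onto the file
db$commit(data.table(id_key = c(2L, 4L), v = c("B", "d")),
          "demo", key_cols = "id_key", method = "upsert")
db$fetch("demo", where = "id_key >= 2") # id 2 updated to "B", id 4 inserted
```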