From 51e46bd2007ea7955849af02b3f41f8b64d41f93 Mon Sep 17 00:00:00 2001 From: m7pr Date: Fri, 31 Oct 2025 09:59:33 +0100 Subject: [PATCH 1/8] handle subassignemnts with the same object --- R/utils-get_code_dependency.R | 16 +- .../testthat/test-utils-get_code_dependency.R | 171 ++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 42e0650ad..0690abe92 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -306,7 +306,18 @@ extract_occurrence <- function(pd) { ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"])) roll <- in_parenthesis(pd) if (length(roll)) { - c(setdiff(ans, roll), roll) + # detect elements appread in parenthesis and move them on RHS + # but only their first appearance + # as the same object can appear as regular object and the one used in parenthesis + result <- ans + for (elem in roll) { + idx <- which(result == elem)[1] + if (!is.na(idx)) { + result <- result[-idx] + } + } + c(result, roll) + } else { ans } @@ -330,6 +341,9 @@ move_functions_after_arrow <- function(ans, functions) { if (length(arrow_pos) == 0) { return(ans) } + if (length(functions) == 0) { + return(ans) + } before_arrow <- setdiff(ans[1:arrow_pos], functions) after_arrow <- ans[(arrow_pos + 1):length(ans)] c(before_arrow, unique(c(intersect(ans[1:arrow_pos], functions), after_arrow))) diff --git a/tests/testthat/test-utils-get_code_dependency.R b/tests/testthat/test-utils-get_code_dependency.R index 2cd4c40cd..c5b0b272e 100644 --- a/tests/testthat/test-utils-get_code_dependency.R +++ b/tests/testthat/test-utils-get_code_dependency.R @@ -86,3 +86,174 @@ testthat::describe("get_code with multiple assignments inside an expression", { testthat::expect_equal(get_code(td, names = "var2"), code_source) }) }) + +testthat::describe("get_code with subassignments", { + testthat::it("tracks $ subassignment as producing the base object", { + td <- qenv() |> + within({ + iris <- iris + iris$Species[sample.int(nrow(iris), 50)] <- NA + }) + + code_source <- "iris <- iris\niris$Species[sample.int(nrow(iris), 50)] <- NA" + + testthat::expect_equal(get_code(td, names = "iris"), code_source) + }) + + testthat::it("tracks [ subassignment as producing the base object", { + td <- qenv() |> + within({ + x <- 1:10 + x[1:3] <- c(10, 20, 30) + }) + + code_source <- "x <- 1:10\nx[1:3] <- c(10, 20, 30)" + + testthat::expect_equal(get_code(td, names = "x"), code_source) + }) + + testthat::it("tracks [[ subassignment as producing the base object", { + td <- qenv() |> + within({ + lst <- list(a = 1, b = 2) + lst[["c"]] <- 3 + }) + + code_source <- "lst <- list(a = 1, b = 2)\nlst[[\"c\"]] <- 3" + + testthat::expect_equal(get_code(td, names = "lst"), code_source) + }) + + testthat::it("tracks nested subassignments", { + td <- qenv() |> + within({ + df <- data.frame(x = 1:5, y = 6:10) + df$x[df$y > 8] <- 99 + }) + + code_source <- "df <- data.frame(x = 1:5, y = 6:10)\ndf$x[df$y > 8] <- 99" + + testthat::expect_equal(get_code(td, names = "df"), code_source) + }) + + testthat::it("tracks multiple subassignments to same object", { + td <- qenv() |> + within({ + iris <- iris + iris$Species[sample.int(nrow(iris), 10)] <- NA + iris$Sepal.Length[1:5] <- 0 + }) + + code_source <- "iris <- iris\niris$Species[sample.int(nrow(iris), 10)] <- NA\niris$Sepal.Length[1:5] <- 0" + + testthat::expect_equal(get_code(td, names = "iris"), code_source) + }) + + testthat::it("tracks subassignments with complex expressions", { + td <- qenv() |> + within({ + mat <- matrix(1:12, nrow = 3) + mat[mat > 5 & mat < 10] <- 0 + }) + + code_source <- "mat <- matrix(1:12, nrow = 3)\nmat[mat > 5 & mat < 10] <- 0" + + testthat::expect_equal(get_code(td, names = "mat"), code_source) + }) + + testthat::it("tracks subassignments with function calls on LHS", { + td <- qenv() |> + within({ + lst <- list(a = 1, b = 2) + names(lst)[1] <- "first" + }) + + code_source <- "lst <- list(a = 1, b = 2)\nnames(lst)[1] <- \"first\"" + + testthat::expect_equal(get_code(td, names = "lst"), code_source) + }) + + testthat::it("tracks -> operator with subassignments", { + td <- qenv() |> + within({ + x <- 1:10 + c(10, 20, 30) -> x[1:3] # nolint: assignment. + }) + + code_source <- "x <- 1:10\nx[1:3] <- c(10, 20, 30)" + + testthat::expect_equal(get_code(td, names = "x"), code_source) + }) + + testthat::it("tracks attributes() function with subassignments", { + td <- qenv() |> + within({ + x <- 1:5 + attributes(x)$names <- letters[1:5] + }) + + code_source <- "x <- 1:5\nattributes(x)$names <- letters[1:5]" + + testthat::expect_equal(get_code(td, names = "x"), code_source) + }) + + testthat::it("handles complex nested subassignments", { + td <- qenv() |> + within({ + df <- data.frame(x = 1:5, y = 6:10) + df[df$x > 2, "y"][1:2] <- c(99, 100) + }) + + code_source <- "df <- data.frame(x = 1:5, y = 6:10)\ndf[df$x > 2, \"y\"][1:2] <- c(99, 100)" + + testthat::expect_equal(get_code(td, names = "df"), code_source) + }) + + testthat::it("handles subassignments with multiple operators", { + td <- qenv() |> + within({ + lst <- list(a = list(b = 1, c = 2)) + lst$a$b[2] <- 99 + }) + + code_source <- "lst <- list(a = list(b = 1, c = 2))\nlst$a$b[2] <- 99" + + testthat::expect_equal(get_code(td, names = "lst"), code_source) + }) + + testthat::it("handles subassignments with data frame column creation", { + td <- qenv() |> + within({ + df <- data.frame(x = 1:3) + df$new_col <- c("a", "b", "c") + }) + + code_source <- "df <- data.frame(x = 1:3)\ndf$new_col <- c(\"a\", \"b\", \"c\")" + + testthat::expect_equal(get_code(td, names = "df"), code_source) + }) + + testthat::it("handles subassignments with matrix indexing", { + td <- qenv() |> + within({ + mat <- matrix(1:9, nrow = 3) + mat[1:2, 2:3] <- matrix(0, nrow = 2, ncol = 2) + }) + + code_source <- "mat <- matrix(1:9, nrow = 3)\nmat[1:2, 2:3] <- matrix(0, nrow = 2, ncol = 2)" + + testthat::expect_equal(get_code(td, names = "mat"), code_source) + }) + + testthat::it("handles subassignments with logical indexing", { + td <- qenv() |> + within({ + vec <- 1:10 + vec[vec %% 2 == 0] <- vec[vec %% 2 == 0] * 2 + }) + + code_source <- "vec <- 1:10\nvec[vec %% 2 == 0] <- vec[vec %% 2 == 0] * 2" + + testthat::expect_equal(get_code(td, names = "vec"), code_source) + }) +}) From 8c09923fd1b685b1f67d13f9fe43bfeac882e6cd Mon Sep 17 00:00:00 2001 From: m7pr Date: Thu, 11 Dec 2025 11:58:20 +0100 Subject: [PATCH 2/8] Add copilot-setup-steps workflow using template --- .github/workflows/copilot-setup-steps.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/copilot-setup-steps.yml diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml new file mode 100644 index 000000000..ce3e530b8 --- /dev/null +++ b/.github/workflows/copilot-setup-steps.yml @@ -0,0 +1,11 @@ +--- +name: Copilot Agent Setup + +on: + workflow_dispatch: + +jobs: + copilot-setup-steps: + name: Copilot Agent Setup + uses: insightsengineering/r.pkg.template/.github/workflows/copilot-setup-steps.yaml@main + From 2e5ebcb1d433adc67f90196c138e4b406c7919d3 Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:02:11 +0100 Subject: [PATCH 3/8] Delete R/utils-get_code_dependency.R Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com> --- R/utils-get_code_dependency.R | 595 ---------------------------------- 1 file changed, 595 deletions(-) delete mode 100644 R/utils-get_code_dependency.R diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R deleted file mode 100644 index 22cb1089d..000000000 --- a/R/utils-get_code_dependency.R +++ /dev/null @@ -1,595 +0,0 @@ -# get_code_dependency ---- - -#' Get code dependency of an object -#' -#' Extract subset of code required to reproduce specific object(s), including code producing side-effects. -#' -#' Given a character vector with code, this function will extract the part of the code responsible for creating -#' the variables specified by `names`. -#' This includes the final call that creates the variable(s) in question as well as all _parent calls_, -#' _i.e._ calls that create variables used in the final call and their parents, etc. -#' Also included are calls that create side-effects like establishing connections. -#' -#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` . -#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported. -#' -#' Side-effects are not detected automatically and must be marked in the code. -#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required -#' to reproduce a variable called `object`. -#' -#' @param code `character` with the code. -#' @param names `character` vector of object names. -#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed. -#' -#' @return Character vector, a subset of `code`. -#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector. -#' -#' @keywords internal -get_code_dependency <- function(code, names, check_code_names = TRUE) { - checkmate::assert_list(code, "character") - checkmate::assert_character(names, any.missing = FALSE) - - graph <- lapply(code, attr, "dependency") - - if (check_code_names) { - symbols <- unlist(lapply(graph, function(call) { - ind <- match("<-", call, nomatch = length(call) + 1L) - call[seq_len(ind - 1L)] - })) - - if (!all(names %in% unique(symbols))) { - warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE) - } - } - - if (length(code) == 0) { - return(code) - } - - ind <- unlist(lapply(names, function(x) graph_parser(x, graph))) - - lib_ind <- detect_libraries(graph) - - code_ids <- sort(unique(c(lib_ind, ind))) - code[code_ids] -} - -#' Locate function call token -#' -#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token. -#' -#' Useful for determining occurrence of `assign` or `data` functions in an input call. -#' -#' @param call_pd `data.frame` as returned by `extract_calls()` -#' @param text `character(1)` to look for in `text` column of `call_pd` -#' -#' @return -#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`. -#' 0 if not found. -#' -#' @keywords internal -#' @noRd -find_call <- function(call_pd, text) { - checkmate::check_data_frame(call_pd) - checkmate::check_names(call_pd, must.include = c("token", "text")) - checkmate::check_string(text) - - ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text) - if (length(ans)) { - ans - } else { - 0L - } -} - -#' Split the result of `utils::getParseData()` into separate calls -#' -#' @param pd (`data.frame`) A result of `utils::getParseData()`. -#' -#' @return -#' A `list` of `data.frame`s. -#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained. -#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded. -#' -#' @keywords internal -#' @noRd -extract_calls <- function(pd) { - calls <- lapply( - pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"], - function(parent) { - rbind( - pd[pd$id == parent, ], - get_children(pd = pd, parent = parent) - ) - } - ) - calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) - calls <- Filter(Negate(is.null), calls) - calls <- fix_shifted_comments(calls) - calls <- remove_custom_assign(calls, c(":=")) - fix_arrows(calls) -} - -#' @keywords internal -#' @noRd -get_children <- function(pd, parent) { - idx_children <- abs(pd$parent) == parent - children <- pd[idx_children, ] - if (nrow(children) == 0) { - return(NULL) - } - - if (parent > 0) { - do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd))) - } -} - -#' Fixes edge case of comments being shifted to the next call. -#' @keywords internal -#' @noRd -fix_shifted_comments <- function(calls) { - # If the first or the second token is a @linksto COMMENT, - # then it belongs to the previous call. - if (length(calls) >= 2) { - for (i in 2:length(calls)) { - comment_idx <- grep("@linksto", calls[[i]][, "text"]) - if (isTRUE(comment_idx[1] <= 2)) { - calls[[i - 1]] <- rbind( - calls[[i - 1]], - calls[[i]][comment_idx[1], ] - ) - calls[[i]] <- calls[[i]][-comment_idx[1], ] - } - } - } - Filter(nrow, calls) -} - -#' Fixes edge case of custom assignments operator being treated as assignment. -#' -#' @param exclude (`character`) custom assignment operators to be excluded -#' @keywords internal -#' @noRd -remove_custom_assign <- function(calls, exclude = NULL) { - checkmate::assert_list(calls) - checkmate::assert_character(exclude, null.ok = TRUE) - lapply(calls, function(call) { - if (!is.null(exclude)) { - call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ] - } else { - call - } - }) -} - -#' Fixes edge case of `<-` assignment operator being called as function, -#' which is \code{`<-`(y,x)} instead of traditional `y <- x`. -#' @keywords internal -#' @noRd -fix_arrows <- function(calls) { - checkmate::assert_list(calls) - lapply(calls, function(call) { - sym_fun <- call$token == "SYMBOL_FUNCTION_CALL" - call[sym_fun, ] <- sub_arrows(call[sym_fun, ]) - call - }) -} - -#' Execution of assignment operator substitutions for a call. -#' @keywords internal -#' @noRd -sub_arrows <- function(call) { - checkmate::assert_data_frame(call) - map <- data.frame( - row.names = c("<-", "<<-", "="), - token = rep("LEFT_ASSIGN", 3), - text = rep("<-", 3) - ) - sub_ids <- call$text %in% rownames(map) - call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ] - call -} - -# code_graph ---- - -#' Extract object occurrence -#' -#' Extracts objects occurrence within calls passed by `pd`. -#' Also detects which objects depend on which within a call. -#' -#' @param pd `data.frame`; -#' one of the results of `utils::getParseData()` split into subsets representing individual calls; -#' created by `extract_calls()` function -#' -#' @return -#' A character vector listing names of objects that depend on this call -#' and names of objects that this call depends on. -#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` -#' depends on objects `b` and `c`. -#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. -#' -#' @keywords internal -#' @noRd -extract_occurrence <- function(pd) { - is_in_function <- function(x) { - # If an object is a function parameter, - # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object. - function_id <- x[x$token == "FUNCTION", "parent"] - if (length(function_id)) { - x$id %in% get_children(x, function_id[1])$id - } else { - rep(FALSE, nrow(x)) - } - } - in_parenthesis <- function(x) { - if (any(x$token %in% c("LBB", "'['"))) { - id_start <- min(x$id[x$token %in% c("LBB", "'['")]) - id_end <- min(x$id[x$token == "']'"]) - x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end] - } - } - - # Handle data(object)/data("object")/data(object, envir = ) independently. - data_call <- find_call(pd, "data") - if (data_call) { - sym <- pd[data_call + 1, "text"] - return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) - } - # Handle assign(x = ). - assign_call <- find_call(pd, "assign") - if (assign_call) { - # Check if parameters were named. - # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. - # "EQ_SUB" is for `=` appearing after the name of the named parameter. - if (any(pd$token == "SYMBOL_SUB")) { - params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] - # Remove sequence of "=", ",". - if (length(params > 1)) { - remove <- integer(0) - for (i in 2:length(params)) { - if (params[i - 1] == "=" && params[i] == ",") { - remove <- c(remove, i - 1, i) - } - } - if (length(remove)) params <- params[-remove] - } - pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) - if (!pos) { - return(character(0L)) - } - # pos is indicator of the place of 'x' - # 1. All parameters are named, but none is 'x' - return(character(0L)) - # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) - # - check "x" in params being just a vector of named parameters. - # 3. Some parameters are named, 'x' is not in named parameters - # - check first appearance of "," (unnamed parameter) in vector parameters. - } else { - # Object is the first entry after 'assign'. - pos <- 1 - } - sym <- pd[assign_call + pos, "text"] - return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) - } - - # What occurs in a function body is not tracked. - x <- pd[!is_in_function(pd), ] - sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL")) - sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL") - - if (length(sym_cond) == 0) { - return(character(0L)) - } - # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not. - # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. - dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"] - if (length(dollar_ids)) { - object_ids <- x[sym_cond, "id"] - after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids] - sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) - } - - assign_cond <- grep("ASSIGN", x$token) - if (!length(assign_cond)) { - return(c("<-", unique(x[sym_cond, "text"]))) - } - - # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('. - sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)] - - # If there was an assignment operation detect direction of it. - if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c. - sym_cond <- rev(sym_cond) - } - - after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1 - ans <- append(x[sym_cond, "text"], "<-", after = max(1, after)) - ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"])) - roll <- in_parenthesis(pd) - if (length(roll)) { - # detect elements appeared in parenthesis and move them on RHS - # but only their first appearance - # as the same object can appear as regular object and the one used in parenthesis - result <- ans - for (elem in roll) { - idx <- which(result == elem)[1] - if (!is.na(idx)) { - result <- result[-idx] - } - } - c(result, roll) - } else { - ans - } -} - -#' Moves function names to the right side of dependency graph -#' -#' Changes status of the function call from dependent to dependency if occurs in the lhs. -#' Technically, it means to move function names after the dependency operator. -#' For example, for `attributes(a) <- b` the dependency graph should look like `c("a", "<-", "b", "attributes")`. -#' -#' @param ans `character` vector of object names in dependency graph. -#' @param functions `character` vector of function names. -#' -#' @return -#' A character vector. -#' @keywords internal -#' @noRd -move_functions_after_arrow <- function(ans, functions) { - arrow_pos <- which(ans == "<-") - if (length(arrow_pos) == 0) { - return(ans) - } - before_arrow <- setdiff(ans[1:arrow_pos], functions) - after_arrow <- ans[(arrow_pos + 1):length(ans)] - c(before_arrow, unique(c(intersect(ans[1:arrow_pos], functions), after_arrow))) -} - -#' Extract side effects -#' -#' Extracts all object names from the code that are marked with `@linksto` tag. -#' -#' The code may contain functions calls that create side effects, e.g. modify the environment. -#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call. -#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects. -#' With this tag a complete object dependency structure can be established. -#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function. -#' -#' @param pd `data.frame`; -#' one of the results of `utils::getParseData()` split into subsets representing individual calls; -#' created by `extract_calls()` function -#' -#' @return -#' A character vector of names of objects -#' depending a call tagged with `@linksto` in a corresponding element of `pd`. -#' -#' @keywords internal -#' @noRd -extract_side_effects <- function(pd) { - linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE) - unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+")) -} - -#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text) -#' @keywords internal -#' @noRd -extract_dependency <- function(parsed_code) { - full_pd <- normalize_pd(utils::getParseData(parsed_code)) - reordered_full_pd <- extract_calls(full_pd) - - # Early return on empty code - if (length(reordered_full_pd) == 0L) { - return(NULL) - } - - if (length(parsed_code) == 0L) { - return(extract_side_effects(reordered_full_pd[[1]])) - } - expr_ix <- lapply(parsed_code[[1]], class) == "{" - - # Build queue of expressions to parse individually - queue <- list() - parsed_code_list <- if (all(!expr_ix)) { - list(parsed_code) - } else { - queue <- as.list(parsed_code[[1]][expr_ix]) - new_list <- parsed_code[[1]] - new_list[expr_ix] <- NULL - list(parse(text = as.expression(new_list), keep.source = TRUE)) - } - - while (length(queue) > 0) { - current <- queue[[1]] - queue <- queue[-1] - if (identical(current[[1L]], as.name("{"))) { - queue <- append(queue, as.list(current)[-1L]) - } else { - parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE) - } - } - - parsed_occurences <- lapply( - parsed_code_list, - function(parsed_code) { - pd <- normalize_pd(utils::getParseData(parsed_code)) - reordered_pd <- extract_calls(pd) - if (length(reordered_pd) > 0) { - # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names - # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows - # extract_calls is needed to omit empty calls that contain only one token `"';'"` - # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different - # than in original pd - extract_occurrence(reordered_pd[[1]]) - } - } - ) - - # Merge results together - result <- Reduce( - function(u, v) { - ix <- if ("<-" %in% v) min(which(v == "<-")) else 0 - u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))]) - u$right_side <- c( - u$right_side, - if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))] - ) - u - }, - init = list(left_side = character(0L), right_side = character(0L)), - x = parsed_occurences - ) - - c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side) -} - -# graph_parser ---- - -#' Return the indices of calls needed to reproduce an object -#' -#' @param x The name of the object to return code for. -#' @param graph A result of `code_graph()`. -#' -#' @return -#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`. -#' -#' @keywords internal -#' @noRd -graph_parser <- function(x, graph) { - # x occurrences (lhs) - occurrence <- vapply( - graph, function(call) { - ind <- match("<-", call, nomatch = length(call) + 1L) - x %in% call[seq_len(ind - 1L)] - }, - logical(1) - ) - - # x-dependent objects (rhs) - dependencies <- lapply(graph[occurrence], function(call) { - ind <- match("<-", call, nomatch = 0L) - call[(ind + 1L):length(call)] - }) - dependencies <- setdiff(unlist(dependencies), x) - - dependency_occurrences <- lapply(dependencies, function(dependency) { - # track down dependencies and where they occur on the lhs in previous calls - last_x_occurrence <- max(which(occurrence)) - reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1) - c(graph_parser(dependency, reduced_graph), last_x_occurrence) - }) - - sort(unique(c(which(occurrence), unlist(dependency_occurrences)))) -} - - -# default_side_effects -------------------------------------------------------------------------------------------- - -#' Detect library calls -#' -#' Detects `library()` and `require()` function calls. -#' -#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")` -#' -#' @return -#' Integer vector of indices that can be applied to `graph` to obtain all calls containing -#' `library()` or `require()` calls that are always returned for reproducibility. -#' -#' @keywords internal -#' @noRd -detect_libraries <- function(graph) { - defaults <- c("library", "require") - - which( - unlist( - lapply( - graph, function(x) { - any(grepl(pattern = paste(defaults, collapse = "|"), x = x)) - } - ) - ) - ) -} - - -# utils ----------------------------------------------------------------------------------------------------------- - - -#' Normalize parsed data removing backticks from symbols -#' -#' @param pd `data.frame` resulting from `utils::getParseData()` call. -#' -#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens. -#' -#' @keywords internal -#' @noRd -normalize_pd <- function(pd) { - # Remove backticks from SYMBOL tokens - symbol_index <- grepl("^SYMBOL.*$", pd$token) - pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"]) - - pd -} - - -# split_code ------------------------------------------------------------------------------------------------------ - - -#' Get line/column in the source where the calls end -#' -#' -#' @param code `character(1)` -#' -#' @return `matrix` with `colnames = c("line", "col")` -#' -#' @keywords internal -#' @noRd -get_call_breaks <- function(code) { - parsed_code <- parse(text = code, keep.source = TRUE) - pd <- utils::getParseData(parsed_code) - pd <- normalize_pd(pd) - pd <- pd[pd$token != "';'", ] - call_breaks <- t(sapply( - extract_calls(pd), - function(x) { - matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)]))) - } - )) - call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only - if (nrow(call_breaks) == 0L) { - call_breaks <- matrix(numeric(0), ncol = 2) - } - colnames(call_breaks) <- c("line", "col") - call_breaks -} - -#' Split code by calls -#' -#' @param code `character` with the code. -#' -#' @return list of `character`s of the length equal to the number of calls in `code`. -#' -#' @keywords internal -#' @noRd -split_code <- function(code) { - call_breaks <- get_call_breaks(code) - if (nrow(call_breaks) == 0) { - return(code) - } - call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE] - code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]] - char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)] - - idx_start <- c( - 0, # first call starts in the beginning of src - char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1 - ) - idx_end <- c( - char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"], - nchar(code) # last call end in the end of src - ) - new_code <- substring(code, idx_start, idx_end) - - # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line - # we need to move remove leading and add \n instead when combining calls - c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1])) -} From afd42f46f5f5ec51e10d7b47eb0d867a6fa53828 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:06:24 +0000 Subject: [PATCH 4/8] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update --- DESCRIPTION | 1 - man/get_code_dependency.Rd | 37 ------------------------------------- 2 files changed, 38 deletions(-) delete mode 100644 man/get_code_dependency.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0648ce2e1..9675c67e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,5 +73,4 @@ Collate: 'qenv-show.R' 'qenv-within.R' 'teal.code-package.R' - 'utils-get_code_dependency.R' 'utils.R' diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd deleted file mode 100644 index 678f0233f..000000000 --- a/man/get_code_dependency.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-get_code_dependency.R -\name{get_code_dependency} -\alias{get_code_dependency} -\title{Get code dependency of an object} -\usage{ -get_code_dependency(code, names, check_code_names = TRUE) -} -\arguments{ -\item{code}{\code{character} with the code.} - -\item{names}{\code{character} vector of object names.} - -\item{check_code_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.} -} -\value{ -Character vector, a subset of \code{code}. -Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector. -} -\description{ -Extract subset of code required to reproduce specific object(s), including code producing side-effects. -} -\details{ -Given a character vector with code, this function will extract the part of the code responsible for creating -the variables specified by \code{names}. -This includes the final call that creates the variable(s) in question as well as all \emph{parent calls}, -\emph{i.e.} calls that create variables used in the final call and their parents, etc. -Also included are calls that create side-effects like establishing connections. - -It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} . -Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported. - -Side-effects are not detected automatically and must be marked in the code. -Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required -to reproduce a variable called \code{object}. -} -\keyword{internal} From 2713f694a9c9eb93131ac6ae771edeb37203e08c Mon Sep 17 00:00:00 2001 From: m7pr Date: Thu, 11 Dec 2025 12:21:53 +0100 Subject: [PATCH 5/8] bring back utils --- R/utils-get_code_dependency.R | 607 ++++++++++++++++++++++++++++++++++ 1 file changed, 607 insertions(+) create mode 100644 R/utils-get_code_dependency.R diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R new file mode 100644 index 000000000..05a46c0df --- /dev/null +++ b/R/utils-get_code_dependency.R @@ -0,0 +1,607 @@ +# get_code_dependency ---- + +#' Get code dependency of an object +#' +#' Extract subset of code required to reproduce specific object(s), including code producing side-effects. +#' +#' Given a character vector with code, this function will extract the part of the code responsible for creating +#' the variables specified by `names`. +#' This includes the final call that creates the variable(s) in question as well as all _parent calls_, +#' _i.e._ calls that create variables used in the final call and their parents, etc. +#' Also included are calls that create side-effects like establishing connections. +#' +#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` . +#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported. +#' +#' Side-effects are not detected automatically and must be marked in the code. +#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required +#' to reproduce a variable called `object`. +#' +#' @param code `character` with the code. +#' @param names `character` vector of object names. +#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed. +#' +#' @return Character vector, a subset of `code`. +#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector. +#' +#' @keywords internal +get_code_dependency <- function(code, names, check_code_names = TRUE) { + checkmate::assert_list(code, "character") + checkmate::assert_character(names, any.missing = FALSE) + + graph <- lapply(code, attr, "dependency") + + if (check_code_names) { + symbols <- unlist(lapply(graph, function(call) { + ind <- match("<-", call, nomatch = length(call) + 1L) + call[seq_len(ind - 1L)] + })) + + if (!all(names %in% unique(symbols))) { + warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE) + } + } + + if (length(code) == 0) { + return(code) + } + + ind <- unlist(lapply(names, function(x) graph_parser(x, graph))) + + lib_ind <- detect_libraries(graph) + + code_ids <- sort(unique(c(lib_ind, ind))) + code[code_ids] +} + +#' Locate function call token +#' +#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token. +#' +#' Useful for determining occurrence of `assign` or `data` functions in an input call. +#' +#' @param call_pd `data.frame` as returned by `extract_calls()` +#' @param text `character(1)` to look for in `text` column of `call_pd` +#' +#' @return +#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`. +#' 0 if not found. +#' +#' @keywords internal +#' @noRd +find_call <- function(call_pd, text) { + checkmate::check_data_frame(call_pd) + checkmate::check_names(call_pd, must.include = c("token", "text")) + checkmate::check_string(text) + + ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text) + if (length(ans)) { + ans + } else { + 0L + } +} + +#' Split the result of `utils::getParseData()` into separate calls +#' +#' @param pd (`data.frame`) A result of `utils::getParseData()`. +#' +#' @return +#' A `list` of `data.frame`s. +#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained. +#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded. +#' +#' @keywords internal +#' @noRd +extract_calls <- function(pd) { + calls <- lapply( + pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"], + function(parent) { + rbind( + pd[pd$id == parent, ], + get_children(pd = pd, parent = parent) + ) + } + ) + calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls) + calls <- Filter(Negate(is.null), calls) + calls <- fix_shifted_comments(calls) + calls <- remove_custom_assign(calls, c(":=")) + fix_arrows(calls) +} + +#' @keywords internal +#' @noRd +get_children <- function(pd, parent) { + idx_children <- abs(pd$parent) == parent + children <- pd[idx_children, ] + if (nrow(children) == 0) { + return(NULL) + } + + if (parent > 0) { + do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd))) + } +} + +#' Fixes edge case of comments being shifted to the next call. +#' @keywords internal +#' @noRd +fix_shifted_comments <- function(calls) { + # If the first or the second token is a @linksto COMMENT, + # then it belongs to the previous call. + if (length(calls) >= 2) { + for (i in 2:length(calls)) { + comment_idx <- grep("@linksto", calls[[i]][, "text"]) + if (isTRUE(comment_idx[1] <= 2)) { + calls[[i - 1]] <- rbind( + calls[[i - 1]], + calls[[i]][comment_idx[1], ] + ) + calls[[i]] <- calls[[i]][-comment_idx[1], ] + } + } + } + Filter(nrow, calls) +} + +#' Fixes edge case of custom assignments operator being treated as assignment. +#' +#' @param exclude (`character`) custom assignment operators to be excluded +#' @keywords internal +#' @noRd +remove_custom_assign <- function(calls, exclude = NULL) { + checkmate::assert_list(calls) + checkmate::assert_character(exclude, null.ok = TRUE) + lapply(calls, function(call) { + if (!is.null(exclude)) { + call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ] + } else { + call + } + }) +} + +#' Fixes edge case of `<-` assignment operator being called as function, +#' which is \code{`<-`(y,x)} instead of traditional `y <- x`. +#' @keywords internal +#' @noRd +fix_arrows <- function(calls) { + checkmate::assert_list(calls) + lapply(calls, function(call) { + sym_fun <- call$token == "SYMBOL_FUNCTION_CALL" + call[sym_fun, ] <- sub_arrows(call[sym_fun, ]) + call + }) +} + +#' Execution of assignment operator substitutions for a call. +#' @keywords internal +#' @noRd +sub_arrows <- function(call) { + checkmate::assert_data_frame(call) + map <- data.frame( + row.names = c("<-", "<<-", "="), + token = rep("LEFT_ASSIGN", 3), + text = rep("<-", 3) + ) + sub_ids <- call$text %in% rownames(map) + call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ] + call +} + +# code_graph ---- + +#' Extract object occurrence +#' +#' Extracts objects occurrence within calls passed by `pd`. +#' Also detects which objects depend on which within a call. +#' +#' @param pd `data.frame`; +#' one of the results of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A character vector listing names of objects that depend on this call +#' and names of objects that this call depends on. +#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a` +#' depends on objects `b` and `c`. +#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call. +#' +#' @keywords internal +#' @noRd +extract_occurrence <- function(pd) { + is_in_function <- function(x) { + # If an object is a function parameter, + # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object. + function_id <- x[x$token == "FUNCTION", "parent"] + if (length(function_id)) { + x$id %in% get_children(x, function_id[1])$id + } else { + rep(FALSE, nrow(x)) + } + } + in_parenthesis <- function(x) { + if (any(x$token %in% c("LBB", "'['"))) { + id_start <- min(x$id[x$token %in% c("LBB", "'['")]) + id_end <- min(x$id[x$token == "']'"]) + x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end] + } + } + + # Handle data(object)/data("object")/data(object, envir = ) independently. + data_call <- find_call(pd, "data") + if (data_call) { + sym <- pd[data_call + 1, "text"] + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) + } + # Handle assign(x = ). + assign_call <- find_call(pd, "assign") + if (assign_call) { + # Check if parameters were named. + # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named. + # "EQ_SUB" is for `=` appearing after the name of the named parameter. + if (any(pd$token == "SYMBOL_SUB")) { + params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"] + # Remove sequence of "=", ",". + if (length(params > 1)) { + remove <- integer(0) + for (i in 2:length(params)) { + if (params[i - 1] == "=" && params[i] == ",") { + remove <- c(remove, i - 1, i) + } + } + if (length(remove)) params <- params[-remove] + } + pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0)) + if (!pos) { + return(character(0L)) + } + # pos is indicator of the place of 'x' + # 1. All parameters are named, but none is 'x' - return(character(0L)) + # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ",")) + # - check "x" in params being just a vector of named parameters. + # 3. Some parameters are named, 'x' is not in named parameters + # - check first appearance of "," (unnamed parameter) in vector parameters. + } else { + # Object is the first entry after 'assign'. + pos <- 1 + } + sym <- pd[assign_call + pos, "text"] + return(c(gsub("^['\"]|['\"]$", "", sym), "<-")) + } + + # What occurs in a function body is not tracked. + x <- pd[!is_in_function(pd), ] + sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL")) + sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL") + + if (length(sym_cond) == 0) { + return(character(0L)) + } + # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not. + # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2. + dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"] + if (length(dollar_ids)) { + object_ids <- x[sym_cond, "id"] + after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids] + sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar)) + } + + assign_cond <- grep("ASSIGN", x$token) + if (!length(assign_cond)) { + return(c("<-", unique(x[sym_cond, "text"]))) + } + + # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('. + sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)] + + # If there was an assignment operation detect direction of it. + if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c. + sym_cond <- rev(sym_cond) + } + + after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1 + ans <- append(x[sym_cond, "text"], "<-", after = max(1, after)) + ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"])) + roll <- in_parenthesis(pd) + if (length(roll)) { + # detect elements appeared in parenthesis and move them on RHS + # but only their first appearance + # as the same object can appear as regular object and the one used in parenthesis + result <- ans + for (elem in roll) { + idx <- which(result == elem)[1] + if (!is.na(idx)) { + result <- result[-idx] + } + } + c(result, roll) + } else { + ans + } +} + +#' Moves function names to the right side of dependency graph +#' +#' Changes status of the function call from dependent to dependency if occurs in the lhs. +#' Technically, it means to move function names after the dependency operator. +#' For example, for `attributes(a) <- b` the dependency graph should look like `c("a", "<-", "b", "attributes")`. +#' +#' @param ans `character` vector of object names in dependency graph. +#' @param functions `character` vector of function names. +#' +#' @return +#' A character vector. +#' @keywords internal +#' @noRd +move_functions_after_arrow <- function(ans, functions) { + arrow_pos <- which(ans == "<-") + if (length(arrow_pos) == 0) { + return(ans) + } + if (length(functions) == 0) { + return(ans) + } + ans_pre <- ans[1:arrow_pos] + # it's setdiff but without the removal of duplicates + # do not use setdiff(ans_pre, functions) + # as it removes duplicates from ans_pre even if they do not appear in functions + # check setdiff(c("A", "A"), "B") - gives "A", where we want to keep c("A", "A") + for (fun in functions) { + if (any(ans_pre == fun)) ans_pre <- ans_pre[-match(fun, ans_pre)] + } + after_arrow <- if (arrow_pos < length(ans)) { + ans[(arrow_pos + 1):length(ans)] + } + c(ans_pre, after_arrow) +} + +#' Extract side effects +#' +#' Extracts all object names from the code that are marked with `@linksto` tag. +#' +#' The code may contain functions calls that create side effects, e.g. modify the environment. +#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call. +#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects. +#' With this tag a complete object dependency structure can be established. +#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function. +#' +#' @param pd `data.frame`; +#' one of the results of `utils::getParseData()` split into subsets representing individual calls; +#' created by `extract_calls()` function +#' +#' @return +#' A character vector of names of objects +#' depending a call tagged with `@linksto` in a corresponding element of `pd`. +#' +#' @keywords internal +#' @noRd +extract_side_effects <- function(pd) { + linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE) + unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+")) +} + +#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text) +#' @keywords internal +#' @noRd +extract_dependency <- function(parsed_code) { + full_pd <- normalize_pd(utils::getParseData(parsed_code)) + reordered_full_pd <- extract_calls(full_pd) + + # Early return on empty code + if (length(reordered_full_pd) == 0L) { + return(NULL) + } + + if (length(parsed_code) == 0L) { + return(extract_side_effects(reordered_full_pd[[1]])) + } + expr_ix <- lapply(parsed_code[[1]], class) == "{" + + # Build queue of expressions to parse individually + queue <- list() + parsed_code_list <- if (all(!expr_ix)) { + list(parsed_code) + } else { + queue <- as.list(parsed_code[[1]][expr_ix]) + new_list <- parsed_code[[1]] + new_list[expr_ix] <- NULL + list(parse(text = as.expression(new_list), keep.source = TRUE)) + } + + while (length(queue) > 0) { + current <- queue[[1]] + queue <- queue[-1] + if (identical(current[[1L]], as.name("{"))) { + queue <- append(queue, as.list(current)[-1L]) + } else { + parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE) + } + } + + parsed_occurences <- lapply( + parsed_code_list, + function(parsed_code) { + pd <- normalize_pd(utils::getParseData(parsed_code)) + reordered_pd <- extract_calls(pd) + if (length(reordered_pd) > 0) { + # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names + # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows + # extract_calls is needed to omit empty calls that contain only one token `"';'"` + # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different + # than in original pd + extract_occurrence(reordered_pd[[1]]) + } + } + ) + + # Merge results together + result <- Reduce( + function(u, v) { + ix <- if ("<-" %in% v) min(which(v == "<-")) else 0 + u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))]) + u$right_side <- c( + u$right_side, + if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))] + ) + u + }, + init = list(left_side = character(0L), right_side = character(0L)), + x = parsed_occurences + ) + + c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side) +} + +# graph_parser ---- + +#' Return the indices of calls needed to reproduce an object +#' +#' @param x The name of the object to return code for. +#' @param graph A result of `code_graph()`. +#' +#' @return +#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`. +#' +#' @keywords internal +#' @noRd +graph_parser <- function(x, graph) { + # x occurrences (lhs) + occurrence <- vapply( + graph, function(call) { + ind <- match("<-", call, nomatch = length(call) + 1L) + x %in% call[seq_len(ind - 1L)] + }, + logical(1) + ) + + # x-dependent objects (rhs) + dependencies <- lapply(graph[occurrence], function(call) { + ind <- match("<-", call, nomatch = 0L) + call[(ind + 1L):length(call)] + }) + dependencies <- setdiff(unlist(dependencies), x) + + dependency_occurrences <- lapply(dependencies, function(dependency) { + # track down dependencies and where they occur on the lhs in previous calls + last_x_occurrence <- max(which(occurrence)) + reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1) + c(graph_parser(dependency, reduced_graph), last_x_occurrence) + }) + + sort(unique(c(which(occurrence), unlist(dependency_occurrences)))) +} + + +# default_side_effects -------------------------------------------------------------------------------------------- + +#' Detect library calls +#' +#' Detects `library()` and `require()` function calls. +#' +#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")` +#' +#' @return +#' Integer vector of indices that can be applied to `graph` to obtain all calls containing +#' `library()` or `require()` calls that are always returned for reproducibility. +#' +#' @keywords internal +#' @noRd +detect_libraries <- function(graph) { + defaults <- c("library", "require") + + which( + unlist( + lapply( + graph, function(x) { + any(grepl(pattern = paste(defaults, collapse = "|"), x = x)) + } + ) + ) + ) +} + + +# utils ----------------------------------------------------------------------------------------------------------- + + +#' Normalize parsed data removing backticks from symbols +#' +#' @param pd `data.frame` resulting from `utils::getParseData()` call. +#' +#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens. +#' +#' @keywords internal +#' @noRd +normalize_pd <- function(pd) { + # Remove backticks from SYMBOL tokens + symbol_index <- grepl("^SYMBOL.*$", pd$token) + pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"]) + + pd +} + + +# split_code ------------------------------------------------------------------------------------------------------ + + +#' Get line/column in the source where the calls end +#' +#' +#' @param code `character(1)` +#' +#' @return `matrix` with `colnames = c("line", "col")` +#' +#' @keywords internal +#' @noRd +get_call_breaks <- function(code) { + parsed_code <- parse(text = code, keep.source = TRUE) + pd <- utils::getParseData(parsed_code) + pd <- normalize_pd(pd) + pd <- pd[pd$token != "';'", ] + call_breaks <- t(sapply( + extract_calls(pd), + function(x) { + matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)]))) + } + )) + call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only + if (nrow(call_breaks) == 0L) { + call_breaks <- matrix(numeric(0), ncol = 2) + } + colnames(call_breaks) <- c("line", "col") + call_breaks +} + +#' Split code by calls +#' +#' @param code `character` with the code. +#' +#' @return list of `character`s of the length equal to the number of calls in `code`. +#' +#' @keywords internal +#' @noRd +split_code <- function(code) { + call_breaks <- get_call_breaks(code) + if (nrow(call_breaks) == 0) { + return(code) + } + call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE] + code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]] + char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)] + + idx_start <- c( + 0, # first call starts in the beginning of src + char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1 + ) + idx_end <- c( + char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"], + nchar(code) # last call end in the end of src + ) + new_code <- substring(code, idx_start, idx_end) + + # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line + # we need to move remove leading and add \n instead when combining calls + c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1])) +} \ No newline at end of file From c44f0ef637ebaac51e413b07d52e368d36c7d4fa Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:22:36 +0100 Subject: [PATCH 6/8] Update DESCRIPTION Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com> --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 9675c67e7..0648ce2e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,5 @@ Collate: 'qenv-show.R' 'qenv-within.R' 'teal.code-package.R' + 'utils-get_code_dependency.R' 'utils.R' From 7ea51ecbc3d7f1a284715be8d5db106c7567020e Mon Sep 17 00:00:00 2001 From: Marcin <133694481+m7pr@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:23:11 +0100 Subject: [PATCH 7/8] Update R/utils-get_code_dependency.R Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com> --- R/utils-get_code_dependency.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 05a46c0df..954448d31 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -604,4 +604,4 @@ split_code <- function(code) { # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line # we need to move remove leading and add \n instead when combining calls c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1])) -} \ No newline at end of file +} From 3eda1d5d09355fbbe89ed95032a3d5ed9843d615 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:27:40 +0000 Subject: [PATCH 8/8] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update --- man/get_code_dependency.Rd | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 man/get_code_dependency.Rd diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd new file mode 100644 index 000000000..678f0233f --- /dev/null +++ b/man/get_code_dependency.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-get_code_dependency.R +\name{get_code_dependency} +\alias{get_code_dependency} +\title{Get code dependency of an object} +\usage{ +get_code_dependency(code, names, check_code_names = TRUE) +} +\arguments{ +\item{code}{\code{character} with the code.} + +\item{names}{\code{character} vector of object names.} + +\item{check_code_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.} +} +\value{ +Character vector, a subset of \code{code}. +Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector. +} +\description{ +Extract subset of code required to reproduce specific object(s), including code producing side-effects. +} +\details{ +Given a character vector with code, this function will extract the part of the code responsible for creating +the variables specified by \code{names}. +This includes the final call that creates the variable(s) in question as well as all \emph{parent calls}, +\emph{i.e.} calls that create variables used in the final call and their parents, etc. +Also included are calls that create side-effects like establishing connections. + +It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} . +Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported. + +Side-effects are not detected automatically and must be marked in the code. +Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required +to reproduce a variable called \code{object}. +} +\keyword{internal}