From 51e46bd2007ea7955849af02b3f41f8b64d41f93 Mon Sep 17 00:00:00 2001
From: m7pr <marcin.kosinski.mk1@roche.com>
Date: Fri, 31 Oct 2025 09:59:33 +0100
Subject: [PATCH 1/8] handle subassignemnts with the same object

---
 R/utils-get_code_dependency.R                 |  16 +-
 .../testthat/test-utils-get_code_dependency.R | 171 ++++++++++++++++++
 2 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R
index 42e0650ad..0690abe92 100644
--- a/R/utils-get_code_dependency.R
+++ b/R/utils-get_code_dependency.R
@@ -306,7 +306,18 @@ extract_occurrence <- function(pd) {
   ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"]))
   roll <- in_parenthesis(pd)
   if (length(roll)) {
-    c(setdiff(ans, roll), roll)
+    # detect elements appread in parenthesis and move them on RHS
+    # but only their first appearance
+    # as the same object can appear as regular object and the one used in parenthesis
+    result <- ans
+    for (elem in roll) {
+      idx <- which(result == elem)[1]
+      if (!is.na(idx)) {
+        result <- result[-idx]
+      }
+    }
+    c(result, roll)
+
   } else {
     ans
   }
@@ -330,6 +341,9 @@ move_functions_after_arrow <- function(ans, functions) {
   if (length(arrow_pos) == 0) {
     return(ans)
   }
+  if (length(functions) == 0) {
+    return(ans)
+  }
   before_arrow <- setdiff(ans[1:arrow_pos], functions)
   after_arrow <- ans[(arrow_pos + 1):length(ans)]
   c(before_arrow, unique(c(intersect(ans[1:arrow_pos], functions), after_arrow)))
diff --git a/tests/testthat/test-utils-get_code_dependency.R b/tests/testthat/test-utils-get_code_dependency.R
index 2cd4c40cd..c5b0b272e 100644
--- a/tests/testthat/test-utils-get_code_dependency.R
+++ b/tests/testthat/test-utils-get_code_dependency.R
@@ -86,3 +86,174 @@ testthat::describe("get_code with multiple assignments inside an expression", {
     testthat::expect_equal(get_code(td, names = "var2"), code_source)
   })
 })
+
+testthat::describe("get_code with subassignments", {
+  testthat::it("tracks $ subassignment as producing the base object", {
+    td <- qenv() |>
+      within({
+        iris <- iris
+        iris$Species[sample.int(nrow(iris), 50)] <- NA
+      })
+
+    code_source <- "iris <- iris\niris$Species[sample.int(nrow(iris), 50)] <- NA"
+
+    testthat::expect_equal(get_code(td, names = "iris"), code_source)
+  })
+
+  testthat::it("tracks [ subassignment as producing the base object", {
+    td <- qenv() |>
+      within({
+        x <- 1:10
+        x[1:3] <- c(10, 20, 30)
+      })
+
+    code_source <- "x <- 1:10\nx[1:3] <- c(10, 20, 30)"
+
+    testthat::expect_equal(get_code(td, names = "x"), code_source)
+  })
+
+  testthat::it("tracks [[ subassignment as producing the base object", {
+    td <- qenv() |>
+      within({
+        lst <- list(a = 1, b = 2)
+        lst[["c"]] <- 3
+      })
+
+    code_source <- "lst <- list(a = 1, b = 2)\nlst[[\"c\"]] <- 3"
+
+    testthat::expect_equal(get_code(td, names = "lst"), code_source)
+  })
+
+  testthat::it("tracks nested subassignments", {
+    td <- qenv() |>
+      within({
+        df <- data.frame(x = 1:5, y = 6:10)
+        df$x[df$y > 8] <- 99
+      })
+
+    code_source <- "df <- data.frame(x = 1:5, y = 6:10)\ndf$x[df$y > 8] <- 99"
+
+    testthat::expect_equal(get_code(td, names = "df"), code_source)
+  })
+
+  testthat::it("tracks multiple subassignments to same object", {
+    td <- qenv() |>
+      within({
+        iris <- iris
+        iris$Species[sample.int(nrow(iris), 10)] <- NA
+        iris$Sepal.Length[1:5] <- 0
+      })
+
+    code_source <- "iris <- iris\niris$Species[sample.int(nrow(iris), 10)] <- NA\niris$Sepal.Length[1:5] <- 0"
+
+    testthat::expect_equal(get_code(td, names = "iris"), code_source)
+  })
+
+  testthat::it("tracks subassignments with complex expressions", {
+    td <- qenv() |>
+      within({
+        mat <- matrix(1:12, nrow = 3)
+        mat[mat > 5 & mat < 10] <- 0
+      })
+
+    code_source <- "mat <- matrix(1:12, nrow = 3)\nmat[mat > 5 & mat < 10] <- 0"
+
+    testthat::expect_equal(get_code(td, names = "mat"), code_source)
+  })
+
+  testthat::it("tracks subassignments with function calls on LHS", {
+    td <- qenv() |>
+      within({
+        lst <- list(a = 1, b = 2)
+        names(lst)[1] <- "first"
+      })
+
+    code_source <- "lst <- list(a = 1, b = 2)\nnames(lst)[1] <- \"first\""
+
+    testthat::expect_equal(get_code(td, names = "lst"), code_source)
+  })
+
+  testthat::it("tracks -> operator with subassignments", {
+    td <- qenv() |>
+      within({
+        x <- 1:10
+        c(10, 20, 30) -> x[1:3] # nolint: assignment.
+      })
+
+    code_source <- "x <- 1:10\nx[1:3] <- c(10, 20, 30)"
+
+    testthat::expect_equal(get_code(td, names = "x"), code_source)
+  })
+
+  testthat::it("tracks attributes() function with subassignments", {
+    td <- qenv() |>
+      within({
+        x <- 1:5
+        attributes(x)$names <- letters[1:5]
+      })
+
+    code_source <- "x <- 1:5\nattributes(x)$names <- letters[1:5]"
+
+    testthat::expect_equal(get_code(td, names = "x"), code_source)
+  })
+
+  testthat::it("handles complex nested subassignments", {
+    td <- qenv() |>
+      within({
+        df <- data.frame(x = 1:5, y = 6:10)
+        df[df$x > 2, "y"][1:2] <- c(99, 100)
+      })
+
+    code_source <- "df <- data.frame(x = 1:5, y = 6:10)\ndf[df$x > 2, \"y\"][1:2] <- c(99, 100)"
+
+    testthat::expect_equal(get_code(td, names = "df"), code_source)
+  })
+
+  testthat::it("handles subassignments with multiple operators", {
+    td <- qenv() |>
+      within({
+        lst <- list(a = list(b = 1, c = 2))
+        lst$a$b[2] <- 99
+      })
+
+    code_source <- "lst <- list(a = list(b = 1, c = 2))\nlst$a$b[2] <- 99"
+
+    testthat::expect_equal(get_code(td, names = "lst"), code_source)
+  })
+
+  testthat::it("handles subassignments with data frame column creation", {
+    td <- qenv() |>
+      within({
+        df <- data.frame(x = 1:3)
+        df$new_col <- c("a", "b", "c")
+      })
+
+    code_source <- "df <- data.frame(x = 1:3)\ndf$new_col <- c(\"a\", \"b\", \"c\")"
+
+    testthat::expect_equal(get_code(td, names = "df"), code_source)
+  })
+
+  testthat::it("handles subassignments with matrix indexing", {
+    td <- qenv() |>
+      within({
+        mat <- matrix(1:9, nrow = 3)
+        mat[1:2, 2:3] <- matrix(0, nrow = 2, ncol = 2)
+      })
+
+    code_source <- "mat <- matrix(1:9, nrow = 3)\nmat[1:2, 2:3] <- matrix(0, nrow = 2, ncol = 2)"
+
+    testthat::expect_equal(get_code(td, names = "mat"), code_source)
+  })
+
+  testthat::it("handles subassignments with logical indexing", {
+    td <- qenv() |>
+      within({
+        vec <- 1:10
+        vec[vec %% 2 == 0] <- vec[vec %% 2 == 0] * 2
+      })
+
+    code_source <- "vec <- 1:10\nvec[vec %% 2 == 0] <- vec[vec %% 2 == 0] * 2"
+
+    testthat::expect_equal(get_code(td, names = "vec"), code_source)
+  })
+})

From 8c09923fd1b685b1f67d13f9fe43bfeac882e6cd Mon Sep 17 00:00:00 2001
From: m7pr <marcin.kosinski.mk1@roche.com>
Date: Thu, 11 Dec 2025 11:58:20 +0100
Subject: [PATCH 2/8] Add copilot-setup-steps workflow using template

---
 .github/workflows/copilot-setup-steps.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 .github/workflows/copilot-setup-steps.yml

diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
new file mode 100644
index 000000000..ce3e530b8
--- /dev/null
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -0,0 +1,11 @@
+---
+name: Copilot Agent Setup
+
+on:
+  workflow_dispatch:
+
+jobs:
+  copilot-setup-steps:
+    name: Copilot Agent Setup
+    uses: insightsengineering/r.pkg.template/.github/workflows/copilot-setup-steps.yaml@main
+

From 2e5ebcb1d433adc67f90196c138e4b406c7919d3 Mon Sep 17 00:00:00 2001
From: Marcin <133694481+m7pr@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:02:11 +0100
Subject: [PATCH 3/8] Delete R/utils-get_code_dependency.R

Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com>
---
 R/utils-get_code_dependency.R | 595 ----------------------------------
 1 file changed, 595 deletions(-)
 delete mode 100644 R/utils-get_code_dependency.R

diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R
deleted file mode 100644
index 22cb1089d..000000000
--- a/R/utils-get_code_dependency.R
+++ /dev/null
@@ -1,595 +0,0 @@
-# get_code_dependency ----
-
-#' Get code dependency of an object
-#'
-#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.
-#'
-#' Given a character vector with code, this function will extract the part of the code responsible for creating
-#' the variables specified by `names`.
-#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,
-#' _i.e._ calls that create variables used in the final call and their parents, etc.
-#' Also included are calls that create side-effects like establishing connections.
-#'
-#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .
-#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.
-#'
-#' Side-effects are not detected automatically and must be marked in the code.
-#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required
-#' to reproduce a variable called `object`.
-#'
-#' @param code `character` with the code.
-#' @param names `character` vector of object names.
-#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.
-#'
-#' @return Character vector, a subset of `code`.
-#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.
-#'
-#' @keywords internal
-get_code_dependency <- function(code, names, check_code_names = TRUE) {
-  checkmate::assert_list(code, "character")
-  checkmate::assert_character(names, any.missing = FALSE)
-
-  graph <- lapply(code, attr, "dependency")
-
-  if (check_code_names) {
-    symbols <- unlist(lapply(graph, function(call) {
-      ind <- match("<-", call, nomatch = length(call) + 1L)
-      call[seq_len(ind - 1L)]
-    }))
-
-    if (!all(names %in% unique(symbols))) {
-      warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE)
-    }
-  }
-
-  if (length(code) == 0) {
-    return(code)
-  }
-
-  ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
-
-  lib_ind <- detect_libraries(graph)
-
-  code_ids <- sort(unique(c(lib_ind, ind)))
-  code[code_ids]
-}
-
-#' Locate function call token
-#'
-#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.
-#'
-#' Useful for determining occurrence of `assign` or `data` functions in an input call.
-#'
-#' @param call_pd `data.frame` as returned by `extract_calls()`
-#' @param text `character(1)` to look for in `text` column of `call_pd`
-#'
-#' @return
-#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.
-#' 0 if not found.
-#'
-#' @keywords internal
-#' @noRd
-find_call <- function(call_pd, text) {
-  checkmate::check_data_frame(call_pd)
-  checkmate::check_names(call_pd, must.include = c("token", "text"))
-  checkmate::check_string(text)
-
-  ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)
-  if (length(ans)) {
-    ans
-  } else {
-    0L
-  }
-}
-
-#' Split the result of `utils::getParseData()` into separate calls
-#'
-#' @param pd (`data.frame`) A result of `utils::getParseData()`.
-#'
-#' @return
-#' A `list` of `data.frame`s.
-#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.
-#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.
-#'
-#' @keywords internal
-#' @noRd
-extract_calls <- function(pd) {
-  calls <- lapply(
-    pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"],
-    function(parent) {
-      rbind(
-        pd[pd$id == parent, ],
-        get_children(pd = pd, parent = parent)
-      )
-    }
-  )
-  calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)
-  calls <- Filter(Negate(is.null), calls)
-  calls <- fix_shifted_comments(calls)
-  calls <- remove_custom_assign(calls, c(":="))
-  fix_arrows(calls)
-}
-
-#' @keywords internal
-#' @noRd
-get_children <- function(pd, parent) {
-  idx_children <- abs(pd$parent) == parent
-  children <- pd[idx_children, ]
-  if (nrow(children) == 0) {
-    return(NULL)
-  }
-
-  if (parent > 0) {
-    do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))
-  }
-}
-
-#' Fixes edge case of comments being shifted to the next call.
-#' @keywords internal
-#' @noRd
-fix_shifted_comments <- function(calls) {
-  # If the first or the second token is a @linksto COMMENT,
-  #  then it belongs to the previous call.
-  if (length(calls) >= 2) {
-    for (i in 2:length(calls)) {
-      comment_idx <- grep("@linksto", calls[[i]][, "text"])
-      if (isTRUE(comment_idx[1] <= 2)) {
-        calls[[i - 1]] <- rbind(
-          calls[[i - 1]],
-          calls[[i]][comment_idx[1], ]
-        )
-        calls[[i]] <- calls[[i]][-comment_idx[1], ]
-      }
-    }
-  }
-  Filter(nrow, calls)
-}
-
-#' Fixes edge case of custom assignments operator being treated as assignment.
-#'
-#' @param exclude (`character`) custom assignment operators to be excluded
-#' @keywords internal
-#' @noRd
-remove_custom_assign <- function(calls, exclude = NULL) {
-  checkmate::assert_list(calls)
-  checkmate::assert_character(exclude, null.ok = TRUE)
-  lapply(calls, function(call) {
-    if (!is.null(exclude)) {
-      call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ]
-    } else {
-      call
-    }
-  })
-}
-
-#' Fixes edge case of `<-` assignment operator being called as function,
-#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.
-#' @keywords internal
-#' @noRd
-fix_arrows <- function(calls) {
-  checkmate::assert_list(calls)
-  lapply(calls, function(call) {
-    sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"
-    call[sym_fun, ] <- sub_arrows(call[sym_fun, ])
-    call
-  })
-}
-
-#' Execution of assignment operator substitutions for a call.
-#' @keywords internal
-#' @noRd
-sub_arrows <- function(call) {
-  checkmate::assert_data_frame(call)
-  map <- data.frame(
-    row.names = c("<-", "<<-", "="),
-    token = rep("LEFT_ASSIGN", 3),
-    text = rep("<-", 3)
-  )
-  sub_ids <- call$text %in% rownames(map)
-  call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]
-  call
-}
-
-# code_graph ----
-
-#' Extract object occurrence
-#'
-#' Extracts objects occurrence within calls passed by `pd`.
-#' Also detects which objects depend on which within a call.
-#'
-#' @param pd `data.frame`;
-#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
-#'  created by `extract_calls()` function
-#'
-#' @return
-#' A character vector listing names of objects that depend on this call
-#' and names of objects that this call depends on.
-#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
-#' depends on objects `b` and `c`.
-#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
-#'
-#' @keywords internal
-#' @noRd
-extract_occurrence <- function(pd) {
-  is_in_function <- function(x) {
-    # If an object is a function parameter,
-    # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
-    function_id <- x[x$token == "FUNCTION", "parent"]
-    if (length(function_id)) {
-      x$id %in% get_children(x, function_id[1])$id
-    } else {
-      rep(FALSE, nrow(x))
-    }
-  }
-  in_parenthesis <- function(x) {
-    if (any(x$token %in% c("LBB", "'['"))) {
-      id_start <- min(x$id[x$token %in% c("LBB", "'['")])
-      id_end <- min(x$id[x$token == "']'"])
-      x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
-    }
-  }
-
-  # Handle data(object)/data("object")/data(object, envir = ) independently.
-  data_call <- find_call(pd, "data")
-  if (data_call) {
-    sym <- pd[data_call + 1, "text"]
-    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
-  }
-  # Handle assign(x = ).
-  assign_call <- find_call(pd, "assign")
-  if (assign_call) {
-    # Check if parameters were named.
-    # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
-    # "EQ_SUB" is for `=` appearing after the name of the named parameter.
-    if (any(pd$token == "SYMBOL_SUB")) {
-      params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
-      # Remove sequence of "=", ",".
-      if (length(params > 1)) {
-        remove <- integer(0)
-        for (i in 2:length(params)) {
-          if (params[i - 1] == "=" && params[i] == ",") {
-            remove <- c(remove, i - 1, i)
-          }
-        }
-        if (length(remove)) params <- params[-remove]
-      }
-      pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
-      if (!pos) {
-        return(character(0L))
-      }
-      # pos is indicator of the place of 'x'
-      # 1. All parameters are named, but none is 'x' - return(character(0L))
-      # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
-      # - check "x" in params being just a vector of named parameters.
-      # 3. Some parameters are named, 'x' is not in named parameters
-      # - check first appearance of "," (unnamed parameter) in vector parameters.
-    } else {
-      # Object is the first entry after 'assign'.
-      pos <- 1
-    }
-    sym <- pd[assign_call + pos, "text"]
-    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
-  }
-
-  # What occurs in a function body is not tracked.
-  x <- pd[!is_in_function(pd), ]
-  sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
-  sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL")
-
-  if (length(sym_cond) == 0) {
-    return(character(0L))
-  }
-  # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
-  # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
-  dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
-  if (length(dollar_ids)) {
-    object_ids <- x[sym_cond, "id"]
-    after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
-    sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
-  }
-
-  assign_cond <- grep("ASSIGN", x$token)
-  if (!length(assign_cond)) {
-    return(c("<-", unique(x[sym_cond, "text"])))
-  }
-
-  # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.
-  sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)]
-
-  # If there was an assignment operation detect direction of it.
-  if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c.
-    sym_cond <- rev(sym_cond)
-  }
-
-  after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1
-  ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
-  ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"]))
-  roll <- in_parenthesis(pd)
-  if (length(roll)) {
-    # detect elements appeared in parenthesis and move them on RHS
-    # but only their first appearance
-    # as the same object can appear as regular object and the one used in parenthesis
-    result <- ans
-    for (elem in roll) {
-      idx <- which(result == elem)[1]
-      if (!is.na(idx)) {
-        result <- result[-idx]
-      }
-    }
-    c(result, roll)
-  } else {
-    ans
-  }
-}
-
-#' Moves function names to the right side of dependency graph
-#'
-#' Changes status of the function call from dependent to dependency if occurs in the lhs.
-#' Technically, it means to move function names after the dependency operator.
-#' For example, for `attributes(a) <- b` the dependency graph should look like `c("a", "<-", "b", "attributes")`.
-#'
-#' @param ans `character` vector of object names in dependency graph.
-#' @param functions `character` vector of function names.
-#'
-#' @return
-#' A character vector.
-#' @keywords internal
-#' @noRd
-move_functions_after_arrow <- function(ans, functions) {
-  arrow_pos <- which(ans == "<-")
-  if (length(arrow_pos) == 0) {
-    return(ans)
-  }
-  before_arrow <- setdiff(ans[1:arrow_pos], functions)
-  after_arrow <- ans[(arrow_pos + 1):length(ans)]
-  c(before_arrow, unique(c(intersect(ans[1:arrow_pos], functions), after_arrow)))
-}
-
-#' Extract side effects
-#'
-#' Extracts all object names from the code that are marked with `@linksto` tag.
-#'
-#' The code may contain functions calls that create side effects, e.g. modify the environment.
-#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.
-#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.
-#' With this tag a complete object dependency structure can be established.
-#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
-#'
-#' @param pd `data.frame`;
-#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
-#'  created by `extract_calls()` function
-#'
-#' @return
-#' A character vector of names of objects
-#' depending a call tagged with `@linksto` in a corresponding element of `pd`.
-#'
-#' @keywords internal
-#' @noRd
-extract_side_effects <- function(pd) {
-  linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)
-  unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+"))
-}
-
-#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)
-#' @keywords internal
-#' @noRd
-extract_dependency <- function(parsed_code) {
-  full_pd <- normalize_pd(utils::getParseData(parsed_code))
-  reordered_full_pd <- extract_calls(full_pd)
-
-  # Early return on empty code
-  if (length(reordered_full_pd) == 0L) {
-    return(NULL)
-  }
-
-  if (length(parsed_code) == 0L) {
-    return(extract_side_effects(reordered_full_pd[[1]]))
-  }
-  expr_ix <- lapply(parsed_code[[1]], class) == "{"
-
-  # Build queue of expressions to parse individually
-  queue <- list()
-  parsed_code_list <- if (all(!expr_ix)) {
-    list(parsed_code)
-  } else {
-    queue <- as.list(parsed_code[[1]][expr_ix])
-    new_list <- parsed_code[[1]]
-    new_list[expr_ix] <- NULL
-    list(parse(text = as.expression(new_list), keep.source = TRUE))
-  }
-
-  while (length(queue) > 0) {
-    current <- queue[[1]]
-    queue <- queue[-1]
-    if (identical(current[[1L]], as.name("{"))) {
-      queue <- append(queue, as.list(current)[-1L])
-    } else {
-      parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE)
-    }
-  }
-
-  parsed_occurences <- lapply(
-    parsed_code_list,
-    function(parsed_code) {
-      pd <- normalize_pd(utils::getParseData(parsed_code))
-      reordered_pd <- extract_calls(pd)
-      if (length(reordered_pd) > 0) {
-        # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names
-        # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows
-        # extract_calls is needed to omit empty calls that contain only one token `"';'"`
-        # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different
-        # than in original pd
-        extract_occurrence(reordered_pd[[1]])
-      }
-    }
-  )
-
-  # Merge results together
-  result <- Reduce(
-    function(u, v) {
-      ix <- if ("<-" %in% v) min(which(v == "<-")) else 0
-      u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))])
-      u$right_side <- c(
-        u$right_side,
-        if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))]
-      )
-      u
-    },
-    init = list(left_side = character(0L), right_side = character(0L)),
-    x = parsed_occurences
-  )
-
-  c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side)
-}
-
-# graph_parser ----
-
-#' Return the indices of calls needed to reproduce an object
-#'
-#' @param x The name of the object to return code for.
-#' @param graph A result of `code_graph()`.
-#'
-#' @return
-#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.
-#'
-#' @keywords internal
-#' @noRd
-graph_parser <- function(x, graph) {
-  # x occurrences (lhs)
-  occurrence <- vapply(
-    graph, function(call) {
-      ind <- match("<-", call, nomatch = length(call) + 1L)
-      x %in% call[seq_len(ind - 1L)]
-    },
-    logical(1)
-  )
-
-  # x-dependent objects (rhs)
-  dependencies <- lapply(graph[occurrence], function(call) {
-    ind <- match("<-", call, nomatch = 0L)
-    call[(ind + 1L):length(call)]
-  })
-  dependencies <- setdiff(unlist(dependencies), x)
-
-  dependency_occurrences <- lapply(dependencies, function(dependency) {
-    # track down dependencies and where they occur on the lhs in previous calls
-    last_x_occurrence <- max(which(occurrence))
-    reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1)
-    c(graph_parser(dependency, reduced_graph), last_x_occurrence)
-  })
-
-  sort(unique(c(which(occurrence), unlist(dependency_occurrences))))
-}
-
-
-# default_side_effects --------------------------------------------------------------------------------------------
-
-#' Detect library calls
-#'
-#' Detects `library()` and `require()` function calls.
-#'
-#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
-#'
-#' @return
-#' Integer vector of indices that can be applied to `graph` to obtain all calls containing
-#' `library()` or `require()` calls that are always returned for reproducibility.
-#'
-#' @keywords internal
-#' @noRd
-detect_libraries <- function(graph) {
-  defaults <- c("library", "require")
-
-  which(
-    unlist(
-      lapply(
-        graph, function(x) {
-          any(grepl(pattern = paste(defaults, collapse = "|"), x = x))
-        }
-      )
-    )
-  )
-}
-
-
-# utils -----------------------------------------------------------------------------------------------------------
-
-
-#' Normalize parsed data removing backticks from symbols
-#'
-#' @param pd `data.frame` resulting from `utils::getParseData()` call.
-#'
-#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens.
-#'
-#' @keywords internal
-#' @noRd
-normalize_pd <- function(pd) {
-  # Remove backticks from SYMBOL tokens
-  symbol_index <- grepl("^SYMBOL.*$", pd$token)
-  pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"])
-
-  pd
-}
-
-
-# split_code ------------------------------------------------------------------------------------------------------
-
-
-#' Get line/column in the source where the calls end
-#'
-#'
-#' @param code `character(1)`
-#'
-#' @return `matrix` with `colnames = c("line", "col")`
-#'
-#' @keywords internal
-#' @noRd
-get_call_breaks <- function(code) {
-  parsed_code <- parse(text = code, keep.source = TRUE)
-  pd <- utils::getParseData(parsed_code)
-  pd <- normalize_pd(pd)
-  pd <- pd[pd$token != "';'", ]
-  call_breaks <- t(sapply(
-    extract_calls(pd),
-    function(x) {
-      matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)])))
-    }
-  ))
-  call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only
-  if (nrow(call_breaks) == 0L) {
-    call_breaks <- matrix(numeric(0), ncol = 2)
-  }
-  colnames(call_breaks) <- c("line", "col")
-  call_breaks
-}
-
-#' Split code by calls
-#'
-#' @param code `character` with the code.
-#'
-#' @return list of `character`s of the length equal to the number of calls in `code`.
-#'
-#' @keywords internal
-#' @noRd
-split_code <- function(code) {
-  call_breaks <- get_call_breaks(code)
-  if (nrow(call_breaks) == 0) {
-    return(code)
-  }
-  call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE]
-  code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]]
-  char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)]
-
-  idx_start <- c(
-    0, # first call starts in the beginning of src
-    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1
-  )
-  idx_end <- c(
-    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"],
-    nchar(code) # last call end in the end of src
-  )
-  new_code <- substring(code, idx_start, idx_end)
-
-  # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line
-  # we need to move remove leading and add \n instead when combining calls
-  c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1]))
-}

From afd42f46f5f5ec51e10d7b47eb0d867a6fa53828 Mon Sep 17 00:00:00 2001
From: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 11 Dec 2025 11:06:24 +0000
Subject: [PATCH 4/8] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update

---
 DESCRIPTION                |  1 -
 man/get_code_dependency.Rd | 37 -------------------------------------
 2 files changed, 38 deletions(-)
 delete mode 100644 man/get_code_dependency.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 0648ce2e1..9675c67e7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,5 +73,4 @@ Collate:
     'qenv-show.R'
     'qenv-within.R'
     'teal.code-package.R'
-    'utils-get_code_dependency.R'
     'utils.R'
diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd
deleted file mode 100644
index 678f0233f..000000000
--- a/man/get_code_dependency.Rd
+++ /dev/null
@@ -1,37 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-get_code_dependency.R
-\name{get_code_dependency}
-\alias{get_code_dependency}
-\title{Get code dependency of an object}
-\usage{
-get_code_dependency(code, names, check_code_names = TRUE)
-}
-\arguments{
-\item{code}{\code{character} with the code.}
-
-\item{names}{\code{character} vector of object names.}
-
-\item{check_code_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.}
-}
-\value{
-Character vector, a subset of \code{code}.
-Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector.
-}
-\description{
-Extract subset of code required to reproduce specific object(s), including code producing side-effects.
-}
-\details{
-Given a character vector with code, this function will extract the part of the code responsible for creating
-the variables specified by \code{names}.
-This includes the final call that creates the variable(s) in question as well as all \emph{parent calls},
-\emph{i.e.} calls that create variables used in the final call and their parents, etc.
-Also included are calls that create side-effects like establishing connections.
-
-It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} .
-Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported.
-
-Side-effects are not detected automatically and must be marked in the code.
-Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required
-to reproduce a variable called \code{object}.
-}
-\keyword{internal}

From 2713f694a9c9eb93131ac6ae771edeb37203e08c Mon Sep 17 00:00:00 2001
From: m7pr <marcin.kosinski.mk1@roche.com>
Date: Thu, 11 Dec 2025 12:21:53 +0100
Subject: [PATCH 5/8] bring back utils

---
 R/utils-get_code_dependency.R | 607 ++++++++++++++++++++++++++++++++++
 1 file changed, 607 insertions(+)
 create mode 100644 R/utils-get_code_dependency.R

diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R
new file mode 100644
index 000000000..05a46c0df
--- /dev/null
+++ b/R/utils-get_code_dependency.R
@@ -0,0 +1,607 @@
+# get_code_dependency ----
+
+#' Get code dependency of an object
+#'
+#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.
+#'
+#' Given a character vector with code, this function will extract the part of the code responsible for creating
+#' the variables specified by `names`.
+#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,
+#' _i.e._ calls that create variables used in the final call and their parents, etc.
+#' Also included are calls that create side-effects like establishing connections.
+#'
+#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .
+#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.
+#'
+#' Side-effects are not detected automatically and must be marked in the code.
+#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required
+#' to reproduce a variable called `object`.
+#'
+#' @param code `character` with the code.
+#' @param names `character` vector of object names.
+#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.
+#'
+#' @return Character vector, a subset of `code`.
+#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.
+#'
+#' @keywords internal
+get_code_dependency <- function(code, names, check_code_names = TRUE) {
+  checkmate::assert_list(code, "character")
+  checkmate::assert_character(names, any.missing = FALSE)
+
+  graph <- lapply(code, attr, "dependency")
+
+  if (check_code_names) {
+    symbols <- unlist(lapply(graph, function(call) {
+      ind <- match("<-", call, nomatch = length(call) + 1L)
+      call[seq_len(ind - 1L)]
+    }))
+
+    if (!all(names %in% unique(symbols))) {
+      warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE)
+    }
+  }
+
+  if (length(code) == 0) {
+    return(code)
+  }
+
+  ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
+
+  lib_ind <- detect_libraries(graph)
+
+  code_ids <- sort(unique(c(lib_ind, ind)))
+  code[code_ids]
+}
+
+#' Locate function call token
+#'
+#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.
+#'
+#' Useful for determining occurrence of `assign` or `data` functions in an input call.
+#'
+#' @param call_pd `data.frame` as returned by `extract_calls()`
+#' @param text `character(1)` to look for in `text` column of `call_pd`
+#'
+#' @return
+#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.
+#' 0 if not found.
+#'
+#' @keywords internal
+#' @noRd
+find_call <- function(call_pd, text) {
+  checkmate::check_data_frame(call_pd)
+  checkmate::check_names(call_pd, must.include = c("token", "text"))
+  checkmate::check_string(text)
+
+  ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)
+  if (length(ans)) {
+    ans
+  } else {
+    0L
+  }
+}
+
+#' Split the result of `utils::getParseData()` into separate calls
+#'
+#' @param pd (`data.frame`) A result of `utils::getParseData()`.
+#'
+#' @return
+#' A `list` of `data.frame`s.
+#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.
+#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.
+#'
+#' @keywords internal
+#' @noRd
+extract_calls <- function(pd) {
+  calls <- lapply(
+    pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"],
+    function(parent) {
+      rbind(
+        pd[pd$id == parent, ],
+        get_children(pd = pd, parent = parent)
+      )
+    }
+  )
+  calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)
+  calls <- Filter(Negate(is.null), calls)
+  calls <- fix_shifted_comments(calls)
+  calls <- remove_custom_assign(calls, c(":="))
+  fix_arrows(calls)
+}
+
+#' @keywords internal
+#' @noRd
+get_children <- function(pd, parent) {
+  idx_children <- abs(pd$parent) == parent
+  children <- pd[idx_children, ]
+  if (nrow(children) == 0) {
+    return(NULL)
+  }
+
+  if (parent > 0) {
+    do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))
+  }
+}
+
+#' Fixes edge case of comments being shifted to the next call.
+#' @keywords internal
+#' @noRd
+fix_shifted_comments <- function(calls) {
+  # If the first or the second token is a @linksto COMMENT,
+  #  then it belongs to the previous call.
+  if (length(calls) >= 2) {
+    for (i in 2:length(calls)) {
+      comment_idx <- grep("@linksto", calls[[i]][, "text"])
+      if (isTRUE(comment_idx[1] <= 2)) {
+        calls[[i - 1]] <- rbind(
+          calls[[i - 1]],
+          calls[[i]][comment_idx[1], ]
+        )
+        calls[[i]] <- calls[[i]][-comment_idx[1], ]
+      }
+    }
+  }
+  Filter(nrow, calls)
+}
+
+#' Fixes edge case of custom assignments operator being treated as assignment.
+#'
+#' @param exclude (`character`) custom assignment operators to be excluded
+#' @keywords internal
+#' @noRd
+remove_custom_assign <- function(calls, exclude = NULL) {
+  checkmate::assert_list(calls)
+  checkmate::assert_character(exclude, null.ok = TRUE)
+  lapply(calls, function(call) {
+    if (!is.null(exclude)) {
+      call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ]
+    } else {
+      call
+    }
+  })
+}
+
+#' Fixes edge case of `<-` assignment operator being called as function,
+#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.
+#' @keywords internal
+#' @noRd
+fix_arrows <- function(calls) {
+  checkmate::assert_list(calls)
+  lapply(calls, function(call) {
+    sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"
+    call[sym_fun, ] <- sub_arrows(call[sym_fun, ])
+    call
+  })
+}
+
+#' Execution of assignment operator substitutions for a call.
+#' @keywords internal
+#' @noRd
+sub_arrows <- function(call) {
+  checkmate::assert_data_frame(call)
+  map <- data.frame(
+    row.names = c("<-", "<<-", "="),
+    token = rep("LEFT_ASSIGN", 3),
+    text = rep("<-", 3)
+  )
+  sub_ids <- call$text %in% rownames(map)
+  call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]
+  call
+}
+
+# code_graph ----
+
+#' Extract object occurrence
+#'
+#' Extracts objects occurrence within calls passed by `pd`.
+#' Also detects which objects depend on which within a call.
+#'
+#' @param pd `data.frame`;
+#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
+#'  created by `extract_calls()` function
+#'
+#' @return
+#' A character vector listing names of objects that depend on this call
+#' and names of objects that this call depends on.
+#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
+#' depends on objects `b` and `c`.
+#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
+#'
+#' @keywords internal
+#' @noRd
+extract_occurrence <- function(pd) {
+  is_in_function <- function(x) {
+    # If an object is a function parameter,
+    # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
+    function_id <- x[x$token == "FUNCTION", "parent"]
+    if (length(function_id)) {
+      x$id %in% get_children(x, function_id[1])$id
+    } else {
+      rep(FALSE, nrow(x))
+    }
+  }
+  in_parenthesis <- function(x) {
+    if (any(x$token %in% c("LBB", "'['"))) {
+      id_start <- min(x$id[x$token %in% c("LBB", "'['")])
+      id_end <- min(x$id[x$token == "']'"])
+      x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
+    }
+  }
+
+  # Handle data(object)/data("object")/data(object, envir = ) independently.
+  data_call <- find_call(pd, "data")
+  if (data_call) {
+    sym <- pd[data_call + 1, "text"]
+    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
+  }
+  # Handle assign(x = ).
+  assign_call <- find_call(pd, "assign")
+  if (assign_call) {
+    # Check if parameters were named.
+    # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
+    # "EQ_SUB" is for `=` appearing after the name of the named parameter.
+    if (any(pd$token == "SYMBOL_SUB")) {
+      params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
+      # Remove sequence of "=", ",".
+      if (length(params > 1)) {
+        remove <- integer(0)
+        for (i in 2:length(params)) {
+          if (params[i - 1] == "=" && params[i] == ",") {
+            remove <- c(remove, i - 1, i)
+          }
+        }
+        if (length(remove)) params <- params[-remove]
+      }
+      pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
+      if (!pos) {
+        return(character(0L))
+      }
+      # pos is indicator of the place of 'x'
+      # 1. All parameters are named, but none is 'x' - return(character(0L))
+      # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
+      # - check "x" in params being just a vector of named parameters.
+      # 3. Some parameters are named, 'x' is not in named parameters
+      # - check first appearance of "," (unnamed parameter) in vector parameters.
+    } else {
+      # Object is the first entry after 'assign'.
+      pos <- 1
+    }
+    sym <- pd[assign_call + pos, "text"]
+    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
+  }
+
+  # What occurs in a function body is not tracked.
+  x <- pd[!is_in_function(pd), ]
+  sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
+  sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL")
+
+  if (length(sym_cond) == 0) {
+    return(character(0L))
+  }
+  # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
+  # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
+  dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
+  if (length(dollar_ids)) {
+    object_ids <- x[sym_cond, "id"]
+    after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
+    sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
+  }
+
+  assign_cond <- grep("ASSIGN", x$token)
+  if (!length(assign_cond)) {
+    return(c("<-", unique(x[sym_cond, "text"])))
+  }
+
+  # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.
+  sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)]
+
+  # If there was an assignment operation detect direction of it.
+  if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c.
+    sym_cond <- rev(sym_cond)
+  }
+
+  after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1
+  ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
+  ans <- move_functions_after_arrow(ans, unique(x[sym_fc_cond, "text"]))
+  roll <- in_parenthesis(pd)
+  if (length(roll)) {
+    # detect elements appeared in parenthesis and move them on RHS
+    # but only their first appearance
+    # as the same object can appear as regular object and the one used in parenthesis
+    result <- ans
+    for (elem in roll) {
+      idx <- which(result == elem)[1]
+      if (!is.na(idx)) {
+        result <- result[-idx]
+      }
+    }
+    c(result, roll)
+  } else {
+    ans
+  }
+}
+
+#' Moves function names to the right side of dependency graph
+#'
+#' Changes status of the function call from dependent to dependency if occurs in the lhs.
+#' Technically, it means to move function names after the dependency operator.
+#' For example, for `attributes(a) <- b` the dependency graph should look like `c("a", "<-", "b", "attributes")`.
+#'
+#' @param ans `character` vector of object names in dependency graph.
+#' @param functions `character` vector of function names.
+#'
+#' @return
+#' A character vector.
+#' @keywords internal
+#' @noRd
+move_functions_after_arrow <- function(ans, functions) {
+  arrow_pos <- which(ans == "<-")
+  if (length(arrow_pos) == 0) {
+    return(ans)
+  }
+  if (length(functions) == 0) {
+    return(ans)
+  }
+  ans_pre <- ans[1:arrow_pos]
+  # it's setdiff but without the removal of duplicates
+  # do not use setdiff(ans_pre, functions)
+  # as it removes duplicates from ans_pre even if they do not appear in functions
+  # check setdiff(c("A", "A"), "B") - gives "A", where we want to keep c("A", "A")
+  for (fun in functions) {
+    if (any(ans_pre == fun)) ans_pre <- ans_pre[-match(fun, ans_pre)]
+  }
+  after_arrow <- if (arrow_pos < length(ans)) {
+    ans[(arrow_pos + 1):length(ans)]
+  }
+  c(ans_pre, after_arrow)
+}
+
+#' Extract side effects
+#'
+#' Extracts all object names from the code that are marked with `@linksto` tag.
+#'
+#' The code may contain functions calls that create side effects, e.g. modify the environment.
+#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.
+#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.
+#' With this tag a complete object dependency structure can be established.
+#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
+#'
+#' @param pd `data.frame`;
+#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
+#'  created by `extract_calls()` function
+#'
+#' @return
+#' A character vector of names of objects
+#' depending a call tagged with `@linksto` in a corresponding element of `pd`.
+#'
+#' @keywords internal
+#' @noRd
+extract_side_effects <- function(pd) {
+  linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)
+  unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+"))
+}
+
+#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)
+#' @keywords internal
+#' @noRd
+extract_dependency <- function(parsed_code) {
+  full_pd <- normalize_pd(utils::getParseData(parsed_code))
+  reordered_full_pd <- extract_calls(full_pd)
+
+  # Early return on empty code
+  if (length(reordered_full_pd) == 0L) {
+    return(NULL)
+  }
+
+  if (length(parsed_code) == 0L) {
+    return(extract_side_effects(reordered_full_pd[[1]]))
+  }
+  expr_ix <- lapply(parsed_code[[1]], class) == "{"
+
+  # Build queue of expressions to parse individually
+  queue <- list()
+  parsed_code_list <- if (all(!expr_ix)) {
+    list(parsed_code)
+  } else {
+    queue <- as.list(parsed_code[[1]][expr_ix])
+    new_list <- parsed_code[[1]]
+    new_list[expr_ix] <- NULL
+    list(parse(text = as.expression(new_list), keep.source = TRUE))
+  }
+
+  while (length(queue) > 0) {
+    current <- queue[[1]]
+    queue <- queue[-1]
+    if (identical(current[[1L]], as.name("{"))) {
+      queue <- append(queue, as.list(current)[-1L])
+    } else {
+      parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE)
+    }
+  }
+
+  parsed_occurences <- lapply(
+    parsed_code_list,
+    function(parsed_code) {
+      pd <- normalize_pd(utils::getParseData(parsed_code))
+      reordered_pd <- extract_calls(pd)
+      if (length(reordered_pd) > 0) {
+        # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names
+        # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows
+        # extract_calls is needed to omit empty calls that contain only one token `"';'"`
+        # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different
+        # than in original pd
+        extract_occurrence(reordered_pd[[1]])
+      }
+    }
+  )
+
+  # Merge results together
+  result <- Reduce(
+    function(u, v) {
+      ix <- if ("<-" %in% v) min(which(v == "<-")) else 0
+      u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))])
+      u$right_side <- c(
+        u$right_side,
+        if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))]
+      )
+      u
+    },
+    init = list(left_side = character(0L), right_side = character(0L)),
+    x = parsed_occurences
+  )
+
+  c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side)
+}
+
+# graph_parser ----
+
+#' Return the indices of calls needed to reproduce an object
+#'
+#' @param x The name of the object to return code for.
+#' @param graph A result of `code_graph()`.
+#'
+#' @return
+#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.
+#'
+#' @keywords internal
+#' @noRd
+graph_parser <- function(x, graph) {
+  # x occurrences (lhs)
+  occurrence <- vapply(
+    graph, function(call) {
+      ind <- match("<-", call, nomatch = length(call) + 1L)
+      x %in% call[seq_len(ind - 1L)]
+    },
+    logical(1)
+  )
+
+  # x-dependent objects (rhs)
+  dependencies <- lapply(graph[occurrence], function(call) {
+    ind <- match("<-", call, nomatch = 0L)
+    call[(ind + 1L):length(call)]
+  })
+  dependencies <- setdiff(unlist(dependencies), x)
+
+  dependency_occurrences <- lapply(dependencies, function(dependency) {
+    # track down dependencies and where they occur on the lhs in previous calls
+    last_x_occurrence <- max(which(occurrence))
+    reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1)
+    c(graph_parser(dependency, reduced_graph), last_x_occurrence)
+  })
+
+  sort(unique(c(which(occurrence), unlist(dependency_occurrences))))
+}
+
+
+# default_side_effects --------------------------------------------------------------------------------------------
+
+#' Detect library calls
+#'
+#' Detects `library()` and `require()` function calls.
+#'
+#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
+#'
+#' @return
+#' Integer vector of indices that can be applied to `graph` to obtain all calls containing
+#' `library()` or `require()` calls that are always returned for reproducibility.
+#'
+#' @keywords internal
+#' @noRd
+detect_libraries <- function(graph) {
+  defaults <- c("library", "require")
+
+  which(
+    unlist(
+      lapply(
+        graph, function(x) {
+          any(grepl(pattern = paste(defaults, collapse = "|"), x = x))
+        }
+      )
+    )
+  )
+}
+
+
+# utils -----------------------------------------------------------------------------------------------------------
+
+
+#' Normalize parsed data removing backticks from symbols
+#'
+#' @param pd `data.frame` resulting from `utils::getParseData()` call.
+#'
+#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens.
+#'
+#' @keywords internal
+#' @noRd
+normalize_pd <- function(pd) {
+  # Remove backticks from SYMBOL tokens
+  symbol_index <- grepl("^SYMBOL.*$", pd$token)
+  pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"])
+
+  pd
+}
+
+
+# split_code ------------------------------------------------------------------------------------------------------
+
+
+#' Get line/column in the source where the calls end
+#'
+#'
+#' @param code `character(1)`
+#'
+#' @return `matrix` with `colnames = c("line", "col")`
+#'
+#' @keywords internal
+#' @noRd
+get_call_breaks <- function(code) {
+  parsed_code <- parse(text = code, keep.source = TRUE)
+  pd <- utils::getParseData(parsed_code)
+  pd <- normalize_pd(pd)
+  pd <- pd[pd$token != "';'", ]
+  call_breaks <- t(sapply(
+    extract_calls(pd),
+    function(x) {
+      matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)])))
+    }
+  ))
+  call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only
+  if (nrow(call_breaks) == 0L) {
+    call_breaks <- matrix(numeric(0), ncol = 2)
+  }
+  colnames(call_breaks) <- c("line", "col")
+  call_breaks
+}
+
+#' Split code by calls
+#'
+#' @param code `character` with the code.
+#'
+#' @return list of `character`s of the length equal to the number of calls in `code`.
+#'
+#' @keywords internal
+#' @noRd
+split_code <- function(code) {
+  call_breaks <- get_call_breaks(code)
+  if (nrow(call_breaks) == 0) {
+    return(code)
+  }
+  call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE]
+  code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]]
+  char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)]
+
+  idx_start <- c(
+    0, # first call starts in the beginning of src
+    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1
+  )
+  idx_end <- c(
+    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"],
+    nchar(code) # last call end in the end of src
+  )
+  new_code <- substring(code, idx_start, idx_end)
+
+  # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line
+  # we need to move remove leading and add \n instead when combining calls
+  c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1]))
+}
\ No newline at end of file

From c44f0ef637ebaac51e413b07d52e368d36c7d4fa Mon Sep 17 00:00:00 2001
From: Marcin <133694481+m7pr@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:22:36 +0100
Subject: [PATCH 6/8] Update DESCRIPTION

Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com>
---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9675c67e7..0648ce2e1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,5 @@ Collate:
     'qenv-show.R'
     'qenv-within.R'
     'teal.code-package.R'
+    'utils-get_code_dependency.R'
     'utils.R'

From 7ea51ecbc3d7f1a284715be8d5db106c7567020e Mon Sep 17 00:00:00 2001
From: Marcin <133694481+m7pr@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:23:11 +0100
Subject: [PATCH 7/8] Update R/utils-get_code_dependency.R

Signed-off-by: Marcin <133694481+m7pr@users.noreply.github.com>
---
 R/utils-get_code_dependency.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R
index 05a46c0df..954448d31 100644
--- a/R/utils-get_code_dependency.R
+++ b/R/utils-get_code_dependency.R
@@ -604,4 +604,4 @@ split_code <- function(code) {
   # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line
   # we need to move remove leading and add \n instead when combining calls
   c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1]))
-}
\ No newline at end of file
+}

From 3eda1d5d09355fbbe89ed95032a3d5ed9843d615 Mon Sep 17 00:00:00 2001
From: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 11 Dec 2025 11:27:40 +0000
Subject: [PATCH 8/8] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update

---
 man/get_code_dependency.Rd | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 man/get_code_dependency.Rd

diff --git a/man/get_code_dependency.Rd b/man/get_code_dependency.Rd
new file mode 100644
index 000000000..678f0233f
--- /dev/null
+++ b/man/get_code_dependency.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-get_code_dependency.R
+\name{get_code_dependency}
+\alias{get_code_dependency}
+\title{Get code dependency of an object}
+\usage{
+get_code_dependency(code, names, check_code_names = TRUE)
+}
+\arguments{
+\item{code}{\code{character} with the code.}
+
+\item{names}{\code{character} vector of object names.}
+
+\item{check_code_names}{\code{logical(1)} flag specifying if a warning for non-existing names should be displayed.}
+}
+\value{
+Character vector, a subset of \code{code}.
+Note that subsetting is actually done on the calls \code{code}, not necessarily on the elements of the vector.
+}
+\description{
+Extract subset of code required to reproduce specific object(s), including code producing side-effects.
+}
+\details{
+Given a character vector with code, this function will extract the part of the code responsible for creating
+the variables specified by \code{names}.
+This includes the final call that creates the variable(s) in question as well as all \emph{parent calls},
+\emph{i.e.} calls that create variables used in the final call and their parents, etc.
+Also included are calls that create side-effects like establishing connections.
+
+It is assumed that object dependency is established by using three assignment operators: \verb{<-}, \code{=}, and \verb{->} .
+Other assignment methods (\code{assign}, \verb{<<-}) or non-standard-evaluation methods are not supported.
+
+Side-effects are not detected automatically and must be marked in the code.
+Add \verb{# @linksto object} at the end of a line where a side-effect occurs to specify that this line is required
+to reproduce a variable called \code{object}.
+}
+\keyword{internal}