From 41c8f670d08644104d5be7dd4fe599110fbdc47d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:27:58 -0700 Subject: [PATCH 01/10] refactor cedta rules into helper --- R/cedta.R | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 83d92ec9dd..2afea32b7b 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -39,6 +39,36 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end +# in a helper to promote readability +# NB: put the most common and recommended cases first for speed +.cedta_impl_ <- function(ns, n) { + nsname = getNamespaceName(ns) + if (nsname == "data.table") return(TRUE) + + if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + + if (nsname == "utils") { + if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) + + # 'example' for #2972 + sc <- sys.calls() + if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) + } + + if (nsname == "base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply + + if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) return(TRUE) + + if (nsname %chin% cedta.override) return(TRUE) + + # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist + if (isTRUE(ns$.datatable.aware)) return(TRUE) + + # both ns$.Depends and get(.Depends,ns) are not sufficient + pkg_ns = paste("package", nsname, sep=":") + tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) +} + # cedta = Calling Environment Data.Table-Aware cedta = function(n=2L) { # Calling Environment Data Table Aware @@ -51,20 +81,10 @@ cedta = function(n=2L) { # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 return(TRUE) } - nsname = getNamespaceName(ns) - ans = nsname=="data.table" || - "data.table" %chin% names(getNamespaceImports(ns)) || # most common and recommended cases first for speed - (nsname=="utils" && - (exists("debugger.look", parent.frame(n+1L)) || - (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972 - (nsname=="base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) || # lapply - (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) || - nsname %chin% cedta.override || - isTRUE(ns$.datatable.aware) || # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist - tryCatch("data.table" %chin% get(".Depends",paste("package",nsname,sep=":"),inherits=FALSE),error=function(e)FALSE) # both ns$.Depends and get(.Depends,ns) are not sufficient + ans = .cedta_impl_(ns, n) if (!ans && getOption("datatable.verbose")) { # nocov start - catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", nsname) + catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", getNamespaceName(ns)) print(sapply(sys.calls(), `[[`, 1L)) # nocov end # so we can trace the namespace name that may need to be added (very unusually) From c5e1284c94b26cd272b80c124355181132feaa1a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:34:03 -0700 Subject: [PATCH 02/10] implement for bug --- R/cedta.R | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 2afea32b7b..e829f054df 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -28,8 +28,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow # nocov start: very hard to reach this within our test suite -- the call stack within a call generated by e.g. knitr # for loop, not any(vapply_1b(.)), to allow early exit -.any_eval_calls_in_stack = function() { - calls = sys.calls() +.any_eval_calls_in_stack = function(calls) { # likelier to be close to the end of the call stack, right? for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406. the_call = calls[[ii]][[1L]] @@ -39,6 +38,17 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } # nocov end +.any_sd_queries_in_stack = function(calls) { + for (ii in length(calls):1) { # nolint: seq_linter. As above. + the_call = calls[[ii]][1L] + if (!is.name(the_call) || the_call != "[") next + the_lhs = calls[[ii]][[2L]] + if (!is.name(the_lhs) || the_lhs != ".SD") next + return(TRUE) + } + FALSE +} + # in a helper to promote readability # NB: put the most common and recommended cases first for speed .cedta_impl_ <- function(ns, n) { @@ -47,17 +57,21 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) # 'example' for #2972 - sc <- sys.calls() if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) } - if (nsname == "base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply + if (nsname == "base") { + if (all(c("FUN", "X") %chin% ls(parent.frame(n)))) return(TRUE) # lapply + + if (.any_sd_queries_in_stack(sc)) return(TRUE) # e.g. lapply() where "piped-in" j= arg has .SD[] + } - if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack()) return(TRUE) + if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) if (nsname %chin% cedta.override) return(TRUE) From d82923ef6d84cbb69ace752e287c73ffc20cc302 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:37:28 -0700 Subject: [PATCH 03/10] test, NEWS --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 3 +++ 2 files changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 9421f96400..04a409c8c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -72,6 +72,8 @@ 12. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR. +13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix. + ### NOTES 1. The following in-progress deprecations have proceeded: diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aceeb77f89..4e09bc8df6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21426,3 +21426,6 @@ test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3 M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6)) + +# .SD reference in '...' passed to lapply(FUN=) is recognized as data.table +test(2331, lapply(list(data.table(a=1:2)), `[`, j=.SD[1L]), list(data.table(a=1L))) From 885dbb1ef5c68ef06e752d3158f37e587f07c152 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:40:04 -0700 Subject: [PATCH 04/10] About 100 packages define this now, no sense keeping track --- R/cedta.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/cedta.R b/R/cedta.R index e829f054df..7e29b81522 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -75,7 +75,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (nsname %chin% cedta.override) return(TRUE) - # As of Sep 2018: RCAS, caretEnsemble, dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist if (isTRUE(ns$.datatable.aware)) return(TRUE) # both ns$.Depends and get(.Depends,ns) are not sufficient From 1643f36f37977a98403fa2369e3314c338ccd263 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:40:35 -0700 Subject: [PATCH 05/10] trailing ws --- R/cedta.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 7e29b81522..0089ed6b8c 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -54,9 +54,9 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .cedta_impl_ <- function(ns, n) { nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) - + if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) - + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) @@ -72,11 +72,11 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow } if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) - + if (nsname %chin% cedta.override) return(TRUE) - + if (isTRUE(ns$.datatable.aware)) return(TRUE) - + # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) From c43cee846201b7b30983cbb934f307778d1c62f5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 13:41:59 -0700 Subject: [PATCH 06/10] rearrange: .datatable.aware to the top --- R/cedta.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 0089ed6b8c..0271d6266a 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -57,6 +57,8 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + if (isTRUE(ns$.datatable.aware)) return(TRUE) + sc <- sys.calls() if (nsname == "utils") { if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) @@ -75,8 +77,6 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (nsname %chin% cedta.override) return(TRUE) - if (isTRUE(ns$.datatable.aware)) return(TRUE) - # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") tryCatch("data.table" %chin% get(".Depends", pkg_ns, inherits=FALSE), error=function(e) FALSE) From f7be16543dac3ecc5d694ce3e38648a1da50313c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 21:47:53 +0000 Subject: [PATCH 07/10] push isNamespace check into helper --- R/cedta.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index 0271d6266a..a6b8e49fae 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -52,6 +52,11 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow # in a helper to promote readability # NB: put the most common and recommended cases first for speed .cedta_impl_ <- function(ns, n) { + if (!isNamespace(ns)) { + # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 + return(TRUE) + } + nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) @@ -90,10 +95,6 @@ cedta = function(n=2L) { return(env$.datatable.aware) } ns = topenv(env) - if (!isNamespace(ns)) { - # e.g. DT queries at the prompt (.GlobalEnv) and knitr's eval(,envir=globalenv()) but not DF[...] inside knitr::kable v1.6 - return(TRUE) - } ans = .cedta_impl_(ns, n) if (!ans && getOption("datatable.verbose")) { # nocov start From 5462a20bf046838f8b95e229852fece97cee4fe9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 11 Jul 2025 21:59:36 +0000 Subject: [PATCH 08/10] Fix: look up another level, use %iscall% --- R/cedta.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index a6b8e49fae..b877860148 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -31,8 +31,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .any_eval_calls_in_stack = function(calls) { # likelier to be close to the end of the call stack, right? for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406. - the_call = calls[[ii]][[1L]] - if (is.name(the_call) && (the_call %chin% c("eval", "evalq"))) return(TRUE) + if (calls[[ii]] %iscall% c("eval", "evalq")) return(TRUE) } FALSE } @@ -40,8 +39,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow .any_sd_queries_in_stack = function(calls) { for (ii in length(calls):1) { # nolint: seq_linter. As above. - the_call = calls[[ii]][1L] - if (!is.name(the_call) || the_call != "[") next + if (!calls[[ii]] %iscall% "[") next the_lhs = calls[[ii]][[2L]] if (!is.name(the_lhs) || the_lhs != ".SD") next return(TRUE) @@ -95,7 +93,7 @@ cedta = function(n=2L) { return(env$.datatable.aware) } ns = topenv(env) - ans = .cedta_impl_(ns, n) + ans = .cedta_impl_(ns, n+1L) if (!ans && getOption("datatable.verbose")) { # nocov start catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", getNamespaceName(ns)) From 0a9089a336317828a589e047f9e70f25dc3202f3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 15:32:24 -0700 Subject: [PATCH 09/10] nocov very-difficult-to-test lines --- R/cedta.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/cedta.R b/R/cedta.R index b877860148..ac5cb30334 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -58,13 +58,13 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow nsname = getNamespaceName(ns) if (nsname == "data.table") return(TRUE) - if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) + if ("data.table" %chin% names(getNamespaceImports(ns))) return(TRUE) # nocov - if (isTRUE(ns$.datatable.aware)) return(TRUE) + if (isTRUE(ns$.datatable.aware)) return(TRUE) # nocov sc <- sys.calls() if (nsname == "utils") { - if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) + if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) # nocov # 'example' for #2972 if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) @@ -76,9 +76,9 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (.any_sd_queries_in_stack(sc)) return(TRUE) # e.g. lapply() where "piped-in" j= arg has .SD[] } - if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) + if (nsname %chin% cedta.pkgEvalsUserCode && .any_eval_calls_in_stack(sc)) return(TRUE) # nocov - if (nsname %chin% cedta.override) return(TRUE) + if (nsname %chin% cedta.override) return(TRUE) # nocov # both ns$.Depends and get(.Depends,ns) are not sufficient pkg_ns = paste("package", nsname, sep=":") From 96f7aff8cebb9ed30b32f562cf7974cb50f268c6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 12 Jul 2025 16:08:29 -0700 Subject: [PATCH 10/10] more nocov --- R/cedta.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cedta.R b/R/cedta.R index ac5cb30334..b29237e91e 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -67,7 +67,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow if (exists("debugger.look", parent.frame(n+1L))) return(TRUE) # nocov # 'example' for #2972 - if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) + if (length(sc) >= 8L && sc[[length(sc) - 7L]] %iscall% 'example') return(TRUE) # nocov } if (nsname == "base") {