From 160f077bb97d2cd3b9ca0ab04ab4939160a68d9c Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 24 Jun 2025 10:16:08 +0200 Subject: [PATCH 1/7] add test case --- .../pushdown/like_perf_regression.test | 38 +++++++++++++++++++ ...test_pushdown_filters_into_delim_join.test | 19 ++++++++++ 2 files changed, 57 insertions(+) create mode 100644 test/optimizer/pushdown/like_perf_regression.test create mode 100644 test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test diff --git a/test/optimizer/pushdown/like_perf_regression.test b/test/optimizer/pushdown/like_perf_regression.test new file mode 100644 index 000000000000..01f4a7a165d1 --- /dev/null +++ b/test/optimizer/pushdown/like_perf_regression.test @@ -0,0 +1,38 @@ +# name: test/optimizer/pushdown/like_perf_regression.test +# description: Test reordering of cheap filters +# group: [pushdown] + +# create or replace table int_and_long_strings as +# from range(20000000) t(i) +# select +# i, +# md5(i::varchar) as medium_string +# ; +# +# insert into int_and_long_strings from int_and_long_strings; +# insert into int_and_long_strings from int_and_long_strings; +# insert into int_and_long_strings from int_and_long_strings; +# insert into int_and_long_strings from int_and_long_strings; +# from int_and_long_strings +# where +# i > 10000000 +# and medium_string ilike '%888%' +# ; +# + +mode skip + +statement ok +attach '/Users/tomebergen/duckdb/bug.db' as bug; + +statement ok +use bug; + +statement ok +pragma threads=1; + +statement ok +from int_and_long_strings +where + medium_string ilike '%888%' +; diff --git a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test new file mode 100644 index 000000000000..33b4f912b5d3 --- /dev/null +++ b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test @@ -0,0 +1,19 @@ +# name: test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test +# description: Push filters into delim joins. +# group: [pushdown] + + +statement ok +attach 'post.db' as posts_db; + +statement ok +use posts_db + +statement ok +SELECT + (t0."Tags") AS "Tags", + (COUNT(Id)) AS "total_posts_measure" +FROM "posts", LATERAL UNNEST("Tags") t0("Tags") +WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') +GROUP BY 1 +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; \ No newline at end of file From 6b0fb7092c83e0260a4d3bbf48767ac001a7cfcf Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 24 Jun 2025 13:18:06 +0200 Subject: [PATCH 2/7] fix up test file --- .../pushdown/like_perf_regression.test | 38 ------------------- ...test_pushdown_filters_into_delim_join.test | 6 ++- 2 files changed, 4 insertions(+), 40 deletions(-) delete mode 100644 test/optimizer/pushdown/like_perf_regression.test diff --git a/test/optimizer/pushdown/like_perf_regression.test b/test/optimizer/pushdown/like_perf_regression.test deleted file mode 100644 index 01f4a7a165d1..000000000000 --- a/test/optimizer/pushdown/like_perf_regression.test +++ /dev/null @@ -1,38 +0,0 @@ -# name: test/optimizer/pushdown/like_perf_regression.test -# description: Test reordering of cheap filters -# group: [pushdown] - -# create or replace table int_and_long_strings as -# from range(20000000) t(i) -# select -# i, -# md5(i::varchar) as medium_string -# ; -# -# insert into int_and_long_strings from int_and_long_strings; -# insert into int_and_long_strings from int_and_long_strings; -# insert into int_and_long_strings from int_and_long_strings; -# insert into int_and_long_strings from int_and_long_strings; -# from int_and_long_strings -# where -# i > 10000000 -# and medium_string ilike '%888%' -# ; -# - -mode skip - -statement ok -attach '/Users/tomebergen/duckdb/bug.db' as bug; - -statement ok -use bug; - -statement ok -pragma threads=1; - -statement ok -from int_and_long_strings -where - medium_string ilike '%888%' -; diff --git a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test index 33b4f912b5d3..3b4c78a927ee 100644 --- a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test +++ b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test @@ -2,9 +2,11 @@ # description: Push filters into delim joins. # group: [pushdown] +statement ok +pragma enable_verification; statement ok -attach 'post.db' as posts_db; +attach 'posts.db' as posts_db; statement ok use posts_db @@ -13,7 +15,7 @@ statement ok SELECT (t0."Tags") AS "Tags", (COUNT(Id)) AS "total_posts_measure" -FROM "posts", LATERAL UNNEST("Tags") t0("Tags") +FROM posts, LATERAL UNNEST("Tags") t0("Tags") WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') GROUP BY 1 ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; \ No newline at end of file From 5480875eff6172cfcba69274e877d23902302ce3 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 25 Jun 2025 12:37:25 +0200 Subject: [PATCH 3/7] have a code fix --- src/main/config.cpp | 2 +- src/optimizer/pushdown/pushdown_get.cpp | 29 +++++++++++++++++++ .../pushdown/pushdown_inner_join.cpp | 19 ++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/main/config.cpp b/src/main/config.cpp index 2be4afc8488f..0274e2ac9709 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -19,7 +19,7 @@ namespace duckdb { #ifdef DEBUG -bool DBConfigOptions::debug_print_bindings = false; +bool DBConfigOptions::debug_print_bindings = true; #endif #define DUCKDB_GLOBAL(_PARAM) \ diff --git a/src/optimizer/pushdown/pushdown_get.cpp b/src/optimizer/pushdown/pushdown_get.cpp index 90dbbb823951..968ce18ec486 100644 --- a/src/optimizer/pushdown/pushdown_get.cpp +++ b/src/optimizer/pushdown/pushdown_get.cpp @@ -43,6 +43,35 @@ unique_ptr FilterPushdown::PushdownGet(unique_ptr> remain_expressions; + // for (auto &filter : filters) { + // auto &f = *filter; + // auto can_push = true; + // // for (auto &binding : f.bindings) { + // // if (binding == unnest.unnest_index) { + // // can_push = false; + // // break; + // // } + // // } + // // if the expression index table index is the unnest index, then the filter is on the + // // unnest, and it should not be pushed down. + // if (!can_push) { + // // We can't push down related expressions if the column in the + // // expression is generated by the functions which have side effects + // remain_expressions.push_back(std::move(f.filter)); + // } else { + // // add the filter to the child pushdown + // if (child_pushdown.AddFilter(std::move(f.filter)) == FilterResult::UNSATISFIABLE) { + // // filter statically evaluates to false, strip tree + // return make_uniq(std::move(op)); + // } + // } + // } + // } if (!get.table_filters.filters.empty() || !get.function.filter_pushdown) { // the table function does not support filter pushdown: push a LogicalFilter on top diff --git a/src/optimizer/pushdown/pushdown_inner_join.cpp b/src/optimizer/pushdown/pushdown_inner_join.cpp index 8370f4ca9ba6..bdd9286a74bd 100644 --- a/src/optimizer/pushdown/pushdown_inner_join.cpp +++ b/src/optimizer/pushdown/pushdown_inner_join.cpp @@ -14,6 +14,25 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptrCast(); D_ASSERT(join.join_type == JoinType::INNER); if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { + // try to push the current filters into the children. + for (idx_t i = 0; i < join.children.size(); i++) { + FilterPushdown new_pushdown(optimizer, convert_mark_joins); + unordered_set child_bindings; + LogicalJoin::GetTableReferences(*op->children[i], child_bindings); + // only add filters to the children if we know the bindings are in the join child + for (auto &filter : filters) { + for (auto &filter : filters) { + if (std::all_of(filter->bindings.begin(), filter->bindings.end(), + [&](const idx_t &binding) { return child_bindings.count(binding); })) { + new_pushdown.AddFilter(filter->filter->Copy()); + } + } + } + new_pushdown.GenerateFilters(); + if (!new_pushdown.filters.empty()) { + join.children[i] = new_pushdown.Rewrite(std::move(join.children[i])); + } + } return FinishPushdown(std::move(op)); } // inner join: gather all the conditions of the inner join and add to the filter list From 5e883fb0c6df3fc912fb6c04139eb614865efe4d Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 25 Jun 2025 13:00:23 +0200 Subject: [PATCH 4/7] added a test as well --- .../pushdown/pushdown_inner_join.cpp | 10 ++--- ...test_pushdown_filters_into_delim_join.test | 40 ++++++++++++++----- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/optimizer/pushdown/pushdown_inner_join.cpp b/src/optimizer/pushdown/pushdown_inner_join.cpp index bdd9286a74bd..e19b6d076ce1 100644 --- a/src/optimizer/pushdown/pushdown_inner_join.cpp +++ b/src/optimizer/pushdown/pushdown_inner_join.cpp @@ -21,11 +21,9 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptrchildren[i], child_bindings); // only add filters to the children if we know the bindings are in the join child for (auto &filter : filters) { - for (auto &filter : filters) { - if (std::all_of(filter->bindings.begin(), filter->bindings.end(), - [&](const idx_t &binding) { return child_bindings.count(binding); })) { - new_pushdown.AddFilter(filter->filter->Copy()); - } + if (std::all_of(filter->bindings.begin(), filter->bindings.end(), + [&](const idx_t &binding) { return child_bindings.count(binding); })) { + new_pushdown.AddFilter(filter->filter->Copy()); } } new_pushdown.GenerateFilters(); @@ -33,6 +31,8 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptr= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') GROUP BY 1 -ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; \ No newline at end of file +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; +---- +r 4 +python 3 +c++ 2 +stats 1 + +statement ok +pragma explain_output='optimized_only'; + +# filter is pushded below the right delim join +query II +explain SELECT +(t0."Tags") AS "Tags", +(COUNT(Id)) AS "total_posts_measure" +FROM posts, LATERAL UNNEST("Tags") t0("Tags") +WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') +GROUP BY 1 +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; +---- +logical_opt :.*DELIM_JOIN.*FILTER.* \ No newline at end of file From a0d434d715fb2bcc19f490eea23fcfc76915045b Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 25 Jun 2025 13:02:52 +0200 Subject: [PATCH 5/7] clean up some of the code --- src/main/config.cpp | 2 +- src/optimizer/pushdown/pushdown_get.cpp | 29 ------------------------- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/src/main/config.cpp b/src/main/config.cpp index 0274e2ac9709..2be4afc8488f 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -19,7 +19,7 @@ namespace duckdb { #ifdef DEBUG -bool DBConfigOptions::debug_print_bindings = true; +bool DBConfigOptions::debug_print_bindings = false; #endif #define DUCKDB_GLOBAL(_PARAM) \ diff --git a/src/optimizer/pushdown/pushdown_get.cpp b/src/optimizer/pushdown/pushdown_get.cpp index 968ce18ec486..90dbbb823951 100644 --- a/src/optimizer/pushdown/pushdown_get.cpp +++ b/src/optimizer/pushdown/pushdown_get.cpp @@ -43,35 +43,6 @@ unique_ptr FilterPushdown::PushdownGet(unique_ptr> remain_expressions; - // for (auto &filter : filters) { - // auto &f = *filter; - // auto can_push = true; - // // for (auto &binding : f.bindings) { - // // if (binding == unnest.unnest_index) { - // // can_push = false; - // // break; - // // } - // // } - // // if the expression index table index is the unnest index, then the filter is on the - // // unnest, and it should not be pushed down. - // if (!can_push) { - // // We can't push down related expressions if the column in the - // // expression is generated by the functions which have side effects - // remain_expressions.push_back(std::move(f.filter)); - // } else { - // // add the filter to the child pushdown - // if (child_pushdown.AddFilter(std::move(f.filter)) == FilterResult::UNSATISFIABLE) { - // // filter statically evaluates to false, strip tree - // return make_uniq(std::move(op)); - // } - // } - // } - // } if (!get.table_filters.filters.empty() || !get.function.filter_pushdown) { // the table function does not support filter pushdown: push a LogicalFilter on top From 649ecca1f7e1b61060e74c507af2a6e41c9b1eb3 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 25 Jun 2025 13:34:57 +0200 Subject: [PATCH 6/7] also remove filter from parent optimizer --- .../test_pushdown_filters_into_delim_join.test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test index 8b6cc24070cc..1a8638dc77ff 100644 --- a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test +++ b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test @@ -40,4 +40,17 @@ WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-0 GROUP BY 1 ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; ---- -logical_opt :.*DELIM_JOIN.*FILTER.* \ No newline at end of file +logical_opt :.*DELIM_JOIN.*FILTER.* + + +# filter is pushed below the right delim join, and is not duplicated above it as well. +query II +explain SELECT +(t0."Tags") AS "Tags", +(COUNT(Id)) AS "total_posts_measure" +FROM posts, LATERAL UNNEST("Tags") t0("Tags") +WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') +GROUP BY 1 +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; +---- +logical_opt :.*FILTER.*DELIM_JOIN.*FILTER.* \ No newline at end of file From 81c6b49774e965310cb19d8d92b1e53bd2c230a4 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 25 Jun 2025 15:11:26 +0200 Subject: [PATCH 7/7] add another data point --- src/optimizer/pushdown/pushdown_inner_join.cpp | 16 +++++++++++++--- .../test_pushdown_filters_into_delim_join.test | 3 ++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/optimizer/pushdown/pushdown_inner_join.cpp b/src/optimizer/pushdown/pushdown_inner_join.cpp index e19b6d076ce1..de6808dee3ad 100644 --- a/src/optimizer/pushdown/pushdown_inner_join.cpp +++ b/src/optimizer/pushdown/pushdown_inner_join.cpp @@ -15,15 +15,20 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptrtype == LogicalOperatorType::LOGICAL_DELIM_JOIN) { // try to push the current filters into the children. + unordered_set remove_filters; for (idx_t i = 0; i < join.children.size(); i++) { FilterPushdown new_pushdown(optimizer, convert_mark_joins); unordered_set child_bindings; LogicalJoin::GetTableReferences(*op->children[i], child_bindings); // only add filters to the children if we know the bindings are in the join child - for (auto &filter : filters) { - if (std::all_of(filter->bindings.begin(), filter->bindings.end(), - [&](const idx_t &binding) { return child_bindings.count(binding); })) { + for (idx_t j = 0; j < filters.size(); j++) { + auto &filter = filters[j]; + bool contains_all_bindings = + std::all_of(filter->bindings.begin(), filter->bindings.end(), + [&](const idx_t &binding) { return child_bindings.count(binding); }); + if (contains_all_bindings) { new_pushdown.AddFilter(filter->filter->Copy()); + remove_filters.insert(j); } } new_pushdown.GenerateFilters(); @@ -31,6 +36,11 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptr(pushed_filter)); + } + } // TODO: prevent recursive calls since the Rewrites above will already // pushdown extra filters return FinishPushdown(std::move(op)); diff --git a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test index 1a8638dc77ff..b6bb33caf421 100644 --- a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test +++ b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test @@ -11,7 +11,8 @@ create table posts as from values (5, ['python', 'r'], '2023-03-02T00:00:00.000Z'::TIMESTAMPTZ), (6, ['python', 'r'], '2023-03-04T00:00:00.000Z'::TIMESTAMPTZ), (7, ['r'], '2023-03-08T00:00:00.000Z'::TIMESTAMPTZ), -(8, ['c++'], '2023-03-13T00:00:00.000Z'::TIMESTAMPTZ) t(Id, Tags, CreationDate); +(8, ['c++'], '2023-03-13T00:00:00.000Z'::TIMESTAMPTZ), + (8, ['c++'], '2023-05-13T00:00:00.000Z'::TIMESTAMPTZ) t(Id, Tags, CreationDate); query II SELECT