diff --git a/src/optimizer/pushdown/pushdown_inner_join.cpp b/src/optimizer/pushdown/pushdown_inner_join.cpp index 8370f4ca9ba6..de6808dee3ad 100644 --- a/src/optimizer/pushdown/pushdown_inner_join.cpp +++ b/src/optimizer/pushdown/pushdown_inner_join.cpp @@ -14,6 +14,35 @@ unique_ptr FilterPushdown::PushdownInnerJoin(unique_ptrCast(); D_ASSERT(join.join_type == JoinType::INNER); if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { + // try to push the current filters into the children. + unordered_set remove_filters; + for (idx_t i = 0; i < join.children.size(); i++) { + FilterPushdown new_pushdown(optimizer, convert_mark_joins); + unordered_set child_bindings; + LogicalJoin::GetTableReferences(*op->children[i], child_bindings); + // only add filters to the children if we know the bindings are in the join child + for (idx_t j = 0; j < filters.size(); j++) { + auto &filter = filters[j]; + bool contains_all_bindings = + std::all_of(filter->bindings.begin(), filter->bindings.end(), + [&](const idx_t &binding) { return child_bindings.count(binding); }); + if (contains_all_bindings) { + new_pushdown.AddFilter(filter->filter->Copy()); + remove_filters.insert(j); + } + } + new_pushdown.GenerateFilters(); + if (!new_pushdown.filters.empty()) { + join.children[i] = new_pushdown.Rewrite(std::move(join.children[i])); + } + } + for (auto &pushed_filter : remove_filters) { + if (pushed_filter < filters.size()) { + filters.erase(filters.begin() + static_cast(pushed_filter)); + } + } + // TODO: prevent recursive calls since the Rewrites above will already + // pushdown extra filters return FinishPushdown(std::move(op)); } // inner join: gather all the conditions of the inner join and add to the filter list diff --git a/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test new file mode 100644 index 000000000000..b6bb33caf421 --- /dev/null +++ 
b/test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test @@ -0,0 +1,57 @@ +# name: test/optimizer/pushdown/test_pushdown_filters_into_delim_join.test +# description: Push filters into delim joins. +# group: [pushdown] + +statement ok +create table posts as from values +(1, ['r'], '2022-12-01T00:00:00.000Z'::TIMESTAMPTZ), +(2, ['c++'], '2023-01-03T00:00:00.000Z'::TIMESTAMPTZ), +(3, ['python'], '2023-01-04T00:00:00.000Z'::TIMESTAMPTZ), +(4, ['r', 'stats'], '2023-01-05T00:00:00.000Z'::TIMESTAMPTZ), +(5, ['python', 'r'], '2023-03-02T00:00:00.000Z'::TIMESTAMPTZ), +(6, ['python', 'r'], '2023-03-04T00:00:00.000Z'::TIMESTAMPTZ), +(7, ['r'], '2023-03-08T00:00:00.000Z'::TIMESTAMPTZ), +(8, ['c++'], '2023-03-13T00:00:00.000Z'::TIMESTAMPTZ), + (8, ['c++'], '2023-05-13T00:00:00.000Z'::TIMESTAMPTZ) t(Id, Tags, CreationDate); + +query II +SELECT + (t0."Tags") AS "Tags", + (COUNT(Id)) AS "total_posts_measure" +FROM posts, LATERAL UNNEST("Tags") t0("Tags") +WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') +GROUP BY 1 +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; +---- +r 4 +python 3 +c++ 2 +stats 1 + +statement ok +pragma explain_output='optimized_only'; + +# filter is pushed below the right delim join +query II +explain SELECT +(t0."Tags") AS "Tags", +(COUNT(Id)) AS "total_posts_measure" +FROM posts, LATERAL UNNEST("Tags") t0("Tags") +WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') +GROUP BY 1 +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; +---- +logical_opt <REGEX>:.*DELIM_JOIN.*FILTER.* + + +# filter is pushed below the right delim join, and is not duplicated above it as well. 
+query II +explain SELECT +(t0."Tags") AS "Tags", +(COUNT(Id)) AS "total_posts_measure" +FROM posts, LATERAL UNNEST("Tags") t0("Tags") +WHERE ("CreationDate" >= '2023-01-01T00:00:00.000Z' AND "CreationDate" < '2023-04-01T00:00:00.000Z') +GROUP BY 1 +ORDER BY "total_posts_measure" DESC NULLS LAST LIMIT 8; +---- +logical_opt <!REGEX>:.*FILTER.*DELIM_JOIN.*FILTER.* \ No newline at end of file