From be839a424f64abc5b402db91ef0f27d877b35527 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 2 Nov 2023 11:58:00 +0100 Subject: [PATCH 01/36] add operation converter class --- .../duckdb/common/enums/optimizer_type.hpp | 1 + .../duckdb/optimizer/operation_converter.hpp | 27 +++++++++++++++++++ src/optimizer/join_order/relation_manager.cpp | 5 +++- src/optimizer/operation_converter.cpp | 14 ++++++++++ src/optimizer/optimizer.cpp | 7 +++++ 5 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 src/include/duckdb/optimizer/operation_converter.hpp create mode 100644 src/optimizer/operation_converter.cpp diff --git a/src/include/duckdb/common/enums/optimizer_type.hpp b/src/include/duckdb/common/enums/optimizer_type.hpp index c687ec406e54..cb4d3063e901 100644 --- a/src/include/duckdb/common/enums/optimizer_type.hpp +++ b/src/include/duckdb/common/enums/optimizer_type.hpp @@ -22,6 +22,7 @@ enum class OptimizerType : uint32_t { IN_CLAUSE, JOIN_ORDER, DELIMINATOR, + OPERATION_CONVERTER, UNNEST_REWRITER, UNUSED_COLUMNS, STATISTICS_PROPAGATION, diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp new file mode 100644 index 000000000000..c3a218b90e30 --- /dev/null +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/optimizer/operation_converter.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/optimizer/column_binding_replacer.hpp" + +namespace duckdb { + +//! The Operation Converter converts Set operations to joins when possible +class OperationConverter { +public: + OperationConverter() { + } + //! Perform DelimJoin elimination + unique_ptr Optimize(unique_ptr op); + +private: + +}; + +} // namespace duckdb diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index d15210462ce4..23a7a6a52972 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -91,7 +91,10 @@ static bool OperatorIsNonReorderable(LogicalOperatorType op_type) { switch (op_type) { case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: - case LogicalOperatorType::LOGICAL_INTERSECT: + case LogicalOperatorType::LOGICAL_INTERSECT: { + auto a = 0; + return true; + } case LogicalOperatorType::LOGICAL_DELIM_JOIN: case LogicalOperatorType::LOGICAL_ANY_JOIN: case LogicalOperatorType::LOGICAL_ASOF_JOIN: diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp new file mode 100644 index 000000000000..2e7bdd311ff8 --- /dev/null +++ b/src/optimizer/operation_converter.cpp @@ -0,0 +1,14 @@ +#include "duckdb/optimizer/operation_converter.hpp" + +#include "duckdb/planner/expression/bound_cast_expression.hpp" +#include "duckdb/planner/expression/bound_conjunction_expression.hpp" +#include "duckdb/planner/operator/logical_delim_get.hpp" + +namespace duckdb { + + +unique_ptr OperationConverter::Optimize(unique_ptr op) { + return op; +} + +} // namespace duckdb diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 0a66f0007761..17e13e6e98ba 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -9,6 +9,7 @@ #include "duckdb/optimizer/compressed_materialization.hpp" #include "duckdb/optimizer/cse_optimizer.hpp" #include "duckdb/optimizer/deliminator.hpp" +#include "duckdb/optimizer/operation_converter.hpp" #include "duckdb/optimizer/expression_heuristics.hpp" #include "duckdb/optimizer/filter_pullup.hpp" #include "duckdb/optimizer/filter_pushdown.hpp" @@ -119,6 +120,12 @@ unique_ptr Optimizer::Optimize(unique_ptr plan plan = deliminator.Optimize(std::move(plan)); }); + // Convert setop operations to joins if possible + RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { + OperationConverter optimizer; + plan = optimizer.Optimize(std::move(plan)); + }); + // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { From 748eee686b4f219dc957a7e5b5980b1f9d070288 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 2 Nov 2023 12:02:53 +0100 Subject: [PATCH 02/36] it compiles --- src/optimizer/CMakeLists.txt | 1 + src/optimizer/operation_converter.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 53f57e1c168d..39f3f84caccd 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -14,6 +14,7 @@ add_library_unity( compressed_materialization.cpp cse_optimizer.cpp deliminator.cpp + operation_converter.cpp unnest_rewriter.cpp column_lifetime_analyzer.cpp expression_heuristics.cpp diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 2e7bdd311ff8..8e0cd7cbc82b 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -8,7 +8,7 @@ namespace duckdb { unique_ptr OperationConverter::Optimize(unique_ptr op) { - return op; + return std::move(op); } } // namespace duckdb From ce2225c46acb4e9283a63a26769fd7a9fbe9b0ad Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 10 Nov 2023 13:53:59 +0100 Subject: [PATCH 03/36] convert intersect and except to logical comparison joins --- src/optimizer/operation_converter.cpp | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 8e0cd7cbc82b..3759ce8d0e7d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -8,6 +8,36 @@ namespace duckdb { unique_ptr OperationConverter::Optimize(unique_ptr op) { + for (auto &child : op->children) { + child = Optimize(std::move(child)); + } + switch (op->type) { + case LogicalOperatorType::LOGICAL_INTERSECT: + case LogicalOperatorType::LOGICAL_EXCEPT: { + auto left = std::move(op->children[0]); + auto right = std::move(op->children[1]); + D_ASSERT(left->expressions.size() == right->expressions.size()); + vector conditions; + // create equality condition for all columns + for (idx_t i = 0; i < left->expressions.size(); i++) { + JoinCondition cond; + cond.left = left->expressions[i]->Copy(); + cond.right = right->expressions[i]->Copy(); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + + auto join_op = make_uniq(join_type); + join_op->children.push_back(std::move(left)); + join_op->children.push_back(std::move(right)); + join_op->conditions = std::move(conditions); + + op = std::move(join_op); + } + default: + break; + } return std::move(op); } From 4d1d1027e572ecdcda08992e27eb527f699709e0 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 10 Nov 2023 17:04:08 +0100 Subject: [PATCH 04/36] it works. Thanks @lnkuiper --- src/common/enums/optimizer_type.cpp | 1 + .../duckdb/optimizer/operation_converter.hpp | 7 ++-- src/optimizer/operation_converter.cpp | 35 +++++++++++++------ src/optimizer/optimizer.cpp | 4 +-- .../optimizer/setops/operation_converter.test | 13 +++++++ 5 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 test/optimizer/setops/operation_converter.test diff --git a/src/common/enums/optimizer_type.cpp b/src/common/enums/optimizer_type.cpp index a8365ab76d57..663a80456f72 100644 --- a/src/common/enums/optimizer_type.cpp +++ b/src/common/enums/optimizer_type.cpp @@ -16,6 +16,7 @@ static DefaultOptimizerType internal_optimizer_types[] = { {"filter_pushdown", OptimizerType::FILTER_PUSHDOWN}, {"regex_range", OptimizerType::REGEX_RANGE}, {"in_clause", OptimizerType::IN_CLAUSE}, + {"operation_converter", OptimizerType::OPERATION_CONVERTER}, {"join_order", OptimizerType::JOIN_ORDER}, {"deliminator", OptimizerType::DELIMINATOR}, {"unnest_rewriter", OptimizerType::UNNEST_REWRITER}, diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index c3a218b90e30..2c1495cc6a23 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,13 +15,14 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: - OperationConverter() { + OperationConverter(LogicalOperator &root) : root(root) { + root.ResolveOperatorTypes(); } //! Perform DelimJoin elimination - unique_ptr Optimize(unique_ptr op); + void Optimize(unique_ptr &op); + LogicalOperator &root; private: - }; } // namespace duckdb diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 3759ce8d0e7d..a6b3d2ae07e4 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,39 +6,54 @@ namespace duckdb { - -unique_ptr OperationConverter::Optimize(unique_ptr op) { +void OperationConverter::Optimize(unique_ptr &op) { for (auto &child : op->children) { - child = Optimize(std::move(child)); + Optimize(child); } switch (op->type) { case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_EXCEPT: { - auto left = std::move(op->children[0]); - auto right = std::move(op->children[1]); - D_ASSERT(left->expressions.size() == right->expressions.size()); + auto &left = op->children[0]; + auto &right = op->children[1]; + auto left_bindings = left->GetColumnBindings(); + auto right_bindings = right->GetColumnBindings(); + D_ASSERT(left_bindings.size() == right_bindings.size()); vector conditions; // create equality condition for all columns - for (idx_t i = 0; i < left->expressions.size(); i++) { + for (idx_t i = 0; i < left_bindings.size(); i++) { JoinCondition cond; - cond.left = left->expressions[i]->Copy(); - cond.right = right->expressions[i]->Copy(); + cond.left = make_uniq(left->types[i], left_bindings[i]); + cond.right = make_uniq(right->types[i], right_bindings[i]); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; conditions.push_back(std::move(cond)); } JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + auto old_bindings = op->GetColumnBindings(); + auto join_op = make_uniq(join_type); join_op->children.push_back(std::move(left)); join_op->children.push_back(std::move(right)); join_op->conditions = std::move(conditions); + // update the op so that it is the join op. op = std::move(join_op); + + // now perform column binding replacement + auto new_bindings = op->GetColumnBindings(); + D_ASSERT(old_bindings.size() == new_bindings.size()); + vector replacement_bindings; + for (idx_t i = 0; i < old_bindings.size(); i++) { + replacement_bindings.push_back({old_bindings[i], new_bindings[i]}); + } + + auto binding_replacer = ColumnBindingReplacer(); + binding_replacer.replacement_bindings = replacement_bindings; + binding_replacer.VisitOperator(root); } default: break; } - return std::move(op); } } // namespace duckdb diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 17e13e6e98ba..3a9e709c26ad 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -122,8 +122,8 @@ unique_ptr Optimizer::Optimize(unique_ptr plan // Convert setop operations to joins if possible RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter optimizer; - plan = optimizer.Optimize(std::move(plan)); + OperationConverter optimizer(*plan); + optimizer.Optimize(plan); }); // then we perform the join ordering optimization diff --git a/test/optimizer/setops/operation_converter.test b/test/optimizer/setops/operation_converter.test new file mode 100644 index 000000000000..d2887a8b9c99 --- /dev/null +++ b/test/optimizer/setops/operation_converter.test @@ -0,0 +1,13 @@ +# name: test/optimizer/setops/operation_converter.test +# description: converting intersect/except to semi anti +# group: [setops] + +statement ok +create table left_table as select range as a from range(100); + +statement ok +create table right_table as select range*2 as b from range(10000); + + +explain select * from left_table intersect select * from right_table; + From 2b926bf8449a04423ca00c8e269c642a159986c1 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 10 Nov 2023 17:08:17 +0100 Subject: [PATCH 05/36] remove unused variable --- src/optimizer/join_order/relation_manager.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 23a7a6a52972..b6c456932b94 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -92,7 +92,6 @@ static bool OperatorIsNonReorderable(LogicalOperatorType op_type) { case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { - auto a = 0; return true; } case LogicalOperatorType::LOGICAL_DELIM_JOIN: From fe2ab13e2ae8022dea640f61b2193d0578f2ba14 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 13 Nov 2023 09:33:14 +0100 Subject: [PATCH 06/36] no unrecognized parameter --- test/optimizer/setops/operation_converter.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/optimizer/setops/operation_converter.test b/test/optimizer/setops/operation_converter.test index d2887a8b9c99..8bc525f3c840 100644 --- a/test/optimizer/setops/operation_converter.test +++ b/test/optimizer/setops/operation_converter.test @@ -8,6 +8,6 @@ create table left_table as select range as a from range(100); statement ok create table right_table as select range*2 as b from range(10000); - -explain select * from left_table intersect select * from right_table; +statement ok +select * from left_table intersect select * from right_table; From bdb24459351b6d4a94806dd48fbb9d156fa14257 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 10:51:00 +0100 Subject: [PATCH 07/36] tidy fix --- src/common/enum_util.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index 806480ea68d8..5e6cb44d0bca 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -3605,6 +3605,8 @@ const char* EnumUtil::ToChars(OptimizerType value) { return "JOIN_ORDER"; case OptimizerType::DELIMINATOR: return "DELIMINATOR"; + case OptimizerType::OPERATION_CONVERTER: + return "OPERATION_CONVERTER"; case OptimizerType::UNNEST_REWRITER: return "UNNEST_REWRITER"; case OptimizerType::UNUSED_COLUMNS: @@ -3658,6 +3660,9 @@ OptimizerType EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "DELIMINATOR")) { return OptimizerType::DELIMINATOR; } + if (StringUtil::Equals(value, "OPERATION_CONVERTER")) { + return OptimizerType::OPERATION_CONVERTER; + } if (StringUtil::Equals(value, "UNNEST_REWRITER")) { return OptimizerType::UNNEST_REWRITER; } From b17b28b54a4044d7f56a3ba0b6053b6904cd3eac Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 14:18:21 +0100 Subject: [PATCH 08/36] change test file. Figure out this logical execute stuff --- src/optimizer/join_order/relation_manager.cpp | 4 ++++ .../{ => setops}/pushdown_set_op.test | 24 +++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) rename test/optimizer/{ => setops}/pushdown_set_op.test (81%) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index b6c456932b94..32de4e7888e1 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -10,6 +10,7 @@ #include "duckdb/planner/operator/list.hpp" #include +#include "iostream" namespace duckdb { @@ -52,6 +53,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr:.*INTERSECT.* +logical_opt :.*SEMI.* # intersect is empty if either side is empty query II explain select 42 intersect select 42 where 1=0; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select 42 where 1=0 intersect select 42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* # except is empty if LHS is empty query II explain select 42 where 1=0 except select 42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* # if RHS is empty we can optimize away the except query II explain select 42 except select 42 where 1=0; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* # now pushdown subquery with set ops query II explain select * from (select 42 intersect select 42) tbl(i) where i=42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select * from (select 42 intersect select 43) tbl(i) where i=42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select * from (select 43 intersect select 42) tbl(i) where i=42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select * from (select 42 except select 42) tbl(i) where i=42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* query II explain select * from (select 42 except select 43) tbl(i) where i=42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* query II explain select * from (select 43 except select 42) tbl(i) where i=42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* query I select 42 intersect select 42; From 962d856b38059832d22bbd2997b2b86943c8ee27 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 14:47:34 +0100 Subject: [PATCH 09/36] remove cout --- src/optimizer/join_order/relation_manager.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 32de4e7888e1..b6c456932b94 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -10,7 +10,6 @@ #include "duckdb/planner/operator/list.hpp" #include -#include "iostream" namespace duckdb { @@ -53,9 +52,6 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr Date: Mon, 13 Nov 2023 15:43:58 +0100 Subject: [PATCH 10/36] fix resolve types error --- src/include/duckdb/optimizer/operation_converter.hpp | 7 ++++++- src/optimizer/optimizer.cpp | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index 2c1495cc6a23..e40f648a2983 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -16,7 +16,12 @@ namespace duckdb { class OperationConverter { public: OperationConverter(LogicalOperator &root) : root(root) { - root.ResolveOperatorTypes(); + switch (root.type) { + case LogicalOperatorType::LOGICAL_EXECUTE: + break; + default: + root.ResolveOperatorTypes(); + } } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 3a9e709c26ad..37677af27552 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -122,8 +122,8 @@ unique_ptr Optimizer::Optimize(unique_ptr plan // Convert setop operations to joins if possible RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter optimizer(*plan); - optimizer.Optimize(plan); + OperationConverter converter(*plan); + converter.Optimize(plan); }); // then we perform the join ordering optimization From 85dece8093b3ddea09d03942e937d470a3ea736d Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 16:37:10 +0100 Subject: [PATCH 11/36] move test group --- test/optimizer/setops/pushdown_set_op.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index 9e1944f7dbb9..80a790667667 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -1,6 +1,6 @@ # name: test/optimizer/setops/pushdown_set_op.test # description: Pushdown set operations -# group: [optimizer] +# group: [setops] statement ok PRAGMA explain_output = 'OPTIMIZED_ONLY' From 66df2fcb03b39d961eda23be8f180017a7845e99 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 17:58:03 +0100 Subject: [PATCH 12/36] figuring out why this execute is being an issue --- .../duckdb/optimizer/operation_converter.hpp | 23 ++++++++++++++----- src/optimizer/join_order/relation_manager.cpp | 3 +++ src/optimizer/operation_converter.cpp | 5 ++++ src/planner/logical_operator.cpp | 2 +- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index e40f648a2983..6c6a4b959470 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,13 +15,24 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: + OperationConverter(LogicalOperator &root) : root(root) { - switch (root.type) { - case LogicalOperatorType::LOGICAL_EXECUTE: - break; - default: - root.ResolveOperatorTypes(); - } + //! don't want to resolve execute because execute is assumed to be resolved already. + //! and the types are deleted if you call resolve types on a logical execute + //! this is bad, because when you want to add a relation in the join order optimizer, + //! and the execute doesn't have any types, then no table references are generated. + //! causing the D_ASSERT in relation_manager.cpp:60 to break; + + //! but we do need to resolve all types so that the types of a projection match the + //! size of the column bindings. +// auto &op = root; +// while (root.type == LogicalOperatorType::LOGICAL_EXECUTE) { +// root = root.children[0]; +// } + + // What I want to know is why a logical execute is planned and what it is used for exactly. + // It doesn't make a lot of sense to me. + op.ResolveOperatorTypes(); } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index b6c456932b94..cd6502df7b93 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -57,6 +57,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr table_references; + if (op.type == LogicalOperatorType::LOGICAL_EXECUTE) { + auto stop_here = "stop here"; + } LogicalJoin::GetTableReferences(op, table_references); D_ASSERT(table_references.size() > 0); for (auto &reference : table_references) { diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index a6b3d2ae07e4..41d38afae12d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -18,6 +18,11 @@ void OperationConverter::Optimize(unique_ptr &op) { auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); +// if (left->types.size() != left_bindings.size()) { +// auto cool = "a"; +// } + D_ASSERT(left->types.size() == left_bindings.size()); + D_ASSERT(right->types.size() == right_bindings.size()); vector conditions; // create equality condition for all columns for (idx_t i = 0; i < left_bindings.size(); i++) { diff --git a/src/planner/logical_operator.cpp b/src/planner/logical_operator.cpp index 8c3c4c93b756..03e9e88be069 100644 --- a/src/planner/logical_operator.cpp +++ b/src/planner/logical_operator.cpp @@ -42,8 +42,8 @@ string LogicalOperator::ParamsToString() const { } void LogicalOperator::ResolveOperatorTypes() { - types.clear(); + // first resolve child types for (auto &child : children) { child->ResolveOperatorTypes(); From 55e02dee73c3bb8836f78c3c51ba8c3df1624513 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 18:06:48 +0100 Subject: [PATCH 13/36] more comments to pick up on --- src/include/duckdb/optimizer/operation_converter.hpp | 7 +++++-- src/include/duckdb/planner/operator/logical_execute.hpp | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index 6c6a4b959470..064eba12d0d3 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -17,7 +17,7 @@ class OperationConverter { public: OperationConverter(LogicalOperator &root) : root(root) { - //! don't want to resolve execute because execute is assumed to be resolved already. + //! We don't want to resolve execute because execute is assumed to be resolved already. //! and the types are deleted if you call resolve types on a logical execute //! this is bad, because when you want to add a relation in the join order optimizer, //! and the execute doesn't have any types, then no table references are generated. @@ -25,6 +25,9 @@ class OperationConverter { //! but we do need to resolve all types so that the types of a projection match the //! size of the column bindings. + + + //! Just need to know what the logical execute does. So confused. // auto &op = root; // while (root.type == LogicalOperatorType::LOGICAL_EXECUTE) { // root = root.children[0]; @@ -32,7 +35,7 @@ class OperationConverter { // What I want to know is why a logical execute is planned and what it is used for exactly. // It doesn't make a lot of sense to me. - op.ResolveOperatorTypes(); + root.ResolveOperatorTypes(); } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); diff --git a/src/include/duckdb/planner/operator/logical_execute.hpp b/src/include/duckdb/planner/operator/logical_execute.hpp index 62eb28f2e7f0..2bd77586bd39 100644 --- a/src/include/duckdb/planner/operator/logical_execute.hpp +++ b/src/include/duckdb/planner/operator/logical_execute.hpp @@ -34,6 +34,7 @@ class LogicalExecute : public LogicalOperator { protected: void ResolveTypes() override { + auto whatever = 'sdfgs'; // already resolved } vector GetColumnBindings() override { From 612cc58cab2ff9909019eb00e46920377653f847 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 14 Nov 2023 11:17:15 +0100 Subject: [PATCH 14/36] logical execute should still resolve types in case resolveTypes is called in the optimizer. GetTableReferences(), used in the join order optimizer, relies on the types array having elements. The Operation Converter calls resolveTypes. --- .../duckdb/optimizer/operation_converter.hpp | 22 +------------------ .../planner/operator/logical_execute.hpp | 2 +- src/optimizer/operation_converter.cpp | 8 ++++--- src/planner/planner.cpp | 1 - 4 files changed, 7 insertions(+), 26 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index 064eba12d0d3..ff560d2d0cf2 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,28 +15,8 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: + OperationConverter(LogicalOperator &root); - OperationConverter(LogicalOperator &root) : root(root) { - //! We don't want to resolve execute because execute is assumed to be resolved already. - //! and the types are deleted if you call resolve types on a logical execute - //! this is bad, because when you want to add a relation in the join order optimizer, - //! and the execute doesn't have any types, then no table references are generated. - //! causing the D_ASSERT in relation_manager.cpp:60 to break; - - //! but we do need to resolve all types so that the types of a projection match the - //! size of the column bindings. - - - //! Just need to know what the logical execute does. So confused. -// auto &op = root; -// while (root.type == LogicalOperatorType::LOGICAL_EXECUTE) { -// root = root.children[0]; -// } - - // What I want to know is why a logical execute is planned and what it is used for exactly. - // It doesn't make a lot of sense to me. - root.ResolveOperatorTypes(); - } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); LogicalOperator &root; diff --git a/src/include/duckdb/planner/operator/logical_execute.hpp b/src/include/duckdb/planner/operator/logical_execute.hpp index 2bd77586bd39..c0ef549b0561 100644 --- a/src/include/duckdb/planner/operator/logical_execute.hpp +++ b/src/include/duckdb/planner/operator/logical_execute.hpp @@ -34,7 +34,7 @@ class LogicalExecute : public LogicalOperator { protected: void ResolveTypes() override { - auto whatever = 'sdfgs'; + types = prepared->types; // already resolved } vector GetColumnBindings() override { diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 41d38afae12d..4fb5d7ce705a 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,6 +6,10 @@ namespace duckdb { +OperationConverter::OperationConverter(LogicalOperator &root) : root(root) { + root.ResolveOperatorTypes(); +} + void OperationConverter::Optimize(unique_ptr &op) { for (auto &child : op->children) { Optimize(child); @@ -18,9 +22,7 @@ void OperationConverter::Optimize(unique_ptr &op) { auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); -// if (left->types.size() != left_bindings.size()) { -// auto cool = "a"; -// } + D_ASSERT(left->types.size() == left_bindings.size()); D_ASSERT(right->types.size() == right_bindings.size()); vector conditions; diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index 4b1c28d6a5b2..c6c44b03c28b 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -30,7 +30,6 @@ static void CheckTreeDepth(const LogicalOperator &op, idx_t max_depth, idx_t dep void Planner::CreatePlan(SQLStatement &statement) { auto &profiler = QueryProfiler::Get(context); auto parameter_count = statement.n_param; - BoundParameterMap bound_parameters(parameter_data); // first bind the tables and columns to the catalog From ce5cee6afed6fe7f48221b22cafe8d9f9dd40731 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 14 Nov 2023 12:20:52 +0100 Subject: [PATCH 15/36] remove unused code --- src/optimizer/join_order/relation_manager.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index cd6502df7b93..b6c456932b94 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -57,9 +57,6 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr table_references; - if (op.type == LogicalOperatorType::LOGICAL_EXECUTE) { - auto stop_here = "stop here"; - } LogicalJoin::GetTableReferences(op, table_references); D_ASSERT(table_references.size() > 0); for (auto &reference : table_references) { From 020deb592b91129fb7710fa929c0ae6ebd8926c6 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 14 Nov 2023 14:17:00 +0100 Subject: [PATCH 16/36] tidy fixes --- src/optimizer/operation_converter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 4fb5d7ce705a..cd0e2df1cf8d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -1,8 +1,10 @@ #include "duckdb/optimizer/operation_converter.hpp" - #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_conjunction_expression.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/operator/logical_delim_get.hpp" +#include "duckdb/common/enums/join_type.hpp" +#include "duckdb/planner/joinside.hpp" namespace duckdb { From af32dd20ac79d3b8462e7734cb1af9ec98a635d2 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 15 Nov 2023 09:51:46 +0100 Subject: [PATCH 17/36] clang tidy. more fixes --- src/optimizer/operation_converter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index cd0e2df1cf8d..75f962f5a9f7 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -5,6 +5,7 @@ #include "duckdb/planner/operator/logical_delim_get.hpp" #include "duckdb/common/enums/join_type.hpp" #include "duckdb/planner/joinside.hpp" +#include "duckdb/planner/operator/logical_comparison_join.hpp" namespace duckdb { From f006df4c03e5b3e6704a703d8fd62210a68cb28c Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 15 Nov 2023 09:54:04 +0100 Subject: [PATCH 18/36] remove redundant return true --- src/optimizer/join_order/relation_manager.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index b6c456932b94..d15210462ce4 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -91,9 +91,7 @@ static bool OperatorIsNonReorderable(LogicalOperatorType op_type) { switch (op_type) { case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: - case LogicalOperatorType::LOGICAL_INTERSECT: { - return true; - } + case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_DELIM_JOIN: case LogicalOperatorType::LOGICAL_ANY_JOIN: case LogicalOperatorType::LOGICAL_ASOF_JOIN: From 27ea4076bdb0c97d2425e19d46db5dcbb3ba0f6e Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 15 Nov 2023 09:59:06 +0100 Subject: [PATCH 19/36] naming fix --- src/optimizer/join_order/relation_statistics_helper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimizer/join_order/relation_statistics_helper.cpp b/src/optimizer/join_order/relation_statistics_helper.cpp index a708f108c149..bb5fe81271d9 100644 --- a/src/optimizer/join_order/relation_statistics_helper.cpp +++ b/src/optimizer/join_order/relation_statistics_helper.cpp @@ -244,7 +244,8 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log } ret.stats_initialized = true; ret.filter_strength = 1; - ret.table_name = child_stats[0].table_name + " joined with " + child_stats[1].table_name; + ret.table_name = + "(" + child_stats[0].table_name + LogicalOperatorToString(op.type) + child_stats[1].table_name + ")"; for (auto &stats : child_stats) { // MARK joins are nonreorderable. They won't return initialized stats // continue in this case. From ad4843c30199918c4d48ebe37a8050bf263b902d Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 23 Nov 2023 16:01:18 +0100 Subject: [PATCH 20/36] very lost. Dont know how to push an aggregate on the join. I think I might need to rebind everything if I do it --- .../duckdb/optimizer/operation_converter.hpp | 5 +- src/optimizer/operation_converter.cpp | 40 +++++++++- src/optimizer/optimizer.cpp | 4 +- test/optimizer/setops/pushdown_set_op.test | 22 ++--- test/sql/setops/value_union.test | 80 +++++++++---------- 5 files changed, 92 insertions(+), 59 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index ff560d2d0cf2..e00a7b9c35c4 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,11 +15,12 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: - OperationConverter(LogicalOperator &root); + OperationConverter(LogicalOperator &root, Binder &binder); //! Perform DelimJoin elimination - void Optimize(unique_ptr &op); + void Optimize(unique_ptr &op, bool is_root = false); LogicalOperator &root; + Binder &binder; private: }; diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 75f962f5a9f7..ca9a5ed86d47 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,20 +6,32 @@ #include "duckdb/common/enums/join_type.hpp" #include "duckdb/planner/joinside.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" +#include "duckdb/planner/operator/logical_set_operation.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/operator/logical_aggregate.hpp" namespace duckdb { -OperationConverter::OperationConverter(LogicalOperator &root) : root(root) { +OperationConverter::OperationConverter(LogicalOperator &root, Binder &binder) : root(root), binder(binder) { root.ResolveOperatorTypes(); } -void OperationConverter::Optimize(unique_ptr &op) { +void OperationConverter::Optimize(unique_ptr &op, bool is_root) { for (auto &child : op->children) { Optimize(child); } switch (op->type) { + // if it is setop all, we don't replace (even though we technically still could) + // if it is not setop all, duplicate elimination should happen case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_EXCEPT: { + auto &set_op = op->Cast(); + if (set_op.setop_all || is_root) { + // If set_op all is not defined, then results need to be deduplicated. + // if the operator is the root, then break. We don't want to update the root + // and the join order optimizer can still have an effect. + break; + } auto &left = op->children[0]; auto &right = op->children[1]; auto left_bindings = left->GetColumnBindings(); @@ -45,11 +57,11 @@ void OperationConverter::Optimize(unique_ptr &op) { join_op->children.push_back(std::move(left)); join_op->children.push_back(std::move(right)); join_op->conditions = std::move(conditions); + join_op->ResolveOperatorTypes(); - // update the op so that it is the join op. op = std::move(join_op); - // now perform column binding replacement + // perform column binding replacement auto new_bindings = op->GetColumnBindings(); D_ASSERT(old_bindings.size() == new_bindings.size()); vector replacement_bindings; @@ -60,6 +72,26 @@ void OperationConverter::Optimize(unique_ptr &op) { auto binding_replacer = ColumnBindingReplacer(); binding_replacer.replacement_bindings = replacement_bindings; binding_replacer.VisitOperator(root); + + // push a group by all on top of the join + + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> select_list, groups, aggregates /* left empty */; + D_ASSERT(join_bindings.size() == types.size()); + auto table_index = binder.GenerateTableIndex(); + auto aggregate_index = binder.GenerateTableIndex(); + for (idx_t i = 0; i < join_bindings.size(); i++) { + select_list.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); +// groups.push_back(make_uniq(types[i], join_bindings.at(i))); +// groups.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); + } + auto group_by = make_uniq(table_index, aggregate_index, std::move(select_list)); + group_by->groups = std::move(groups); + group_by->children.push_back(std::move(op)); + + op = std::move(group_by); + } default: break; diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 37677af27552..e8dcbde7925d 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -122,8 +122,8 @@ unique_ptr Optimizer::Optimize(unique_ptr plan // Convert setop operations to joins if possible RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter converter(*plan); - converter.Optimize(plan); + OperationConverter converter(*plan, binder); + converter.Optimize(plan, true); }); // then we perform the join ordering optimization diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index 80a790667667..927531c7d817 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -7,61 +7,61 @@ PRAGMA explain_output = 'OPTIMIZED_ONLY' query II -explain select 42 intersect select 42; +explain select 42 intersect ALL select 42; ---- logical_opt :.*SEMI.* # intersect is empty if either side is empty query II -explain select 42 intersect select 42 where 1=0; +explain select 42 intersect ALL select 42 where 1=0; ---- logical_opt :.*SEMI.* query II -explain select 42 where 1=0 intersect select 42; +explain select 42 where 1=0 intersect ALL select 42; ---- logical_opt :.*SEMI.* # except is empty if LHS is empty query II -explain select 42 where 1=0 except select 42; +explain select 42 where 1=0 except ALL select 42; ---- logical_opt :.*ANTI.* # if RHS is empty we can optimize away the except query II -explain select 42 except select 42 where 1=0; +explain select 42 except ALL select 42 where 1=0; ---- logical_opt :.*ANTI.* # now pushdown subquery with set ops query II -explain select * from (select 42 intersect select 42) tbl(i) where i=42; +explain select * from (select 42 intersect ALL select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 intersect select 43) tbl(i) where i=42; +explain select * from (select 42 intersect ALL select 43) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 43 intersect select 42) tbl(i) where i=42; +explain select * from (select 43 intersect ALL select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 except select 42) tbl(i) where i=42; +explain select * from (select 42 except ALL select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 42 except select 43) tbl(i) where i=42; +explain select * from (select 42 except ALL select 43) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 43 except select 42) tbl(i) where i=42; +explain select * from (select 43 except ALL select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* diff --git a/test/sql/setops/value_union.test b/test/sql/setops/value_union.test index b83384ef12f8..030b165da9bc 100644 --- a/test/sql/setops/value_union.test +++ b/test/sql/setops/value_union.test @@ -8,12 +8,12 @@ PRAGMA enable_verification statement ok CREATE VIEW vals AS SELECT * FROM (VALUES (1, 10), (2, 5), (3, 4)) tbl(i, j); -query II -SELECT * FROM vals ----- -1 10 -2 5 -3 4 +#query II +#SELECT * FROM vals +#---- +#1 10 +#2 5 +#3 4 statement ok CREATE VIEW vunion AS @@ -21,39 +21,39 @@ SELECT * FROM vals UNION ALL SELECT * FROM vals; -query II -SELECT * FROM vunion ORDER BY i ----- -1 10 -1 10 -2 5 -2 5 -3 4 -3 4 - -query II -SELECT * FROM vunion ORDER BY i LIMIT 1 ----- -1 10 - -query II -SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 ----- -2 5 -2 5 - -query II -SELECT * FROM vunion WHERE i=1 ----- -1 10 -1 10 - -query II -SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 ----- -1 10 -2 5 -3 4 +#query II +#SELECT * FROM vunion ORDER BY i +#---- +#1 10 +#1 10 +#2 5 +#2 5 +#3 4 +#3 4 +# +#query II +#SELECT * FROM vunion ORDER BY i LIMIT 1 +#---- +#1 10 +# +#query II +#SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 +#---- +#2 5 +#2 5 +# +#query II +#SELECT * FROM vunion WHERE i=1 +#---- +#1 10 +#1 10 +# +#query II +#SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 +#---- +#1 10 +#2 5 +#3 4 query II SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 1; @@ -62,6 +62,6 @@ SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 2 5 3 4 -query II +query IIs SELECT * FROM (SELECT * FROM vunion EXCEPT SELECT * FROM vunion) tbl ---- From b486f17afc0751a8544a2b8c78a3351839164d8b Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 24 Nov 2023 12:53:19 +0100 Subject: [PATCH 21/36] just add a logical distinct instead --- src/optimizer/operation_converter.cpp | 41 +++++++------- test/sql/setops/value_union.test | 80 +++++++++++++-------------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index ca9a5ed86d47..262adbce0809 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -9,6 +9,7 @@ #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" +#include "duckdb/planner/operator/logical_distinct.hpp" namespace duckdb { @@ -32,6 +33,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) // and the join order optimizer can still have an effect. break; } + auto table_index = set_op.table_index; auto &left = op->children[0]; auto &right = op->children[1]; auto left_bindings = left->GetColumnBindings(); @@ -61,7 +63,24 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) op = std::move(join_op); - // perform column binding replacement + // push a distinct operator on the join + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> distinct_targets; + vector> select_list; + for (idx_t i = 0; i < join_bindings.size(); i++) { + distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); + select_list.push_back(make_uniq(types[i], join_bindings[i])); + } + auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); + distinct->children.push_back(std::move(op)); + op = std::move(distinct); + + auto projection = make_uniq(table_index, std::move(select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); + + // now perform column binding replacement auto new_bindings = op->GetColumnBindings(); D_ASSERT(old_bindings.size() == new_bindings.size()); vector replacement_bindings; @@ -72,26 +91,6 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto binding_replacer = ColumnBindingReplacer(); binding_replacer.replacement_bindings = replacement_bindings; binding_replacer.VisitOperator(root); - - // push a group by all on top of the join - - auto &types = op->types; - auto join_bindings = op->GetColumnBindings(); - vector> select_list, groups, aggregates /* left empty */; - D_ASSERT(join_bindings.size() == types.size()); - auto table_index = binder.GenerateTableIndex(); - auto aggregate_index = binder.GenerateTableIndex(); - for (idx_t i = 0; i < join_bindings.size(); i++) { - select_list.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); -// groups.push_back(make_uniq(types[i], join_bindings.at(i))); -// groups.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); - } - auto group_by = make_uniq(table_index, aggregate_index, std::move(select_list)); - group_by->groups = std::move(groups); - group_by->children.push_back(std::move(op)); - - op = std::move(group_by); - } default: break; diff --git a/test/sql/setops/value_union.test b/test/sql/setops/value_union.test index 030b165da9bc..b83384ef12f8 100644 --- a/test/sql/setops/value_union.test +++ b/test/sql/setops/value_union.test @@ -8,12 +8,12 @@ PRAGMA enable_verification statement ok CREATE VIEW vals AS SELECT * FROM (VALUES (1, 10), (2, 5), (3, 4)) tbl(i, j); -#query II -#SELECT * FROM vals -#---- -#1 10 -#2 5 -#3 4 +query II +SELECT * FROM vals +---- +1 10 +2 5 +3 4 statement ok CREATE VIEW vunion AS @@ -21,39 +21,39 @@ SELECT * FROM vals UNION ALL SELECT * FROM vals; -#query II -#SELECT * FROM vunion ORDER BY i -#---- -#1 10 -#1 10 -#2 5 -#2 5 -#3 4 -#3 4 -# -#query II -#SELECT * FROM vunion ORDER BY i LIMIT 1 -#---- -#1 10 -# -#query II -#SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 -#---- -#2 5 -#2 5 -# -#query II -#SELECT * FROM vunion WHERE i=1 -#---- -#1 10 -#1 10 -# -#query II -#SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 -#---- -#1 10 -#2 5 -#3 4 +query II +SELECT * FROM vunion ORDER BY i +---- +1 10 +1 10 +2 5 +2 5 +3 4 +3 4 + +query II +SELECT * FROM vunion ORDER BY i LIMIT 1 +---- +1 10 + +query II +SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 +---- +2 5 +2 5 + +query II +SELECT * FROM vunion WHERE i=1 +---- +1 10 +1 10 + +query II +SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 +---- +1 10 +2 5 +3 4 query II SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 1; @@ -62,6 +62,6 @@ SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 2 5 3 4 -query IIs +query II SELECT * FROM (SELECT * FROM vunion EXCEPT SELECT * FROM vunion) tbl ---- From 72d23175ad776ce63fa4c1e81e4894adc98168b4 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 27 Nov 2023 15:01:59 +0100 Subject: [PATCH 22/36] fix broken tests after adding distinct --- src/optimizer/operation_converter.cpp | 1 + test/optimizer/setops/pushdown_set_op.test | 22 +++++----- test/sql/tpcds/tpcds_sf0.test | 47 ++++++++++------------ 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 262adbce0809..07c92d04f2ea 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -79,6 +79,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto projection = make_uniq(table_index, std::move(select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); + op->ResolveOperatorTypes(); // now perform column binding replacement auto new_bindings = op->GetColumnBindings(); diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index 927531c7d817..80a790667667 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -7,61 +7,61 @@ PRAGMA explain_output = 'OPTIMIZED_ONLY' query II -explain select 42 intersect ALL select 42; +explain select 42 intersect select 42; ---- logical_opt :.*SEMI.* # intersect is empty if either side is empty query II -explain select 42 intersect ALL select 42 where 1=0; +explain select 42 intersect select 42 where 1=0; ---- logical_opt :.*SEMI.* query II -explain select 42 where 1=0 intersect ALL select 42; +explain select 42 where 1=0 intersect select 42; ---- logical_opt :.*SEMI.* # except is empty if LHS is empty query II -explain select 42 where 1=0 except ALL select 42; +explain select 42 where 1=0 except select 42; ---- logical_opt :.*ANTI.* # if RHS is empty we can optimize away the except query II -explain select 42 except ALL select 42 where 1=0; +explain select 42 except select 42 where 1=0; ---- logical_opt :.*ANTI.* # now pushdown subquery with set ops query II -explain select * from (select 42 intersect ALL select 42) tbl(i) where i=42; +explain select * from (select 42 intersect select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 intersect ALL select 43) tbl(i) where i=42; +explain select * from (select 42 intersect select 43) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 43 intersect ALL select 42) tbl(i) where i=42; +explain select * from (select 43 intersect select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 except ALL select 42) tbl(i) where i=42; +explain select * from (select 42 except select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 42 except ALL select 43) tbl(i) where i=42; +explain select * from (select 42 except select 43) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 43 except ALL select 42) tbl(i) where i=42; +explain select * from (select 43 except select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* diff --git a/test/sql/tpcds/tpcds_sf0.test b/test/sql/tpcds/tpcds_sf0.test index 99264975a793..deeca802ce4b 100644 --- a/test/sql/tpcds/tpcds_sf0.test +++ b/test/sql/tpcds/tpcds_sf0.test @@ -7,30 +7,25 @@ require tpcds statement ok CALL dsdgen(sf=0) -loop i 1 100 - -statement ok -PRAGMA tpcds(${i}) - -endloop - -# out of range -statement error -PRAGMA tpcds(-1) ----- - -statement error -PRAGMA tpcds(3290819023812038903) ----- - -statement error -PRAGMA tpcds(32908301298) ----- - -statement error -PRAGMA tpcds(1.1) ----- - -# queries statement ok -SELECT * FROM tpcds_queries() +PRAGMA tpcds(14) + +# +#endloop +# +## out of range +#statement error +#PRAGMA tpcds(-1) +# +#statement error +#PRAGMA tpcds(3290819023812038903) +# +#statement error +#PRAGMA tpcds(32908301298) +# +#statement error +#PRAGMA tpcds(1.1) +# +## queries +#statement ok +#SELECT * FROM tpcds_queries() From 8037882332f9c690506ecc1f67b738b8d933ce2e Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 29 Nov 2023 11:22:20 +0100 Subject: [PATCH 23/36] add header --- src/optimizer/operation_converter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 07c92d04f2ea..5bea56b57cfd 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -8,6 +8,7 @@ #include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_distinct.hpp" From 2730dae9548584c7226b3e41b4c4064b65d370f8 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 4 Dec 2023 13:42:22 +0100 Subject: [PATCH 24/36] clean up PR --- .../planner/operator/logical_execute.hpp | 1 - src/planner/logical_operator.cpp | 2 +- src/planner/planner.cpp | 1 + test/sql/tpcds/tpcds_sf0.test | 39 +++++++++---------- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/include/duckdb/planner/operator/logical_execute.hpp b/src/include/duckdb/planner/operator/logical_execute.hpp index c0ef549b0561..0c6eff24c804 100644 --- a/src/include/duckdb/planner/operator/logical_execute.hpp +++ b/src/include/duckdb/planner/operator/logical_execute.hpp @@ -35,7 +35,6 @@ class LogicalExecute : public LogicalOperator { protected: void ResolveTypes() override { types = prepared->types; - // already resolved } vector GetColumnBindings() override { return GenerateColumnBindings(0, types.size()); diff --git a/src/planner/logical_operator.cpp b/src/planner/logical_operator.cpp index 03e9e88be069..8c3c4c93b756 100644 --- a/src/planner/logical_operator.cpp +++ b/src/planner/logical_operator.cpp @@ -42,8 +42,8 @@ string LogicalOperator::ParamsToString() const { } void LogicalOperator::ResolveOperatorTypes() { - types.clear(); + types.clear(); // first resolve child types for (auto &child : children) { child->ResolveOperatorTypes(); diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index c6c44b03c28b..4b1c28d6a5b2 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -30,6 +30,7 @@ static void CheckTreeDepth(const LogicalOperator &op, idx_t max_depth, idx_t dep void Planner::CreatePlan(SQLStatement &statement) { auto &profiler = QueryProfiler::Get(context); auto parameter_count = statement.n_param; + BoundParameterMap bound_parameters(parameter_data); // first bind the tables and columns to the catalog diff --git a/test/sql/tpcds/tpcds_sf0.test b/test/sql/tpcds/tpcds_sf0.test index deeca802ce4b..09ef16e24674 100644 --- a/test/sql/tpcds/tpcds_sf0.test +++ b/test/sql/tpcds/tpcds_sf0.test @@ -7,25 +7,22 @@ require tpcds statement ok CALL dsdgen(sf=0) +loop i 1 100 + statement ok -PRAGMA tpcds(14) - -# -#endloop -# -## out of range -#statement error -#PRAGMA tpcds(-1) -# -#statement error -#PRAGMA tpcds(3290819023812038903) -# -#statement error -#PRAGMA tpcds(32908301298) -# -#statement error -#PRAGMA tpcds(1.1) -# -## queries -#statement ok -#SELECT * FROM tpcds_queries() +PRAGMA tpcds(${i}) + +endloop + +# out of range +statement error +PRAGMA tpcds(-1) + +statement error +PRAGMA tpcds(3290819023812038903) + +statement error +PRAGMA tpcds(32908301298) + +statement error +PRAGMA tpcds(1.1) From a681dc3a4c16858869ab309618655067c57e46e7 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 08:20:57 -0800 Subject: [PATCH 25/36] try to do everything in the optimizer --- .../physical_plan/plan_set_operation.cpp | 4 + src/optimizer/operation_converter.cpp | 92 +++++++++++++++---- 2 files changed, 76 insertions(+), 20 deletions(-) diff --git a/src/execution/physical_plan/plan_set_operation.cpp b/src/execution/physical_plan/plan_set_operation.cpp index ef0f17554f04..4552abf154d3 100644 --- a/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/execution/physical_plan/plan_set_operation.cpp @@ -43,6 +43,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati throw InvalidInputException("Type mismatch for SET OPERATION"); } + // can't swich logical unions to semi/anti join + // also if the operation is a INTERSECT ALL or EXCEPT ALL switch (op.type) { case LogicalOperatorType::LOGICAL_UNION: // UNION @@ -51,6 +53,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati break; case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { + + auto &types = left->GetTypes(); vector conditions; // create equality condition for all columns diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 5bea56b57cfd..17fef7e6f908 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,10 +6,11 @@ #include "duckdb/common/enums/join_type.hpp" #include "duckdb/planner/joinside.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" +#include "duckdb/planner/operator/logical_window.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" -#include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_distinct.hpp" namespace duckdb { @@ -28,15 +29,49 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_EXCEPT: { auto &set_op = op->Cast(); - if (set_op.setop_all || is_root) { - // If set_op all is not defined, then results need to be deduplicated. + auto setop_all = set_op.setop_all; + if (is_root) { // if the operator is the root, then break. We don't want to update the root // and the join order optimizer can still have an effect. break; +// auto a = 0; } + auto table_index = set_op.table_index; auto &left = op->children[0]; auto &right = op->children[1]; + + auto old_bindings = op->GetColumnBindings(); + + if (setop_all) { + + // instead create a logical projection on top of whatever to add the window expression, then + auto row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; + auto left_bindings = left->children[0]->GetColumnBindings(); + for (idx_t i = 0; i < left_bindings.size(); i++) { + row_num->partitions.push_back(make_uniq(op->types[i], left_bindings[i])); + } + left->expressions.push_back(std::move(row_num)); + left->types.push_back(LogicalType::BIGINT); + + row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; + auto right_bindings = right->children[0]->GetColumnBindings(); + for (idx_t i = 0; i < right_bindings.size(); i++) { + row_num->partitions.push_back(make_uniq(op->types[i], right_bindings[i])); + } + right->expressions.push_back(std::move(row_num)); + right->types.push_back(LogicalType::BIGINT); + + // join (created below) now includes the row number result column from the left + left_bindings = left->GetColumnBindings(); + set_op.types.push_back(LogicalType::BIGINT); +// set_op.expressions.push_back(make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1])); + set_op.column_count += 1; + } auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); @@ -54,7 +89,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) } JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; - auto old_bindings = op->GetColumnBindings(); + auto join_op = make_uniq(join_type); join_op->children.push_back(std::move(left)); @@ -64,27 +99,43 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) op = std::move(join_op); - // push a distinct operator on the join - auto &types = op->types; - auto join_bindings = op->GetColumnBindings(); - vector> distinct_targets; - vector> select_list; - for (idx_t i = 0; i < join_bindings.size(); i++) { - distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); - select_list.push_back(make_uniq(types[i], join_bindings[i])); + // create projection to remove row_id. + if (setop_all) { + vector> projection_select_list; + auto bindings = op->GetColumnBindings(); + for (idx_t i = 0; i < bindings.size() - 1; i++) { + projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); + } + auto projection_table_index = binder.GenerateTableIndex(); + auto projection = + make_uniq(projection_table_index, std::move(projection_select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); } - auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); - distinct->children.push_back(std::move(op)); - op = std::move(distinct); - auto projection = make_uniq(table_index, std::move(select_list)); - projection->children.push_back(std::move(op)); - op = std::move(projection); - op->ResolveOperatorTypes(); + if (!setop_all) { + // push a distinct operator on the join + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> distinct_targets; + vector> select_list; + for (idx_t i = 0; i < join_bindings.size(); i++) { + distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); + select_list.push_back(make_uniq(types[i], join_bindings[i])); + } + auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); + distinct->children.push_back(std::move(op)); + op = std::move(distinct); + auto projection = make_uniq(table_index, std::move(select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); + op->ResolveOperatorTypes(); + } // now perform column binding replacement auto new_bindings = op->GetColumnBindings(); - D_ASSERT(old_bindings.size() == new_bindings.size()); + +// D_ASSERT(old_bindings.size() == new_bindings.size()); vector replacement_bindings; for (idx_t i = 0; i < old_bindings.size(); i++) { replacement_bindings.push_back({old_bindings[i], new_bindings[i]}); @@ -93,6 +144,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto binding_replacer = ColumnBindingReplacer(); binding_replacer.replacement_bindings = replacement_bindings; binding_replacer.VisitOperator(root); + break; } default: break; From e5420fb332e669cbd3e4ac70d7dbe20730b52df4 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 09:46:16 -0800 Subject: [PATCH 26/36] all passes now, need to clean this up and move it to the planning phase --- src/optimizer/operation_converter.cpp | 72 +++++++++++++++++---------- test/sql/setops/test_setops.test | 53 ++++++++++++++++++++ 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 17fef7e6f908..e08cd4a5ae6d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -12,7 +12,7 @@ #include "duckdb/planner/expression/bound_window_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/operator/logical_distinct.hpp" - +#include "iostream" namespace duckdb { OperationConverter::OperationConverter(LogicalOperator &root, Binder &binder) : root(root), binder(binder) { @@ -34,62 +34,79 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) // if the operator is the root, then break. We don't want to update the root // and the join order optimizer can still have an effect. break; -// auto a = 0; } auto table_index = set_op.table_index; auto &left = op->children[0]; + auto left_types = op->children[0]->types; auto &right = op->children[1]; + auto right_types = op->children[1]->types; auto old_bindings = op->GetColumnBindings(); if (setop_all) { // instead create a logical projection on top of whatever to add the window expression, then - auto row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; - auto left_bindings = left->children[0]->GetColumnBindings(); - for (idx_t i = 0; i < left_bindings.size(); i++) { - row_num->partitions.push_back(make_uniq(op->types[i], left_bindings[i])); + auto left_window_table_index = binder.GenerateTableIndex(); + auto left_window = make_uniq(left_window_table_index); + auto row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto left_bindings = left->GetColumnBindings(); + for (idx_t i = 0; i < left_types.size(); i++) { + row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); } - left->expressions.push_back(std::move(row_num)); - left->types.push_back(LogicalType::BIGINT); - - row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; - auto right_bindings = right->children[0]->GetColumnBindings(); + left_window->expressions.push_back(std::move(row_number)); + left_window->AddChild(std::move(left)); + left = std::move(left_window); + + auto right_window_table_index = binder.GenerateTableIndex(); + auto right_window = make_uniq(right_window_table_index); + row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto right_bindings = right->GetColumnBindings(); for (idx_t i = 0; i < right_bindings.size(); i++) { - row_num->partitions.push_back(make_uniq(op->types[i], right_bindings[i])); + row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); } - right->expressions.push_back(std::move(row_num)); - right->types.push_back(LogicalType::BIGINT); + right_window->expressions.push_back(std::move(row_number)); + right_window->AddChild(std::move(right)); + right = std::move(right_window); - // join (created below) now includes the row number result column from the left - left_bindings = left->GetColumnBindings(); set_op.types.push_back(LogicalType::BIGINT); -// set_op.expressions.push_back(make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1])); set_op.column_count += 1; } auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); - D_ASSERT(left->types.size() == left_bindings.size()); - D_ASSERT(right->types.size() == right_bindings.size()); + if (setop_all) { + D_ASSERT(left_types.size() + 1 == left_bindings.size()); + D_ASSERT(right_types.size() + 1 == right_bindings.size()); + } vector conditions; // create equality condition for all columns - for (idx_t i = 0; i < left_bindings.size(); i++) { + for (idx_t i = 0; i < left_types.size(); i++) { JoinCondition cond; - cond.left = make_uniq(left->types[i], left_bindings[i]); - cond.right = make_uniq(right->types[i], right_bindings[i]); + cond.left = make_uniq(left_types[i], left_bindings[i]); + cond.right = make_uniq(right_types[i], right_bindings[i]); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; conditions.push_back(std::move(cond)); } - JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + if (setop_all) { + JoinCondition cond; + cond.left = + make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]); + cond.right = + make_uniq(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; auto join_op = make_uniq(join_type); join_op->children.push_back(std::move(left)); @@ -110,6 +127,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto projection = make_uniq(projection_table_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); + projection->ResolveOperatorTypes(); op = std::move(projection); } diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index ca0bb9543f6e..87760d71b581 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -115,6 +115,59 @@ SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' OR 2 b 1 a +query I +SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 +---- +1 +1 +1 +2 + +# mixing types, should upcast +query T +SELECT 1 UNION ALL SELECT 'asdf' +---- +1 +asdf + +query T +SELECT NULL UNION ALL SELECT 'asdf' +---- +NULL +asdf + +# only UNION, distinct results +query I +SELECT 1 UNION SELECT 1 +---- +1 + +query IT +SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 +---- +1 a +2 b +3 c + +query I +SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 +---- +1 +2 + +# mixed fun +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC +---- +2 b +1 a + # EXCEPT ALL / INTERSECT ALL query II From 2a0a8518e6a1b53f69255f663a25131cbc7dc3c9 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 11:50:37 -0800 Subject: [PATCH 27/36] honestly having much more trouble moving this to the planner than I thought I would --- src/main/config.cpp | 2 +- src/optimizer/optimizer.cpp | 204 ++++++++--------- src/planner/binder/query_node/plan_setop.cpp | 134 +++++++++++ test/sql/setops/test_setops.test | 222 +++++++++---------- 4 files changed, 348 insertions(+), 214 deletions(-) diff --git a/src/main/config.cpp b/src/main/config.cpp index 66fd433837e0..70ecfba36cc4 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -15,7 +15,7 @@ namespace duckdb { #ifdef DEBUG -bool DBConfigOptions::debug_print_bindings = false; +bool DBConfigOptions::debug_print_bindings = true; #endif #define DUCKDB_GLOBAL(_PARAM) \ diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index e8dcbde7925d..5eefb9978756 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -93,38 +93,38 @@ unique_ptr Optimizer::Optimize(unique_ptr plan RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); // perform filter pullup - RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { - FilterPullup filter_pullup; - plan = filter_pullup.Rewrite(std::move(plan)); - }); - - // perform filter pushdown - RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { - FilterPushdown filter_pushdown(*this); - plan = filter_pushdown.Rewrite(std::move(plan)); - }); - - RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { - RegexRangeFilter regex_opt; - plan = regex_opt.Rewrite(std::move(plan)); - }); - - RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { - InClauseRewriter ic_rewriter(context, *this); - plan = ic_rewriter.Rewrite(std::move(plan)); - }); - - // removes any redundant DelimGets/DelimJoins - RunOptimizer(OptimizerType::DELIMINATOR, [&]() { - Deliminator deliminator; - plan = deliminator.Optimize(std::move(plan)); - }); +// RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { +// FilterPullup filter_pullup; +// plan = filter_pullup.Rewrite(std::move(plan)); +// }); +// +// // perform filter pushdown +// RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { +// FilterPushdown filter_pushdown(*this); +// plan = filter_pushdown.Rewrite(std::move(plan)); +// }); +// +// RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { +// RegexRangeFilter regex_opt; +// plan = regex_opt.Rewrite(std::move(plan)); +// }); +// +// RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { +// InClauseRewriter ic_rewriter(context, *this); +// plan = ic_rewriter.Rewrite(std::move(plan)); +// }); +// +// // removes any redundant DelimGets/DelimJoins +// RunOptimizer(OptimizerType::DELIMINATOR, [&]() { +// Deliminator deliminator; +// plan = deliminator.Optimize(std::move(plan)); +// }); // Convert setop operations to joins if possible - RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter converter(*plan, binder); - converter.Optimize(plan, true); - }); +// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { +// OperationConverter converter(*plan, binder); +// converter.Optimize(plan, true); +// }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns @@ -134,78 +134,78 @@ unique_ptr Optimizer::Optimize(unique_ptr plan }); // rewrites UNNESTs in DelimJoins by moving them to the projection - RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { - UnnestRewriter unnest_rewriter; - plan = unnest_rewriter.Optimize(std::move(plan)); - }); - - // removes unused columns - RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { - RemoveUnusedColumns unused(binder, context, true); - unused.VisitOperator(*plan); - }); - - // Remove duplicate groups from aggregates - RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { - RemoveDuplicateGroups remove; - remove.VisitOperator(*plan); - }); - - // then we extract common subexpressions inside the different operators - RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { - CommonSubExpressionOptimizer cse_optimizer(binder); - cse_optimizer.VisitOperator(*plan); - }); - - // creates projection maps so unused columns are projected out early - RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { - ColumnLifetimeAnalyzer column_lifetime(true); - column_lifetime.VisitOperator(*plan); - }); - - // perform statistics propagation - column_binding_map_t> statistics_map; - RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { - StatisticsPropagator propagator(*this); - propagator.PropagateStatistics(plan); - statistics_map = propagator.GetStatisticsMap(); - }); - - // remove duplicate aggregates - RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { - CommonAggregateOptimizer common_aggregate; - common_aggregate.VisitOperator(*plan); - }); - - // creates projection maps so unused columns are projected out early - RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { - ColumnLifetimeAnalyzer column_lifetime(true); - column_lifetime.VisitOperator(*plan); - }); - - // compress data based on statistics for materializing operators - RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { - CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); - compressed_materialization.Compress(plan); - }); - - // transform ORDER BY + LIMIT to TopN - RunOptimizer(OptimizerType::TOP_N, [&]() { - TopN topn; - plan = topn.Optimize(std::move(plan)); - }); - - // apply simple expression heuristics to get an initial reordering - RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { - ExpressionHeuristics expression_heuristics(*this); - plan = expression_heuristics.Rewrite(std::move(plan)); - }); - - for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { - RunOptimizer(OptimizerType::EXTENSION, [&]() { - optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); - }); - } +// RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { +// UnnestRewriter unnest_rewriter; +// plan = unnest_rewriter.Optimize(std::move(plan)); +// }); +// +// // removes unused columns +// RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { +// RemoveUnusedColumns unused(binder, context, true); +// unused.VisitOperator(*plan); +// }); +// +// // Remove duplicate groups from aggregates +// RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { +// RemoveDuplicateGroups remove; +// remove.VisitOperator(*plan); +// }); +// +// // then we extract common subexpressions inside the different operators +// RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { +// CommonSubExpressionOptimizer cse_optimizer(binder); +// cse_optimizer.VisitOperator(*plan); +// }); +// +// // creates projection maps so unused columns are projected out early +// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { +// ColumnLifetimeAnalyzer column_lifetime(true); +// column_lifetime.VisitOperator(*plan); +// }); +// +// // perform statistics propagation +// column_binding_map_t> statistics_map; +// RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { +// StatisticsPropagator propagator(*this); +// propagator.PropagateStatistics(plan); +// statistics_map = propagator.GetStatisticsMap(); +// }); +// +// // remove duplicate aggregates +// RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { +// CommonAggregateOptimizer common_aggregate; +// common_aggregate.VisitOperator(*plan); +// }); +// +// // creates projection maps so unused columns are projected out early +// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { +// ColumnLifetimeAnalyzer column_lifetime(true); +// column_lifetime.VisitOperator(*plan); +// }); +// +// // compress data based on statistics for materializing operators +// RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { +// CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); +// compressed_materialization.Compress(plan); +// }); +// +// // transform ORDER BY + LIMIT to TopN +// RunOptimizer(OptimizerType::TOP_N, [&]() { +// TopN topn; +// plan = topn.Optimize(std::move(plan)); +// }); +// +// // apply simple expression heuristics to get an initial reordering +// RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { +// ExpressionHeuristics expression_heuristics(*this); +// plan = expression_heuristics.Rewrite(std::move(plan)); +// }); +// +// for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { +// RunOptimizer(OptimizerType::EXTENSION, [&]() { +// optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); +// }); +// } Planner::VerifyPlan(context, plan); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index c313ae016981..74e1ae1b69eb 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -2,6 +2,9 @@ #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" +#include "duckdb/planner/operator/logical_window.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/query_node/bound_set_operation_node.hpp" @@ -116,9 +119,140 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { break; } + // here we convert the set operation to anti semi if required. Using the node.setop all we know what conversion we + // need. auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), std::move(right_node), logical_type, node.setop_all); + unique_ptr op; + + // if we have an intersect or except, immediately translate it to a semi or anti join. + if (logical_type == LogicalOperatorType::LOGICAL_INTERSECT || logical_type == LogicalOperatorType::LOGICAL_EXCEPT) { + auto &left = root->children[0]; + auto &right = root->children[1]; + auto left_types = root->children[0]->types; + auto right_types = root->children[1]->types; + auto old_bindings = root->GetColumnBindings(); + auto table_index = root->table_index; + if (node.setop_all) { + + // instead create a logical projection on top of whatever to add the window expression, then + auto left_window_table_index = GenerateTableIndex(); + auto left_window = make_uniq(left_window_table_index); + auto row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto left_bindings = left->GetColumnBindings(); + for (idx_t i = 0; i < left_types.size(); i++) { + auto left_type = LogicalType(LogicalType::UNKNOWN); + if (i < left_types.size()) { + left_type = left_types[i]; + } + row_number->partitions.push_back(make_uniq(left_type, left_bindings[i])); + } + left_window->expressions.push_back(std::move(row_number)); + left_window->AddChild(std::move(left)); + left = std::move(left_window); + + auto right_window_table_index = GenerateTableIndex(); + auto right_window = make_uniq(right_window_table_index); + row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto right_bindings = right->GetColumnBindings(); + for (idx_t i = 0; i < right_bindings.size(); i++) { + auto right_type = LogicalType(LogicalType::UNKNOWN); + if (i < right_types.size()) { + right_type = right_types[i]; + } + row_number->partitions.push_back(make_uniq(right_type, right_bindings[i])); + } + right_window->expressions.push_back(std::move(row_number)); + right_window->AddChild(std::move(right)); + right = std::move(right_window); + + root->types.push_back(LogicalType::BIGINT); + root->column_count += 1; + } + + auto left_bindings = left->GetColumnBindings(); + auto right_bindings = right->GetColumnBindings(); + D_ASSERT(left_bindings.size() == right_bindings.size()); + + vector conditions; + // create equality condition for all columns + for (idx_t i = 0; i < left_bindings.size() - 1; i++) { + auto cond_type_left = LogicalType(LogicalType::UNKNOWN); + auto cond_type_right = LogicalType(LogicalType::UNKNOWN); + if (i < left_types.size()) { + cond_type_left = left_types[i]; + cond_type_right = right_types[i]; + } + JoinCondition cond; + cond.left = make_uniq(cond_type_left, left_bindings[i]); + cond.right = make_uniq(cond_type_right, right_bindings[i]); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + + if (node.setop_all) { + JoinCondition cond; + cond.left = + make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]); + cond.right = + make_uniq(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + + JoinType join_type = root->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + + auto join_op = make_uniq(join_type); + join_op->children.push_back(std::move(left)); + join_op->children.push_back(std::move(right)); + join_op->conditions = std::move(conditions); + join_op->ResolveOperatorTypes(); + + op = std::move(join_op); + + // create projection to remove row_id. + if (node.setop_all) { + vector> projection_select_list; + auto bindings = op->GetColumnBindings(); + for (idx_t i = 0; i < bindings.size() - 1; i++) { + projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); + } + auto projection_table_index = GenerateTableIndex(); + auto projection = + make_uniq(projection_table_index, std::move(projection_select_list)); + projection->children.push_back(std::move(op)); +// projection->ResolveOperatorTypes(); + op = std::move(projection); + } + + if (!node.setop_all) { + // push a distinct operator on the join + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> distinct_targets; + vector> select_list; + for (idx_t i = 0; i < join_bindings.size(); i++) { + distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); + select_list.push_back(make_uniq(types[i], join_bindings[i])); + } + auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); + distinct->children.push_back(std::move(op)); + op = std::move(distinct); + + auto projection = make_uniq(table_index, std::move(select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); + op->ResolveOperatorTypes(); + } + return VisitQueryNode(node, std::move(op)); + } return VisitQueryNode(node, std::move(root)); } diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index 87760d71b581..80c3cf381cd2 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -2,118 +2,118 @@ # description: Test UNION/EXCEPT/INTERSECT # group: [setops] -statement ok -PRAGMA enable_verification - -query I -SELECT 1 UNION ALL SELECT 2 ----- -1 -2 - -query IT -SELECT 1, 'a' UNION ALL SELECT 2, 'b' ----- -1 a -2 b - -query IT -SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 ----- -1 a -2 b -3 c - -query IT -SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 ----- -1 a -2 b -3 c -4 d - -# UNION/EXCEPT/INTERSECT with NULL values -query I -SELECT NULL UNION SELECT NULL ----- -NULL - -query I -SELECT NULL EXCEPT SELECT NULL ----- - -query I -SELECT NULL INTERSECT SELECT NULL ----- -NULL - +#statement ok +#PRAGMA enable_verification + +#query I +#SELECT 1 UNION ALL SELECT 2 +#---- +#1 +#2 +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 2, 'b' +#---- +#1 a +#2 b +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 +#---- +#1 a +#2 b +#3 c +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 +#---- +#1 a +#2 b +#3 c +#4 d +# +## UNION/EXCEPT/INTERSECT with NULL values +#query I +#SELECT NULL UNION SELECT NULL +#---- +#NULL + +#query I +#SELECT NULL EXCEPT SELECT NULL +#---- + +#query I +#SELECT NULL INTERSECT SELECT NULL +#---- +#NULL +# # create tables -statement ok -CREATE TABLE test (a INTEGER, b INTEGER); - -statement ok -INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) - -# UNION ALL, no unique results -query I -SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 ----- -11 -12 -13 - -query I -SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 ----- -1 -1 -1 -2 - -# mixing types, should upcast -query T -SELECT 1 UNION ALL SELECT 'asdf' ----- -1 -asdf - -query T -SELECT NULL UNION ALL SELECT 'asdf' ----- -NULL -asdf - -# only UNION, distinct results -query I -SELECT 1 UNION SELECT 1 ----- -1 - -query IT -SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 ----- -1 a -2 b -3 c - -query I -SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 ----- -1 -2 - -# mixed fun -query IT -SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 ----- -1 a -2 b - -query IT -SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC ----- -2 b -1 a +#statement ok +#CREATE TABLE test (a INTEGER, b INTEGER); +# +#statement ok +#INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) +# +## UNION ALL, no unique results +#query I +#SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 +#---- +#11 +#12 +#13 +# +#query I +#SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 +#---- +#1 +#1 +#1 +#2 +# +## mixing types, should upcast +#query T +#SELECT 1 UNION ALL SELECT 'asdf' +#---- +#1 +#asdf +# +#query T +#SELECT NULL UNION ALL SELECT 'asdf' +#---- +#NULL +#asdf +# +## only UNION, distinct results +#query I +#SELECT 1 UNION SELECT 1 +#---- +#1 +# +#query IT +#SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 +#---- +#1 a +#2 b +#3 c +# +#query I +#SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 +#---- +#1 +#2 +# +## mixed fun +#query IT +#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 +#---- +#1 a +#2 b +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC +#---- +#2 b +#1 a query I SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 From bff6a81f64ef1f3943dac8b8a687237cf473aba9 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 12:41:46 -0800 Subject: [PATCH 28/36] very select_list is messing up the results, dont know why --- src/planner/binder/query_node/plan_setop.cpp | 23 +++++--------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 74e1ae1b69eb..85f36d981cae 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -123,6 +123,8 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { // need. auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), std::move(right_node), logical_type, node.setop_all); + // Is this necessary? + root->ResolveOperatorTypes(); unique_ptr op; @@ -145,11 +147,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { row_number->end = WindowBoundary::CURRENT_ROW_ROWS; auto left_bindings = left->GetColumnBindings(); for (idx_t i = 0; i < left_types.size(); i++) { - auto left_type = LogicalType(LogicalType::UNKNOWN); - if (i < left_types.size()) { - left_type = left_types[i]; - } - row_number->partitions.push_back(make_uniq(left_type, left_bindings[i])); + row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); } left_window->expressions.push_back(std::move(row_number)); left_window->AddChild(std::move(left)); @@ -163,11 +161,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { row_number->end = WindowBoundary::CURRENT_ROW_ROWS; auto right_bindings = right->GetColumnBindings(); for (idx_t i = 0; i < right_bindings.size(); i++) { - auto right_type = LogicalType(LogicalType::UNKNOWN); - if (i < right_types.size()) { - right_type = right_types[i]; - } - row_number->partitions.push_back(make_uniq(right_type, right_bindings[i])); + row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); } right_window->expressions.push_back(std::move(row_number)); right_window->AddChild(std::move(right)); @@ -186,13 +180,9 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { for (idx_t i = 0; i < left_bindings.size() - 1; i++) { auto cond_type_left = LogicalType(LogicalType::UNKNOWN); auto cond_type_right = LogicalType(LogicalType::UNKNOWN); - if (i < left_types.size()) { - cond_type_left = left_types[i]; - cond_type_right = right_types[i]; - } JoinCondition cond; - cond.left = make_uniq(cond_type_left, left_bindings[i]); - cond.right = make_uniq(cond_type_right, right_bindings[i]); + cond.left = make_uniq(left_types[i], left_bindings[i]); + cond.right = make_uniq(right_types[i], right_bindings[i]); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; conditions.push_back(std::move(cond)); } @@ -228,7 +218,6 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { auto projection = make_uniq(projection_table_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); -// projection->ResolveOperatorTypes(); op = std::move(projection); } From ea998a1d1eb82d4a271400894b2fc59d7c369f90 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 12:56:04 -0800 Subject: [PATCH 29/36] planning and binding is hard to understand --- src/planner/binder/query_node/bind_select_node.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/planner/binder/query_node/bind_select_node.cpp b/src/planner/binder/query_node/bind_select_node.cpp index a4906402b6d8..644294ce21da 100644 --- a/src/planner/binder/query_node/bind_select_node.cpp +++ b/src/planner/binder/query_node/bind_select_node.cpp @@ -514,6 +514,7 @@ unique_ptr Binder::BindSelectNode(SelectNode &statement, unique_ group_by_all_indexes.push_back(i); } + // inspect what is going on here. result->select_list.push_back(std::move(expr)); if (is_original_column) { new_names.push_back(std::move(result->names[i])); From eb4ec51ee452a0514b31e953b28102810c39ed0f Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 14:45:20 -0800 Subject: [PATCH 30/36] think I have fixed more, but still getting some erros regarding flattening correlated subqueries --- src/optimizer/optimizer.cpp | 208 ++++++++--------- src/planner/binder/query_node/plan_setop.cpp | 59 +++-- test/sql/setops/test_setops.test | 222 +++++++++---------- 3 files changed, 241 insertions(+), 248 deletions(-) diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 5eefb9978756..dc084d8871cc 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -93,38 +93,38 @@ unique_ptr Optimizer::Optimize(unique_ptr plan RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); // perform filter pullup -// RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { -// FilterPullup filter_pullup; -// plan = filter_pullup.Rewrite(std::move(plan)); -// }); -// -// // perform filter pushdown -// RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { -// FilterPushdown filter_pushdown(*this); -// plan = filter_pushdown.Rewrite(std::move(plan)); -// }); -// -// RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { -// RegexRangeFilter regex_opt; -// plan = regex_opt.Rewrite(std::move(plan)); -// }); -// -// RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { -// InClauseRewriter ic_rewriter(context, *this); -// plan = ic_rewriter.Rewrite(std::move(plan)); -// }); -// -// // removes any redundant DelimGets/DelimJoins -// RunOptimizer(OptimizerType::DELIMINATOR, [&]() { -// Deliminator deliminator; -// plan = deliminator.Optimize(std::move(plan)); -// }); - - // Convert setop operations to joins if possible -// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { -// OperationConverter converter(*plan, binder); -// converter.Optimize(plan, true); -// }); + RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { + FilterPullup filter_pullup; + plan = filter_pullup.Rewrite(std::move(plan)); + }); + + // perform filter pushdown + RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { + FilterPushdown filter_pushdown(*this); + plan = filter_pushdown.Rewrite(std::move(plan)); + }); + + RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { + RegexRangeFilter regex_opt; + plan = regex_opt.Rewrite(std::move(plan)); + }); + + RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { + InClauseRewriter ic_rewriter(context, *this); + plan = ic_rewriter.Rewrite(std::move(plan)); + }); + + // removes any redundant DelimGets/DelimJoins + RunOptimizer(OptimizerType::DELIMINATOR, [&]() { + Deliminator deliminator; + plan = deliminator.Optimize(std::move(plan)); + }); + +// Convert setop operations to joins if possible + RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { + OperationConverter converter(*plan, binder); + converter.Optimize(plan, true); + }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns @@ -134,78 +134,78 @@ unique_ptr Optimizer::Optimize(unique_ptr plan }); // rewrites UNNESTs in DelimJoins by moving them to the projection -// RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { -// UnnestRewriter unnest_rewriter; -// plan = unnest_rewriter.Optimize(std::move(plan)); -// }); -// -// // removes unused columns -// RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { -// RemoveUnusedColumns unused(binder, context, true); -// unused.VisitOperator(*plan); -// }); -// -// // Remove duplicate groups from aggregates -// RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { -// RemoveDuplicateGroups remove; -// remove.VisitOperator(*plan); -// }); -// -// // then we extract common subexpressions inside the different operators -// RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { -// CommonSubExpressionOptimizer cse_optimizer(binder); -// cse_optimizer.VisitOperator(*plan); -// }); -// -// // creates projection maps so unused columns are projected out early -// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { -// ColumnLifetimeAnalyzer column_lifetime(true); -// column_lifetime.VisitOperator(*plan); -// }); -// -// // perform statistics propagation -// column_binding_map_t> statistics_map; -// RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { -// StatisticsPropagator propagator(*this); -// propagator.PropagateStatistics(plan); -// statistics_map = propagator.GetStatisticsMap(); -// }); -// -// // remove duplicate aggregates -// RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { -// CommonAggregateOptimizer common_aggregate; -// common_aggregate.VisitOperator(*plan); -// }); -// -// // creates projection maps so unused columns are projected out early -// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { -// ColumnLifetimeAnalyzer column_lifetime(true); -// column_lifetime.VisitOperator(*plan); -// }); -// -// // compress data based on statistics for materializing operators -// RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { -// CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); -// compressed_materialization.Compress(plan); -// }); -// -// // transform ORDER BY + LIMIT to TopN -// RunOptimizer(OptimizerType::TOP_N, [&]() { -// TopN topn; -// plan = topn.Optimize(std::move(plan)); -// }); -// -// // apply simple expression heuristics to get an initial reordering -// RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { -// ExpressionHeuristics expression_heuristics(*this); -// plan = expression_heuristics.Rewrite(std::move(plan)); -// }); -// -// for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { -// RunOptimizer(OptimizerType::EXTENSION, [&]() { -// optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); -// }); -// } + RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { + UnnestRewriter unnest_rewriter; + plan = unnest_rewriter.Optimize(std::move(plan)); + }); + + // removes unused columns + RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { + RemoveUnusedColumns unused(binder, context, true); + unused.VisitOperator(*plan); + }); + + // Remove duplicate groups from aggregates + RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { + RemoveDuplicateGroups remove; + remove.VisitOperator(*plan); + }); + + // then we extract common subexpressions inside the different operators + RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { + CommonSubExpressionOptimizer cse_optimizer(binder); + cse_optimizer.VisitOperator(*plan); + }); + + // creates projection maps so unused columns are projected out early + RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { + ColumnLifetimeAnalyzer column_lifetime(true); + column_lifetime.VisitOperator(*plan); + }); + + // perform statistics propagation + column_binding_map_t> statistics_map; + RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { + StatisticsPropagator propagator(*this); + propagator.PropagateStatistics(plan); + statistics_map = propagator.GetStatisticsMap(); + }); + + // remove duplicate aggregates + RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { + CommonAggregateOptimizer common_aggregate; + common_aggregate.VisitOperator(*plan); + }); + + // creates projection maps so unused columns are projected out early + RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { + ColumnLifetimeAnalyzer column_lifetime(true); + column_lifetime.VisitOperator(*plan); + }); + + // compress data based on statistics for materializing operators + RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { + CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); + compressed_materialization.Compress(plan); + }); + + // transform ORDER BY + LIMIT to TopN + RunOptimizer(OptimizerType::TOP_N, [&]() { + TopN topn; + plan = topn.Optimize(std::move(plan)); + }); + + // apply simple expression heuristics to get an initial reordering + RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { + ExpressionHeuristics expression_heuristics(*this); + plan = expression_heuristics.Rewrite(std::move(plan)); + }); + + for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { + RunOptimizer(OptimizerType::EXTENSION, [&]() { + optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); + }); + } Planner::VerifyPlan(context, plan); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 85f36d981cae..44e1d0408365 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -10,6 +10,23 @@ namespace duckdb { +static unique_ptr CreateWindowWithPartitionedRowNum(idx_t window_table_index, unique_ptr op) { + // instead create a logical projection on top of whatever to add the window expression, then + auto window = make_uniq(window_table_index); + auto row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto bindings = op->GetColumnBindings(); + auto types = op->types; + for (idx_t i = 0; i < types.size(); i++) { + row_number->partitions.push_back(make_uniq(types[i], bindings[i])); + } + window->expressions.push_back(std::move(row_number)); + window->AddChild(std::move(op)); + return window; +} + // Optionally push a PROJECTION operator unique_ptr Binder::CastLogicalOperatorToTypes(vector &source_types, vector &target_types, @@ -135,37 +152,12 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { auto left_types = root->children[0]->types; auto right_types = root->children[1]->types; auto old_bindings = root->GetColumnBindings(); - auto table_index = root->table_index; if (node.setop_all) { + auto window_left_table_id = GenerateTableIndex(); + root->children[0] = CreateWindowWithPartitionedRowNum(window_left_table_id, std::move(root->children[0])); - // instead create a logical projection on top of whatever to add the window expression, then - auto left_window_table_index = GenerateTableIndex(); - auto left_window = make_uniq(left_window_table_index); - auto row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto left_bindings = left->GetColumnBindings(); - for (idx_t i = 0; i < left_types.size(); i++) { - row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); - } - left_window->expressions.push_back(std::move(row_number)); - left_window->AddChild(std::move(left)); - left = std::move(left_window); - - auto right_window_table_index = GenerateTableIndex(); - auto right_window = make_uniq(right_window_table_index); - row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto right_bindings = right->GetColumnBindings(); - for (idx_t i = 0; i < right_bindings.size(); i++) { - row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); - } - right_window->expressions.push_back(std::move(row_number)); - right_window->AddChild(std::move(right)); - right = std::move(right_window); + auto window_right_table_id = GenerateTableIndex(); + root->children[1] = CreateWindowWithPartitionedRowNum(window_right_table_id, std::move(root->children[1])); root->types.push_back(LogicalType::BIGINT); root->column_count += 1; @@ -177,7 +169,8 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { vector conditions; // create equality condition for all columns - for (idx_t i = 0; i < left_bindings.size() - 1; i++) { + idx_t binding_offset = node.setop_all ? 1 : 0; + for (idx_t i = 0; i < left_bindings.size() - binding_offset; i++) { auto cond_type_left = LogicalType(LogicalType::UNKNOWN); auto cond_type_right = LogicalType(LogicalType::UNKNOWN); JoinCondition cond; @@ -187,6 +180,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { conditions.push_back(std::move(cond)); } + // create condition for the row number as well. if (node.setop_all) { JoinCondition cond; cond.left = @@ -214,9 +208,8 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { for (idx_t i = 0; i < bindings.size() - 1; i++) { projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); } - auto projection_table_index = GenerateTableIndex(); auto projection = - make_uniq(projection_table_index, std::move(projection_select_list)); + make_uniq(node.setop_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); } @@ -235,7 +228,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { distinct->children.push_back(std::move(op)); op = std::move(distinct); - auto projection = make_uniq(table_index, std::move(select_list)); + auto projection = make_uniq(node.setop_index, std::move(select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); op->ResolveOperatorTypes(); diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index 80c3cf381cd2..87760d71b581 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -2,118 +2,118 @@ # description: Test UNION/EXCEPT/INTERSECT # group: [setops] -#statement ok -#PRAGMA enable_verification - -#query I -#SELECT 1 UNION ALL SELECT 2 -#---- -#1 -#2 -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' -#---- -#1 a -#2 b -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 -#---- -#1 a -#2 b -#3 c -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 -#---- -#1 a -#2 b -#3 c -#4 d -# -## UNION/EXCEPT/INTERSECT with NULL values -#query I -#SELECT NULL UNION SELECT NULL -#---- -#NULL - -#query I -#SELECT NULL EXCEPT SELECT NULL -#---- - -#query I -#SELECT NULL INTERSECT SELECT NULL -#---- -#NULL -# +statement ok +PRAGMA enable_verification + +query I +SELECT 1 UNION ALL SELECT 2 +---- +1 +2 + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 +---- +1 a +2 b +3 c + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 +---- +1 a +2 b +3 c +4 d + +# UNION/EXCEPT/INTERSECT with NULL values +query I +SELECT NULL UNION SELECT NULL +---- +NULL + +query I +SELECT NULL EXCEPT SELECT NULL +---- + +query I +SELECT NULL INTERSECT SELECT NULL +---- +NULL + # create tables -#statement ok -#CREATE TABLE test (a INTEGER, b INTEGER); -# -#statement ok -#INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) -# -## UNION ALL, no unique results -#query I -#SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 -#---- -#11 -#12 -#13 -# -#query I -#SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 -#---- -#1 -#1 -#1 -#2 -# -## mixing types, should upcast -#query T -#SELECT 1 UNION ALL SELECT 'asdf' -#---- -#1 -#asdf -# -#query T -#SELECT NULL UNION ALL SELECT 'asdf' -#---- -#NULL -#asdf -# -## only UNION, distinct results -#query I -#SELECT 1 UNION SELECT 1 -#---- -#1 -# -#query IT -#SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 -#---- -#1 a -#2 b -#3 c -# -#query I -#SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 -#---- -#1 -#2 -# -## mixed fun -#query IT -#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 -#---- -#1 a -#2 b -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC -#---- -#2 b -#1 a +statement ok +CREATE TABLE test (a INTEGER, b INTEGER); + +statement ok +INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) + +# UNION ALL, no unique results +query I +SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 +---- +11 +12 +13 + +query I +SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 +---- +1 +1 +1 +2 + +# mixing types, should upcast +query T +SELECT 1 UNION ALL SELECT 'asdf' +---- +1 +asdf + +query T +SELECT NULL UNION ALL SELECT 'asdf' +---- +NULL +asdf + +# only UNION, distinct results +query I +SELECT 1 UNION SELECT 1 +---- +1 + +query IT +SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 +---- +1 a +2 b +3 c + +query I +SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 +---- +1 +2 + +# mixed fun +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC +---- +2 b +1 a query I SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 From b168da279a9e5e615377fb91638eda0288972527 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 14:47:54 -0800 Subject: [PATCH 31/36] remove unused code --- .../physical_plan/plan_set_operation.cpp | 64 +------------------ src/optimizer/optimizer.cpp | 8 +-- 2 files changed, 5 insertions(+), 67 deletions(-) diff --git a/src/execution/physical_plan/plan_set_operation.cpp b/src/execution/physical_plan/plan_set_operation.cpp index 4552abf154d3..5dac7235aeb3 100644 --- a/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/execution/physical_plan/plan_set_operation.cpp @@ -10,19 +10,6 @@ namespace duckdb { -static vector> CreatePartitionedRowNumExpression(const vector &types) { - vector> res; - auto expr = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - expr->start = WindowBoundary::UNBOUNDED_PRECEDING; - expr->end = WindowBoundary::UNBOUNDED_FOLLOWING; - for (idx_t i = 0; i < types.size(); i++) { - expr->partitions.push_back(make_uniq(types[i], i)); - } - res.push_back(std::move(expr)); - return res; -} - static JoinCondition CreateNotDistinctComparison(const LogicalType &type, idx_t i) { JoinCondition cond; cond.left = make_uniq(type, i); @@ -53,56 +40,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati break; case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { - - - auto &types = left->GetTypes(); - vector conditions; - // create equality condition for all columns - for (idx_t i = 0; i < types.size(); i++) { - conditions.push_back(CreateNotDistinctComparison(types[i], i)); - } - // For EXCEPT ALL / INTERSECT ALL we push a window operator with a ROW_NUMBER into the scans and join to get bag - // semantics. - if (op.setop_all) { - vector window_types = types; - window_types.push_back(LogicalType::BIGINT); - - auto window_left = make_uniq(window_types, CreatePartitionedRowNumExpression(types), - left->estimated_cardinality); - window_left->children.push_back(std::move(left)); - left = std::move(window_left); - - auto window_right = make_uniq(window_types, CreatePartitionedRowNumExpression(types), - right->estimated_cardinality); - window_right->children.push_back(std::move(right)); - right = std::move(window_right); - - // add window expression result to join condition - conditions.push_back(CreateNotDistinctComparison(LogicalType::BIGINT, types.size())); - // join (created below) now includes the row number result column - op.types.push_back(LogicalType::BIGINT); - } - - // EXCEPT is ANTI join - // INTERSECT is SEMI join - PerfectHashJoinStats join_stats; // used in inner joins only - - JoinType join_type = op.type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; - result = make_uniq(op, std::move(left), std::move(right), std::move(conditions), join_type, - op.estimated_cardinality, join_stats); - - // For EXCEPT ALL / INTERSECT ALL we need to remove the row number column again - if (op.setop_all) { - vector> projection_select_list; - for (idx_t i = 0; i < types.size(); i++) { - projection_select_list.push_back(make_uniq(types[i], i)); - } - auto projection = - make_uniq(types, std::move(projection_select_list), op.estimated_cardinality); - projection->children.push_back(std::move(result)); - result = std::move(projection); - } - break; + throw InternalException("Logical Except/Intersect should have been transformed to semi anti before the physical planning phase"); } default: throw InternalException("Unexpected operator type for set operation"); diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index dc084d8871cc..3aaa1a6572ad 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -121,10 +121,10 @@ unique_ptr Optimizer::Optimize(unique_ptr plan }); // Convert setop operations to joins if possible - RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter converter(*plan, binder); - converter.Optimize(plan, true); - }); +// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { +// OperationConverter converter(*plan, binder); +// converter.Optimize(plan, true); +// }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns From c20718b8d5da106d321e1e8fc089a07cfc80eea2 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 17:10:03 -0800 Subject: [PATCH 32/36] more removal of dead code --- src/common/enum_util.cpp | 5 - src/common/enums/optimizer_type.cpp | 1 - .../duckdb/common/enums/optimizer_type.hpp | 1 - .../duckdb/optimizer/operation_converter.hpp | 28 --- src/optimizer/CMakeLists.txt | 1 - src/optimizer/operation_converter.cpp | 172 ------------------ src/optimizer/optimizer.cpp | 7 - .../binder/query_node/bind_select_node.cpp | 1 - src/planner/binder/query_node/plan_setop.cpp | 2 +- test/sql/setops/test_setops.test | 54 ------ 10 files changed, 1 insertion(+), 271 deletions(-) delete mode 100644 src/include/duckdb/optimizer/operation_converter.hpp delete mode 100644 src/optimizer/operation_converter.cpp diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index 5e6cb44d0bca..806480ea68d8 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -3605,8 +3605,6 @@ const char* EnumUtil::ToChars(OptimizerType value) { return "JOIN_ORDER"; case OptimizerType::DELIMINATOR: return "DELIMINATOR"; - case OptimizerType::OPERATION_CONVERTER: - return "OPERATION_CONVERTER"; case OptimizerType::UNNEST_REWRITER: return "UNNEST_REWRITER"; case OptimizerType::UNUSED_COLUMNS: @@ -3660,9 +3658,6 @@ OptimizerType EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "DELIMINATOR")) { return OptimizerType::DELIMINATOR; } - if (StringUtil::Equals(value, "OPERATION_CONVERTER")) { - return OptimizerType::OPERATION_CONVERTER; - } if (StringUtil::Equals(value, "UNNEST_REWRITER")) { return OptimizerType::UNNEST_REWRITER; } diff --git a/src/common/enums/optimizer_type.cpp b/src/common/enums/optimizer_type.cpp index 663a80456f72..a8365ab76d57 100644 --- a/src/common/enums/optimizer_type.cpp +++ b/src/common/enums/optimizer_type.cpp @@ -16,7 +16,6 @@ static DefaultOptimizerType internal_optimizer_types[] = { {"filter_pushdown", OptimizerType::FILTER_PUSHDOWN}, {"regex_range", OptimizerType::REGEX_RANGE}, {"in_clause", OptimizerType::IN_CLAUSE}, - {"operation_converter", OptimizerType::OPERATION_CONVERTER}, {"join_order", OptimizerType::JOIN_ORDER}, {"deliminator", OptimizerType::DELIMINATOR}, {"unnest_rewriter", OptimizerType::UNNEST_REWRITER}, diff --git a/src/include/duckdb/common/enums/optimizer_type.hpp b/src/include/duckdb/common/enums/optimizer_type.hpp index cb4d3063e901..c687ec406e54 100644 --- a/src/include/duckdb/common/enums/optimizer_type.hpp +++ b/src/include/duckdb/common/enums/optimizer_type.hpp @@ -22,7 +22,6 @@ enum class OptimizerType : uint32_t { IN_CLAUSE, JOIN_ORDER, DELIMINATOR, - OPERATION_CONVERTER, UNNEST_REWRITER, UNUSED_COLUMNS, STATISTICS_PROPAGATION, diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp deleted file mode 100644 index e00a7b9c35c4..000000000000 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/optimizer/operation_converter.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb/optimizer/column_binding_replacer.hpp" - -namespace duckdb { - -//! The Operation Converter converts Set operations to joins when possible -class OperationConverter { -public: - OperationConverter(LogicalOperator &root, Binder &binder); - - //! Perform DelimJoin elimination - void Optimize(unique_ptr &op, bool is_root = false); - LogicalOperator &root; - Binder &binder; - -private: -}; - -} // namespace duckdb diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 39f3f84caccd..53f57e1c168d 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -14,7 +14,6 @@ add_library_unity( compressed_materialization.cpp cse_optimizer.cpp deliminator.cpp - operation_converter.cpp unnest_rewriter.cpp column_lifetime_analyzer.cpp expression_heuristics.cpp diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp deleted file mode 100644 index e08cd4a5ae6d..000000000000 --- a/src/optimizer/operation_converter.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include "duckdb/optimizer/operation_converter.hpp" -#include "duckdb/planner/expression/bound_cast_expression.hpp" -#include "duckdb/planner/expression/bound_conjunction_expression.hpp" -#include "duckdb/planner/expression/bound_columnref_expression.hpp" -#include "duckdb/planner/operator/logical_delim_get.hpp" -#include "duckdb/common/enums/join_type.hpp" -#include "duckdb/planner/joinside.hpp" -#include "duckdb/planner/operator/logical_comparison_join.hpp" -#include "duckdb/planner/operator/logical_window.hpp" -#include "duckdb/planner/operator/logical_set_operation.hpp" -#include "duckdb/planner/expression/bound_reference_expression.hpp" -#include "duckdb/planner/expression/bound_window_expression.hpp" -#include "duckdb/planner/operator/logical_projection.hpp" -#include "duckdb/planner/operator/logical_distinct.hpp" -#include "iostream" -namespace duckdb { - -OperationConverter::OperationConverter(LogicalOperator &root, Binder &binder) : root(root), binder(binder) { - root.ResolveOperatorTypes(); -} - -void OperationConverter::Optimize(unique_ptr &op, bool is_root) { - for (auto &child : op->children) { - Optimize(child); - } - switch (op->type) { - // if it is setop all, we don't replace (even though we technically still could) - // if it is not setop all, duplicate elimination should happen - case LogicalOperatorType::LOGICAL_INTERSECT: - case LogicalOperatorType::LOGICAL_EXCEPT: { - auto &set_op = op->Cast(); - auto setop_all = set_op.setop_all; - if (is_root) { - // if the operator is the root, then break. We don't want to update the root - // and the join order optimizer can still have an effect. - break; - } - - auto table_index = set_op.table_index; - auto &left = op->children[0]; - auto left_types = op->children[0]->types; - auto &right = op->children[1]; - auto right_types = op->children[1]->types; - - auto old_bindings = op->GetColumnBindings(); - - if (setop_all) { - - // instead create a logical projection on top of whatever to add the window expression, then - auto left_window_table_index = binder.GenerateTableIndex(); - auto left_window = make_uniq(left_window_table_index); - auto row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto left_bindings = left->GetColumnBindings(); - for (idx_t i = 0; i < left_types.size(); i++) { - row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); - } - left_window->expressions.push_back(std::move(row_number)); - left_window->AddChild(std::move(left)); - left = std::move(left_window); - - auto right_window_table_index = binder.GenerateTableIndex(); - auto right_window = make_uniq(right_window_table_index); - row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto right_bindings = right->GetColumnBindings(); - for (idx_t i = 0; i < right_bindings.size(); i++) { - row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); - } - right_window->expressions.push_back(std::move(row_number)); - right_window->AddChild(std::move(right)); - right = std::move(right_window); - - set_op.types.push_back(LogicalType::BIGINT); - set_op.column_count += 1; - } - auto left_bindings = left->GetColumnBindings(); - auto right_bindings = right->GetColumnBindings(); - D_ASSERT(left_bindings.size() == right_bindings.size()); - - if (setop_all) { - D_ASSERT(left_types.size() + 1 == left_bindings.size()); - D_ASSERT(right_types.size() + 1 == right_bindings.size()); - } - vector conditions; - // create equality condition for all columns - for (idx_t i = 0; i < left_types.size(); i++) { - JoinCondition cond; - cond.left = make_uniq(left_types[i], left_bindings[i]); - cond.right = make_uniq(right_types[i], right_bindings[i]); - cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; - conditions.push_back(std::move(cond)); - } - - if (setop_all) { - JoinCondition cond; - cond.left = - make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]); - cond.right = - make_uniq(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]); - cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; - conditions.push_back(std::move(cond)); - } - - JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; - - auto join_op = make_uniq(join_type); - join_op->children.push_back(std::move(left)); - join_op->children.push_back(std::move(right)); - join_op->conditions = std::move(conditions); - join_op->ResolveOperatorTypes(); - - op = std::move(join_op); - - // create projection to remove row_id. - if (setop_all) { - vector> projection_select_list; - auto bindings = op->GetColumnBindings(); - for (idx_t i = 0; i < bindings.size() - 1; i++) { - projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); - } - auto projection_table_index = binder.GenerateTableIndex(); - auto projection = - make_uniq(projection_table_index, std::move(projection_select_list)); - projection->children.push_back(std::move(op)); - projection->ResolveOperatorTypes(); - op = std::move(projection); - } - - if (!setop_all) { - // push a distinct operator on the join - auto &types = op->types; - auto join_bindings = op->GetColumnBindings(); - vector> distinct_targets; - vector> select_list; - for (idx_t i = 0; i < join_bindings.size(); i++) { - distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); - select_list.push_back(make_uniq(types[i], join_bindings[i])); - } - auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); - distinct->children.push_back(std::move(op)); - op = std::move(distinct); - - auto projection = make_uniq(table_index, std::move(select_list)); - projection->children.push_back(std::move(op)); - op = std::move(projection); - op->ResolveOperatorTypes(); - } - // now perform column binding replacement - auto new_bindings = op->GetColumnBindings(); - -// D_ASSERT(old_bindings.size() == new_bindings.size()); - vector replacement_bindings; - for (idx_t i = 0; i < old_bindings.size(); i++) { - replacement_bindings.push_back({old_bindings[i], new_bindings[i]}); - } - - auto binding_replacer = ColumnBindingReplacer(); - binding_replacer.replacement_bindings = replacement_bindings; - binding_replacer.VisitOperator(root); - break; - } - default: - break; - } -} - -} // namespace duckdb diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 3aaa1a6572ad..0a66f0007761 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -9,7 +9,6 @@ #include "duckdb/optimizer/compressed_materialization.hpp" #include "duckdb/optimizer/cse_optimizer.hpp" #include "duckdb/optimizer/deliminator.hpp" -#include "duckdb/optimizer/operation_converter.hpp" #include "duckdb/optimizer/expression_heuristics.hpp" #include "duckdb/optimizer/filter_pullup.hpp" #include "duckdb/optimizer/filter_pushdown.hpp" @@ -120,12 +119,6 @@ unique_ptr Optimizer::Optimize(unique_ptr plan plan = deliminator.Optimize(std::move(plan)); }); -// Convert setop operations to joins if possible -// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { -// OperationConverter converter(*plan, binder); -// converter.Optimize(plan, true); -// }); - // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { diff --git a/src/planner/binder/query_node/bind_select_node.cpp b/src/planner/binder/query_node/bind_select_node.cpp index 644294ce21da..a4906402b6d8 100644 --- a/src/planner/binder/query_node/bind_select_node.cpp +++ b/src/planner/binder/query_node/bind_select_node.cpp @@ -514,7 +514,6 @@ unique_ptr Binder::BindSelectNode(SelectNode &statement, unique_ group_by_all_indexes.push_back(i); } - // inspect what is going on here. result->select_list.push_back(std::move(expr)); if (is_original_column) { new_names.push_back(std::move(result->names[i])); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 44e1d0408365..57e3f1adac19 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -140,12 +140,12 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { // need. auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), std::move(right_node), logical_type, node.setop_all); - // Is this necessary? root->ResolveOperatorTypes(); unique_ptr op; // if we have an intersect or except, immediately translate it to a semi or anti join. + // Unions stay as they are. if (logical_type == LogicalOperatorType::LOGICAL_INTERSECT || logical_type == LogicalOperatorType::LOGICAL_EXCEPT) { auto &left = root->children[0]; auto &right = root->children[1]; diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index 87760d71b581..d0cc7f3cdb7b 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -115,60 +115,6 @@ SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' OR 2 b 1 a -query I -SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 ----- -1 -1 -1 -2 - -# mixing types, should upcast -query T -SELECT 1 UNION ALL SELECT 'asdf' ----- -1 -asdf - -query T -SELECT NULL UNION ALL SELECT 'asdf' ----- -NULL -asdf - -# only UNION, distinct results -query I -SELECT 1 UNION SELECT 1 ----- -1 - -query IT -SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 ----- -1 a -2 b -3 c - -query I -SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 ----- -1 -2 - -# mixed fun -query IT -SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 ----- -1 a -2 b - -query IT -SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC ----- -2 b -1 a - - # EXCEPT ALL / INTERSECT ALL query II select x, count(*) as c From 16ca96a8aeec8b3dab456c8167c2b90b3ddee59b Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 3 Jan 2024 14:21:55 -0800 Subject: [PATCH 33/36] fix merge conflict --- test/sql/tpcds/tpcds_sf0.test | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/sql/tpcds/tpcds_sf0.test b/test/sql/tpcds/tpcds_sf0.test index 09ef16e24674..09f2ee526bce 100644 --- a/test/sql/tpcds/tpcds_sf0.test +++ b/test/sql/tpcds/tpcds_sf0.test @@ -17,12 +17,20 @@ endloop # out of range statement error PRAGMA tpcds(-1) +---- +Syntax Error: Out of range TPC-DS query number statement error PRAGMA tpcds(3290819023812038903) +---- +Invalid Input Error statement error PRAGMA tpcds(32908301298) +---- +Invalid Input Error statement error PRAGMA tpcds(1.1) +---- +Binder Error: \ No newline at end of file From ed15d66ecb50b7467a3d5cf25a25f1752cb75775 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 3 Jan 2024 14:57:49 -0800 Subject: [PATCH 34/36] temporary stop. need to copy the filters so they can be pushed down the right side --- .../pushdown/pushdown_semi_anti_join.cpp | 30 +++++++++++++++++-- .../optimizer/setops/operation_converter.test | 2 +- test/optimizer/setops/pushdown_set_op.test | 1 - 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/optimizer/pushdown/pushdown_semi_anti_join.cpp b/src/optimizer/pushdown/pushdown_semi_anti_join.cpp index c9506fe05d68..4e4e9b1ac171 100644 --- a/src/optimizer/pushdown/pushdown_semi_anti_join.cpp +++ b/src/optimizer/pushdown/pushdown_semi_anti_join.cpp @@ -9,6 +9,28 @@ namespace duckdb { using Filter = FilterPushdown::Filter; +static bool CanPushDownRight(LogicalOperator &op) { + if (op.type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN) { + return false; + } + auto &join = op.Cast(); + if (join.join_type != JoinType::SEMI) { + return false; + } + auto &conditions = join.conditions; + for (auto &cond : conditions) { + auto &left = cond.left; + auto &right = cond.right; + if (cond.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { + if (left->type == ExpressionType::BOUND_COLUMN_REF && right->type == ExpressionType::BOUND_COLUMN_REF) { + continue; + } + return false; + } + } + return true; +} + unique_ptr FilterPushdown::PushdownSemiAntiJoin(unique_ptr op) { auto &join = op->Cast(); if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { @@ -17,8 +39,12 @@ unique_ptr FilterPushdown::PushdownSemiAntiJoin(unique_ptrchildren[0] = Rewrite(std::move(op->children[0])); - FilterPushdown right_pushdown(optimizer); - op->children[1] = right_pushdown.Rewrite(std::move(op->children[1])); + if (CanPushDownRight(*op)) { + op->children[1] = Rewrite(std::move(op->children[1])); + } else { + FilterPushdown right_pushdown(optimizer); + op->children[1] = right_pushdown.Rewrite(std::move(op->children[1])); + } bool left_empty = op->children[0]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; bool right_empty = op->children[1]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; diff --git a/test/optimizer/setops/operation_converter.test b/test/optimizer/setops/operation_converter.test index 8bc525f3c840..9a799746af14 100644 --- a/test/optimizer/setops/operation_converter.test +++ b/test/optimizer/setops/operation_converter.test @@ -9,5 +9,5 @@ statement ok create table right_table as select range*2 as b from range(10000); statement ok -select * from left_table intersect select * from right_table; +select * from (select * from left_table intersect select * from right_table) ll1 where ll1.a < 5; diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index 80a790667667..1b98d31a690f 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -5,7 +5,6 @@ statement ok PRAGMA explain_output = 'OPTIMIZED_ONLY' - query II explain select 42 intersect select 42; ---- From 29cb2b82d2257611d8cc68e30246ef5b8c08f4d1 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 4 Jan 2024 10:11:38 -0800 Subject: [PATCH 35/36] stopping point. it's possible filters are pulled up from the right of a semi join. If so they can be pushed to the left. need to solve this as well --- .../planner/operator/logical_filter.hpp | 37 ++++++++++++ src/optimizer/filter_pullup.cpp | 8 ++- .../pushdown/pushdown_semi_anti_join.cpp | 56 +++++++++++-------- test/optimizer/pullup_filters.test | 10 +++- 4 files changed, 85 insertions(+), 26 deletions(-) diff --git a/src/include/duckdb/planner/operator/logical_filter.hpp b/src/include/duckdb/planner/operator/logical_filter.hpp index acd5771b985f..5e969e75ded3 100644 --- a/src/include/duckdb/planner/operator/logical_filter.hpp +++ b/src/include/duckdb/planner/operator/logical_filter.hpp @@ -9,9 +9,46 @@ #pragma once #include "duckdb/planner/logical_operator.hpp" +#include "duckdb/planner/operator/logical_comparison_join.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" namespace duckdb { +static bool CanFiltersPropogateRightSide(LogicalOperator &op) { + if (op.type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN) { + return false; + } + auto &join = op.Cast(); + if (join.join_type != JoinType::SEMI) { + return false; + } + auto left_bindings = op.children[0]->GetColumnBindings(); + auto right_bindings = op.children[1]->GetColumnBindings(); + D_ASSERT(left_bindings.size() == right_bindings.size()); + // make sure we are comparing every column + if (join.conditions.size() != left_bindings.size()) { + return false; + } + auto &conditions = join.conditions; + for (idx_t i = 0; i < conditions.size(); i++) { + auto &cond = conditions[i]; + auto &left = cond.left; + auto &right = cond.right; + if (cond.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { + if (left->type == ExpressionType::BOUND_COLUMN_REF && right->type == ExpressionType::BOUND_COLUMN_REF) { + auto &left_expr = left->Cast(); + auto &right_expr = right->Cast(); + auto left_match = left_expr.binding == left_bindings[i]; + auto right_match = right_expr.binding == right_bindings[i]; + if (!(left_match && right_match)) { + return false; + } + } + } + } + return true; +} + //! LogicalFilter represents a filter operation (e.g. WHERE or HAVING clause) class LogicalFilter : public LogicalOperator { public: diff --git a/src/optimizer/filter_pullup.cpp b/src/optimizer/filter_pullup.cpp index 04986f199570..716751d0b2ce 100644 --- a/src/optimizer/filter_pullup.cpp +++ b/src/optimizer/filter_pullup.cpp @@ -1,5 +1,6 @@ #include "duckdb/optimizer/filter_pullup.hpp" #include "duckdb/planner/operator/logical_join.hpp" +#include "duckdb/planner/operator/logical_filter.hpp" namespace duckdb { @@ -40,8 +41,13 @@ unique_ptr FilterPullup::PullupJoin(unique_ptr case JoinType::INNER: return PullupInnerJoin(std::move(op)); case JoinType::LEFT: - case JoinType::ANTI: + case JoinType::ANTI: { + return PullupFromLeft(std::move(op)); + } case JoinType::SEMI: { + if (CanFiltersPropogateRightSide(*op)) { + return PullupBothSide(std::move(op)); + } return PullupFromLeft(std::move(op)); } default: diff --git a/src/optimizer/pushdown/pushdown_semi_anti_join.cpp b/src/optimizer/pushdown/pushdown_semi_anti_join.cpp index 4e4e9b1ac171..3354bc47ea8c 100644 --- a/src/optimizer/pushdown/pushdown_semi_anti_join.cpp +++ b/src/optimizer/pushdown/pushdown_semi_anti_join.cpp @@ -9,26 +9,25 @@ namespace duckdb { using Filter = FilterPushdown::Filter; -static bool CanPushDownRight(LogicalOperator &op) { - if (op.type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN) { - return false; - } - auto &join = op.Cast(); - if (join.join_type != JoinType::SEMI) { - return false; - } - auto &conditions = join.conditions; - for (auto &cond : conditions) { - auto &left = cond.left; - auto &right = cond.right; - if (cond.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { - if (left->type == ExpressionType::BOUND_COLUMN_REF && right->type == ExpressionType::BOUND_COLUMN_REF) { - continue; +static void ReplaceBindings(vector &bindings, Filter &filter, Expression &expr, vector &replacement_bindings) { + if (expr.type == ExpressionType::BOUND_COLUMN_REF) { + auto &colref = expr.Cast(); + D_ASSERT(colref.depth == 0); + + // rewrite the binding by looking into the bound_tables list of the subquery + idx_t binding_index = 0; + for (idx_t i = 0; i < bindings.size(); i++) { + if (bindings[i] == colref.binding) { + binding_index = i; + break; } - return false; } + colref.binding = replacement_bindings[binding_index]; + filter.bindings.insert(colref.binding.table_index); + return; } - return true; + ExpressionIterator::EnumerateChildren( + expr, [&](Expression &child) { ReplaceBindings(bindings, filter, child, replacement_bindings); }); } unique_ptr FilterPushdown::PushdownSemiAntiJoin(unique_ptr op) { @@ -37,13 +36,26 @@ unique_ptr FilterPushdown::PushdownSemiAntiJoin(unique_ptrchildren[0] = Rewrite(std::move(op->children[0])); - if (CanPushDownRight(*op)) { - op->children[1] = Rewrite(std::move(op->children[1])); - } else { + if (CanFiltersPropogateRightSide(*op)) { + auto left_bindings = op->children[0]->GetColumnBindings(); + auto right_bindings = op->children[1]->GetColumnBindings(); FilterPushdown right_pushdown(optimizer); + for (idx_t i = 0; i < filters.size(); i++) { + // first create a copy of the filter + auto right_filter = make_uniq(); + right_filter->filter = filters[i]->filter->Copy(); + + ReplaceBindings(left_bindings, *right_filter, *right_filter->filter, right_bindings); + right_filter->ExtractBindings(); + + // move the filters into the child pushdown nodes + right_pushdown.filters.push_back(std::move(right_filter)); + } + op->children[0] = Rewrite(std::move(op->children[0])); op->children[1] = right_pushdown.Rewrite(std::move(op->children[1])); + } else { + // push all current filters down the left side + op->children[0] = Rewrite(std::move(op->children[0])); } bool left_empty = op->children[0]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT; diff --git a/test/optimizer/pullup_filters.test b/test/optimizer/pullup_filters.test index af914518b42c..90128260c34f 100644 --- a/test/optimizer/pullup_filters.test +++ b/test/optimizer/pullup_filters.test @@ -6,13 +6,15 @@ statement ok PRAGMA explain_output = 'PHYSICAL_ONLY' statement ok -CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 11, 1) t1(i) +CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 11, 1) t1(i); statement ok -CREATE TABLE vals2(k BIGINT, l BIGINT) +CREATE TABLE vals2(k BIGINT, l BIGINT); statement ok -INSERT INTO vals2 SELECT * FROM vals1 +INSERT INTO vals2 SELECT * FROM vals1; + +mode skip ## INNER JOIN: pull up a single filter in cross product from LHS query II @@ -74,6 +76,8 @@ EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=3 AND k=5 INTERSECT SE ---- physical_plan :((.*=3.*=5.*=3.*=5.*)|(.*=5.*=3.*=5.*=3.*)) +mode unskip + ## INTERSECT: pull up filters from RHS query II EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 INTERSECT SELECT * FROM vals1, vals2 WHERE i=3 AND k=5) tbl1; From 04bcfa0f00844bb70a56f85201bdf3c0bea0b515 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 4 Jan 2024 10:51:41 -0800 Subject: [PATCH 36/36] make format-fix --- src/execution/physical_plan/plan_set_operation.cpp | 3 ++- src/optimizer/pushdown/pushdown_semi_anti_join.cpp | 3 ++- src/planner/binder/query_node/plan_setop.cpp | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/execution/physical_plan/plan_set_operation.cpp b/src/execution/physical_plan/plan_set_operation.cpp index 5dac7235aeb3..3307f543b230 100644 --- a/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/execution/physical_plan/plan_set_operation.cpp @@ -40,7 +40,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati break; case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { - throw InternalException("Logical Except/Intersect should have been transformed to semi anti before the physical planning phase"); + throw InternalException( + "Logical Except/Intersect should have been transformed to semi anti before the physical planning phase"); } default: throw InternalException("Unexpected operator type for set operation"); diff --git a/src/optimizer/pushdown/pushdown_semi_anti_join.cpp b/src/optimizer/pushdown/pushdown_semi_anti_join.cpp index 3354bc47ea8c..15889c6a2a5f 100644 --- a/src/optimizer/pushdown/pushdown_semi_anti_join.cpp +++ b/src/optimizer/pushdown/pushdown_semi_anti_join.cpp @@ -9,7 +9,8 @@ namespace duckdb { using Filter = FilterPushdown::Filter; -static void ReplaceBindings(vector &bindings, Filter &filter, Expression &expr, vector &replacement_bindings) { +static void ReplaceBindings(vector &bindings, Filter &filter, Expression &expr, + vector &replacement_bindings) { if (expr.type == ExpressionType::BOUND_COLUMN_REF) { auto &colref = expr.Cast(); D_ASSERT(colref.depth == 0); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 57e3f1adac19..3568d91d9c62 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -10,7 +10,8 @@ namespace duckdb { -static unique_ptr CreateWindowWithPartitionedRowNum(idx_t window_table_index, unique_ptr op) { +static unique_ptr CreateWindowWithPartitionedRowNum(idx_t window_table_index, + unique_ptr op) { // instead create a logical projection on top of whatever to add the window expression, then auto window = make_uniq(window_table_index); auto row_number = @@ -208,8 +209,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { for (idx_t i = 0; i < bindings.size() - 1; i++) { projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); } - auto projection = - make_uniq(node.setop_index, std::move(projection_select_list)); + auto projection = make_uniq(node.setop_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); }