From fcd9ff7273d5876e72461de288a4eda1c8ab715f Mon Sep 17 00:00:00 2001 From: PGross Date: Tue, 16 Apr 2024 20:03:08 +0200 Subject: [PATCH 1/2] factorization working for the edgiest case of all edge cases --- src/common/types.cpp | 7 +++ src/common/types/data_chunk.cpp | 1 + src/common/vector_operations/vector_copy.cpp | 5 ++ src/execution/aggregate_hashtable.cpp | 3 + src/execution/join_hashtable.cpp | 58 ++++++++++++++--- .../duckdb/common/enums/vector_type.hpp | 3 +- src/include/duckdb/common/types.hpp | 62 ++++++++++++------- src/include/duckdb/common/types/vector.hpp | 13 ++++ .../duckdb/common/types/vector_buffer.hpp | 2 + .../duckdb/execution/join_hashtable.hpp | 7 +++ .../duckdb/execution/physical_operator.hpp | 3 +- src/planner/operator/logical_join.cpp | 8 +++ 12 files changed, 137 insertions(+), 35 deletions(-) diff --git a/src/common/types.cpp b/src/common/types.cpp index dc5c2d6d4ed3..117dda545990 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -128,6 +128,8 @@ PhysicalType LogicalType::GetInternalType() { return PhysicalType::LIST; case LogicalTypeId::ARRAY: return PhysicalType::ARRAY; + case LogicalTypeId::FACTORIZED: + return PhysicalType::FACTORIZED; case LogicalTypeId::POINTER: // LCOV_EXCL_START if (sizeof(uintptr_t) == sizeof(uint32_t)) { @@ -288,6 +290,8 @@ string TypeIdToString(PhysicalType type) { return "LIST"; case PhysicalType::ARRAY: return "ARRAY"; + case PhysicalType::FACTORIZED: + return "FACTORIZED"; case PhysicalType::INVALID: return "INVALID"; case PhysicalType::BIT: @@ -295,6 +299,7 @@ string TypeIdToString(PhysicalType type) { case PhysicalType::UNKNOWN: return "UNKNOWN"; } + return "INVALID"; } // LCOV_EXCL_STOP @@ -338,6 +343,8 @@ idx_t GetTypeIdSize(PhysicalType type) { return 0; // no own payload case PhysicalType::LIST: return sizeof(list_entry_t); // offset + len + case PhysicalType::FACTORIZED: + return sizeof(fact_entry_t); default: throw InternalException("Invalid PhysicalType for GetTypeIdSize"); } diff 
--git a/src/common/types/data_chunk.cpp b/src/common/types/data_chunk.cpp index 6e1b5ed89ef5..7ec3bc903c77 100644 --- a/src/common/types/data_chunk.cpp +++ b/src/common/types/data_chunk.cpp @@ -344,6 +344,7 @@ void DataChunk::Verify() { #ifdef DEBUG D_ASSERT(size() <= capacity); + // verify that all vectors in this chunk have the chunk selection vector for (idx_t i = 0; i < ColumnCount(); i++) { data[i].Verify(size()); diff --git a/src/common/vector_operations/vector_copy.cpp b/src/common/vector_operations/vector_copy.cpp index 3823cd39e302..c6f5850299e7 100644 --- a/src/common/vector_operations/vector_copy.cpp +++ b/src/common/vector_operations/vector_copy.cpp @@ -29,6 +29,8 @@ static const ValidityMask &CopyValidityMask(const Vector &v) { return FlatVector::Validity(v); case VectorType::FSST_VECTOR: return FSSTVector::Validity(v); + case VectorType::FACTORIZED_VECTOR: + return FactorizedVector::Validity(v); default: throw InternalException("Unsupported vector type in vector copy"); } @@ -76,6 +78,9 @@ void VectorOperations::Copy(const Vector &source_p, Vector &target, const Select case VectorType::FLAT_VECTOR: finished = true; break; + case VectorType::FACTORIZED_VECTOR: + finished = true; + break; default: throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy"); } diff --git a/src/execution/aggregate_hashtable.cpp b/src/execution/aggregate_hashtable.cpp index 59cd8693ef2a..6703d242f587 100644 --- a/src/execution/aggregate_hashtable.cpp +++ b/src/execution/aggregate_hashtable.cpp @@ -10,6 +10,7 @@ #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/execution/ht_entry.hpp" +#include "duckdb/execution/join_hashtable.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" namespace duckdb { @@ -245,6 +246,8 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe D_ASSERT(groups.GetTypes()[i] == 
layout.GetTypes()[i]); } #endif + // todo: magic number 9 (pointer offset) + JoinHashTable::GetChainLengths(payload.data[0], groups.size(), 9); const auto new_group_count = FindOrCreateGroups(groups, group_hashes, state.addresses, state.new_groups); VectorOperations::AddInPlace(state.addresses, layout.GetAggrOffset(), payload.size()); diff --git a/src/execution/join_hashtable.cpp b/src/execution/join_hashtable.cpp index f04553469ee9..ffa6c3c42031 100644 --- a/src/execution/join_hashtable.cpp +++ b/src/execution/join_hashtable.cpp @@ -870,7 +870,7 @@ idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vect void ScanStructure::AdvancePointers(const SelectionVector &sel, idx_t sel_count) { - if (!ht.chains_longer_than_one) { + if (!ht.chains_longer_than_one || EmitFactVectors()) { this->count = 0; return; } @@ -914,6 +914,7 @@ void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &r idx_t result_count = ScanInnerJoin(keys, chain_match_sel_vector); if (result_count > 0) { + if (PropagatesBuildSide(ht.join_type)) { // full/right outer join: mark join matches as FOUND in the HT auto ptrs = FlatVector::GetData(pointers); @@ -925,19 +926,42 @@ void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &r Store(true, ptrs[idx] + ht.tuple_size); } } + // for right semi join, just mark the entry as found and move on. 
Propagation happens later if (ht.join_type != JoinType::RIGHT_SEMI && ht.join_type != JoinType::RIGHT_ANTI) { + // matches were found // construct the result // on the LHS, we create a slice using the result vector - result.Slice(left, chain_match_sel_vector, result_count); - - // on the RHS, we need to fetch the data from the hash table - for (idx_t i = 0; i < ht.output_columns.size(); i++) { - auto &vector = result.data[left.ColumnCount() + i]; - const auto output_col_idx = ht.output_columns[i]; - D_ASSERT(vector.GetType() == ht.layout.GetTypes()[output_col_idx]); - GatherResult(vector, chain_match_sel_vector, result_count, output_col_idx); + result.Slice(left, chain_match_sel_vector, result_count, 0); + + if (EmitFactVectors()) { + // in our very special case, the aggregate keys are the first vector and the key to be grouped by is + // the second vector + + // set the first vector in the result to be the fact vector + auto &fact_vector = result.data[1]; + fact_vector.SetVectorType(VectorType::FLAT_VECTOR); + // fact_vector.SetVectorType(VectorType::FACTORIZED_VECTOR); + auto fact_vector_pointer = FactorizedVector::GetData(fact_vector); + + auto ptrs = FlatVector::GetData(pointers); + + for (idx_t j = 0; j < result_count; j++) { + auto idx = chain_match_sel_vector.get_index(j); + data_ptr_t ptr = ptrs[idx]; + fact_vector_pointer[idx] = fact_entry_t(ptr); + } + + } else { + + // on the RHS, we need to fetch the data from the hash table + for (idx_t i = 0; i < ht.output_columns.size(); i++) { + auto &vector = result.data[left.ColumnCount() + i]; + const auto output_col_idx = ht.output_columns[i]; + D_ASSERT(vector.GetType() == ht.layout.GetTypes()[output_col_idx]); + GatherResult(vector, chain_match_sel_vector, result_count, output_col_idx); + } } } AdvancePointers(); @@ -1473,6 +1497,22 @@ unique_ptr JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDat return ss; } +void JoinHashTable::GetChainLengths(Vector &row_pointer_v, idx_t count, idx_t pointer_offset) { + 
+ row_pointer_v.Flatten(count); + auto row_pointer = FlatVector::GetData(row_pointer_v); + + for (idx_t i = 0; i < count; i++) { + auto next_ptr = row_pointer[i]; + idx_t chain_length = 0; + while (next_ptr) { + next_ptr = Load(next_ptr + pointer_offset); + chain_length++; + } + // set the chain length in the row pointer + row_pointer[i] = reinterpret_cast(chain_length); + } +} ProbeSpill::ProbeSpill(JoinHashTable &ht, ClientContext &context, const vector &probe_types) : ht(ht), context(context), probe_types(probe_types) { diff --git a/src/include/duckdb/common/enums/vector_type.hpp b/src/include/duckdb/common/enums/vector_type.hpp index 155df5fe975e..80e17c3119e0 100644 --- a/src/include/duckdb/common/enums/vector_type.hpp +++ b/src/include/duckdb/common/enums/vector_type.hpp @@ -17,7 +17,8 @@ enum class VectorType : uint8_t { FSST_VECTOR, // Contains string data compressed with FSST CONSTANT_VECTOR, // Constant vector represents a single constant DICTIONARY_VECTOR, // Dictionary vector represents a selection vector on top of another vector - SEQUENCE_VECTOR // Sequence vector represents a sequence with a start point and an increment + SEQUENCE_VECTOR, // Sequence vector represents a sequence with a start point and an increment + FACTORIZED_VECTOR, // Factorized vector represents a set of tuples as the cartesian product of a list of tuples }; string VectorTypeToString(VectorType type); diff --git a/src/include/duckdb/common/types.hpp b/src/include/duckdb/common/types.hpp index bd5778ec402a..2425ad91760b 100644 --- a/src/include/duckdb/common/types.hpp +++ b/src/include/duckdb/common/types.hpp @@ -52,6 +52,13 @@ struct list_entry_t { // NOLINT: mimic std casing uint64_t length; }; +// used for the FactorizedVector +struct fact_entry_t { // NOLINT: mimic std casing + fact_entry_t(data_ptr_t data_ptr) : row_ptr(data_ptr) { + } + data_ptr_t row_ptr; +}; + using union_tag_t = uint8_t; //===--------------------------------------------------------------------===// @@ 
-167,10 +174,13 @@ enum class PhysicalType : uint8_t { ///// Like LIST, but with 64-bit offsets // LARGE_LIST = 33, + /// Factorized representation of multiple rows + FACTORIZED = 34, + /// DuckDB Extensions VARCHAR = 200, // our own string representation, different from STRING and LARGE_STRING above UINT128 = 203, // 128-bit unsigned integers - INT128 = 204, // 128-bit integers + INT128 = 204, // 128-bit integers UNKNOWN = 205, // Unknown physical type of user defined types /// Boolean as 1 bit, LSB bit-packed ordering BIT = 206, @@ -212,8 +222,8 @@ enum class LogicalTypeId : uint8_t { TIMESTAMP_TZ = 32, TIME_TZ = 34, BIT = 36, - STRING_LITERAL = 37, /* string literals, used for constant strings - only exists while binding */ - INTEGER_LITERAL = 38,/* integer literals, used for constant integers - only exists while binding */ + STRING_LITERAL = 37, /* string literals, used for constant strings - only exists while binding */ + INTEGER_LITERAL = 38, /* integer literals, used for constant integers - only exists while binding */ UHUGEINT = 49, HUGEINT = 50, @@ -229,7 +239,8 @@ enum class LogicalTypeId : uint8_t { AGGREGATE_STATE = 105, LAMBDA = 106, UNION = 107, - ARRAY = 108 + ARRAY = 108, + FACTORIZED = 109 }; struct ExtraTypeInfo; @@ -319,29 +330,32 @@ struct LogicalType { DUCKDB_API bool HasAlias() const; DUCKDB_API string GetAlias() const; - //! Returns the maximum logical type when combining the two types - or throws an exception if combining is not possible - DUCKDB_API static LogicalType MaxLogicalType(ClientContext &context, const LogicalType &left, const LogicalType &right); - DUCKDB_API static bool TryGetMaxLogicalType(ClientContext &context, const LogicalType &left, const LogicalType &right, LogicalType &result); - //! Forcibly returns a maximum logical type - similar to MaxLogicalType but never throws. As a fallback either left or right are returned. + //! 
Returns the maximum logical type when combining the two types - or throws an exception if combining is not + //! possible + DUCKDB_API static LogicalType MaxLogicalType(ClientContext &context, const LogicalType &left, + const LogicalType &right); + DUCKDB_API static bool TryGetMaxLogicalType(ClientContext &context, const LogicalType &left, + const LogicalType &right, LogicalType &result); + //! Forcibly returns a maximum logical type - similar to MaxLogicalType but never throws. As a fallback either left + //! or right are returned. DUCKDB_API static LogicalType ForceMaxLogicalType(const LogicalType &left, const LogicalType &right); //! Normalize a type - removing literals DUCKDB_API static LogicalType NormalizeType(const LogicalType &type); - - //! Gets the decimal properties of a numeric type. Fails if the type is not numeric. + //! Gets the decimal properties of a numeric type. Fails if the type is not numeric. DUCKDB_API bool GetDecimalProperties(uint8_t &width, uint8_t &scale) const; DUCKDB_API void Verify() const; DUCKDB_API bool IsValid() const; - template + template bool Contains(F &&predicate) const; bool Contains(LogicalTypeId type_id) const; private: - LogicalTypeId id_; // NOLINT: allow this naming for legacy reasons - PhysicalType physical_type_; // NOLINT: allow this naming for legacy reasons + LogicalTypeId id_; // NOLINT: allow this naming for legacy reasons + PhysicalType physical_type_; // NOLINT: allow this naming for legacy reasons shared_ptr type_info_; // NOLINT: allow this naming for legacy reasons private: @@ -383,9 +397,10 @@ struct LogicalType { static constexpr const LogicalTypeId LAMBDA = LogicalTypeId::LAMBDA; static constexpr const LogicalTypeId INVALID = LogicalTypeId::INVALID; static constexpr const LogicalTypeId ROW_TYPE = LogicalTypeId::BIGINT; + static constexpr const LogicalTypeId FACTORIZED = LogicalTypeId::FACTORIZED; // explicitly allowing these functions to be capitalized to be in-line with the remaining functions - 
DUCKDB_API static LogicalType DECIMAL(uint8_t width, uint8_t scale); // NOLINT + DUCKDB_API static LogicalType DECIMAL(uint8_t width, uint8_t scale); // NOLINT DUCKDB_API static LogicalType VARCHAR_COLLATION(string collation); // NOLINT DUCKDB_API static LogicalType LIST(const LogicalType &child); // NOLINT DUCKDB_API static LogicalType STRUCT(child_list_t children); // NOLINT @@ -400,7 +415,7 @@ struct LogicalType { // ANY but with special rules (default is LogicalType::ANY, 5) DUCKDB_API static LogicalType ANY_PARAMS(LogicalType target, idx_t cast_score = 5); // NOLINT //! Integer literal of the specified value - DUCKDB_API static LogicalType INTEGER_LITERAL(const Value &constant); // NOLINT + DUCKDB_API static LogicalType INTEGER_LITERAL(const Value &constant); // NOLINT // DEPRECATED - provided for backwards compatibility DUCKDB_API static LogicalType ENUM(const string &enum_name, Vector &ordered_data, idx_t size); // NOLINT DUCKDB_API static LogicalType USER(const string &user_type_name); // NOLINT @@ -534,27 +549,26 @@ struct aggregate_state_t { vector bound_argument_types; }; -template +template bool LogicalType::Contains(F &&predicate) const { - if(predicate(*this)) { + if (predicate(*this)) { return true; } - switch(id()) { + switch (id()) { case LogicalTypeId::STRUCT: { - for(const auto &child : StructType::GetChildTypes(*this)) { - if(child.second.Contains(predicate)) { + for (const auto &child : StructType::GetChildTypes(*this)) { + if (child.second.Contains(predicate)) { return true; } } - } - break; + } break; case LogicalTypeId::LIST: return ListType::GetChildType(*this).Contains(predicate); case LogicalTypeId::MAP: return MapType::KeyType(*this).Contains(predicate) || MapType::ValueType(*this).Contains(predicate); case LogicalTypeId::UNION: - for(const auto &child : UnionType::CopyMemberTypes(*this)) { - if(child.second.Contains(predicate)) { + for (const auto &child : UnionType::CopyMemberTypes(*this)) { + if (child.second.Contains(predicate)) { 
return true; } } diff --git a/src/include/duckdb/common/types/vector.hpp b/src/include/duckdb/common/types/vector.hpp index 80d2c2ce9654..e8547bd71bf2 100644 --- a/src/include/duckdb/common/types/vector.hpp +++ b/src/include/duckdb/common/types/vector.hpp @@ -75,6 +75,7 @@ class Vector { friend struct UnionVector; friend struct SequenceVector; friend struct ArrayVector; + friend struct FactorizedVector; friend class DataChunk; friend class VectorCacheBuffer; @@ -563,4 +564,16 @@ struct SequenceVector { } }; +struct FactorizedVector { + + static inline const ValidityMask &Validity(const Vector &vector) { + D_ASSERT(vector.GetVectorType() == VectorType::FACTORIZED_VECTOR); + return vector.validity; + } + + static inline fact_entry_t *GetData(Vector &vector) { + return reinterpret_cast(vector.data); + } +}; + } // namespace duckdb diff --git a/src/include/duckdb/common/types/vector_buffer.hpp b/src/include/duckdb/common/types/vector_buffer.hpp index e1d49aacb629..0d90684b7ccf 100644 --- a/src/include/duckdb/common/types/vector_buffer.hpp +++ b/src/include/duckdb/common/types/vector_buffer.hpp @@ -302,4 +302,6 @@ class ManagedVectorBuffer : public VectorBuffer { BufferHandle handle; }; +class VectorFactBuffer : public VectorBuffer {}; + } // namespace duckdb diff --git a/src/include/duckdb/execution/join_hashtable.hpp b/src/include/duckdb/execution/join_hashtable.hpp index f5f3ae02d95e..308483e86a08 100644 --- a/src/include/duckdb/execution/join_hashtable.hpp +++ b/src/include/duckdb/execution/join_hashtable.hpp @@ -378,6 +378,13 @@ class JoinHashTable { return PointerTableCapacity(count) * sizeof(data_ptr_t); } + //! Whether or not to emit fact vectors from the HT + static bool EmitFactVectors(){ + return true; + } + + static void GetChainLengths(Vector &row_pointer_v, idx_t count, idx_t pointer_offset); + //! 
Get total size of HT if all partitions would be built idx_t GetTotalSize(vector> &local_hts, idx_t &max_partition_size, idx_t &max_partition_count) const; diff --git a/src/include/duckdb/execution/physical_operator.hpp b/src/include/duckdb/execution/physical_operator.hpp index f5876c5e7ea1..11c5cd680c04 100644 --- a/src/include/duckdb/execution/physical_operator.hpp +++ b/src/include/duckdb/execution/physical_operator.hpp @@ -220,7 +220,8 @@ class CachingOperatorState : public OperatorState { //! inherit their state class from the CachingOperatorState. class CachingPhysicalOperator : public PhysicalOperator { public: - static constexpr const idx_t CACHE_THRESHOLD = 64; + // todo: Reduced CACHE THRESHOLD to 2 for testing purposes + static constexpr const idx_t CACHE_THRESHOLD = 2; CachingPhysicalOperator(PhysicalOperatorType type, vector types, idx_t estimated_cardinality); bool caching_supported; diff --git a/src/planner/operator/logical_join.cpp b/src/planner/operator/logical_join.cpp index eacf7b8e75f4..fbbdb0fa3e58 100644 --- a/src/planner/operator/logical_join.cpp +++ b/src/planner/operator/logical_join.cpp @@ -21,8 +21,11 @@ vector LogicalJoin::GetColumnBindings() { left_bindings.emplace_back(mark_index, 0); return left_bindings; } + // todo: hack - override the return types of the rhs to be one fact vector + // for other join types we project both the LHS and the RHS auto right_bindings = MapBindings(children[1]->GetColumnBindings(), right_projection_map); + // auto right_bindings = {children[1]->GetColumnBindings()[0]}; if (join_type == JoinType::RIGHT_SEMI || join_type == JoinType::RIGHT_ANTI) { return right_bindings; } @@ -42,7 +45,12 @@ void LogicalJoin::ResolveTypes() { return; } // for any other join we project both sides + + // todo: hack - override the return types of the rhs to be one fact vector + // vector right_types_override = {LogicalType::FACTORIZED}; + auto right_types = MapTypes(children[1]->types, right_projection_map); + if (join_type == 
JoinType::RIGHT_SEMI || join_type == JoinType::RIGHT_ANTI) { types = right_types; return; From a080d6579d69ea039f5edce78df1d1f11d63dab1 Mon Sep 17 00:00:00 2001 From: PGross Date: Wed, 17 Apr 2024 11:43:04 +0200 Subject: [PATCH 2/2] improvements to make benchmark run --- src/execution/aggregate_hashtable.cpp | 1 + src/execution/join_hashtable.cpp | 43 ++++++++++++++++--- .../duckdb/execution/join_hashtable.hpp | 2 +- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/execution/aggregate_hashtable.cpp b/src/execution/aggregate_hashtable.cpp index 6703d242f587..67d8aedb262f 100644 --- a/src/execution/aggregate_hashtable.cpp +++ b/src/execution/aggregate_hashtable.cpp @@ -246,6 +246,7 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe D_ASSERT(groups.GetTypes()[i] == layout.GetTypes()[i]); } #endif + // todo: magic number 9 (pointer offset) JoinHashTable::GetChainLengths(payload.data[0], groups.size(), 9); diff --git a/src/execution/join_hashtable.cpp b/src/execution/join_hashtable.cpp index ffa6c3c42031..3299ae7a0363 100644 --- a/src/execution/join_hashtable.cpp +++ b/src/execution/join_hashtable.cpp @@ -942,6 +942,7 @@ void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &r // set the first vector in the result to be the fact vector auto &fact_vector = result.data[1]; fact_vector.SetVectorType(VectorType::FLAT_VECTOR); + // fact_vector.SetVectorType(VectorType::FACTORIZED_VECTOR); auto fact_vector_pointer = FactorizedVector::GetData(fact_vector); @@ -953,6 +954,11 @@ void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &r fact_vector_pointer[idx] = fact_entry_t(ptr); } + // mark only the fields with pointers as valid + fact_vector.Slice(chain_match_sel_vector, result_count); + + + } else { // on the RHS, we need to fetch the data from the hash table @@ -1497,21 +1503,44 @@ unique_ptr JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDat return ss; } -void 
JoinHashTable::GetChainLengths(Vector &row_pointer_v, idx_t count, idx_t pointer_offset) { +void JoinHashTable::GetChainLengths(Vector &row_pointer_v, const idx_t count, const idx_t pointer_offset) { + + const idx_t COMPUTED_MASK = 0x8000000000000000; row_pointer_v.Flatten(count); auto row_pointer = FlatVector::GetData(row_pointer_v); for (idx_t i = 0; i < count; i++) { - auto next_ptr = row_pointer[i]; - idx_t chain_length = 0; - while (next_ptr) { - next_ptr = Load(next_ptr + pointer_offset); - chain_length++; + auto list_start_ptr = row_pointer[i]; + + D_ASSERT(list_start_ptr != nullptr); + + idx_t chain_length; + + auto first_next_pointer = Load(list_start_ptr + pointer_offset); + + // the chain length is already computed, just read the length from the first pointer + if (first_next_pointer & COMPUTED_MASK) { + // the chain length is already computed + chain_length = first_next_pointer & ~COMPUTED_MASK; + } + // the chain length is not computed yet, traverse the chain and compute the length + else { + chain_length = 1; + auto next_ptr = reinterpret_cast(first_next_pointer); + + while (next_ptr) { + next_ptr = Load(next_ptr + pointer_offset); + chain_length++; + } + + // store the chain length in the first pointer + Store(chain_length | COMPUTED_MASK, list_start_ptr + pointer_offset); } - // set the chain length in the row pointer + row_pointer[i] = reinterpret_cast(chain_length); } + } ProbeSpill::ProbeSpill(JoinHashTable &ht, ClientContext &context, const vector &probe_types) diff --git a/src/include/duckdb/execution/join_hashtable.hpp b/src/include/duckdb/execution/join_hashtable.hpp index 308483e86a08..125fa4a4ef05 100644 --- a/src/include/duckdb/execution/join_hashtable.hpp +++ b/src/include/duckdb/execution/join_hashtable.hpp @@ -383,7 +383,7 @@ class JoinHashTable { return true; } - static void GetChainLengths(Vector &row_pointer_v, idx_t count, idx_t pointer_offset); + static void GetChainLengths(Vector &row_pointer_v, const idx_t count, const idx_t 
pointer_offset); //! Get total size of HT if all partitions would be built idx_t GetTotalSize(vector> &local_hts, idx_t &max_partition_size,