From 408ffae03ed743a0506fdf9e8afd2931672a012d Mon Sep 17 00:00:00 2001
From: Tmonster <tom@ebergen.com>
Date: Mon, 3 Feb 2025 09:39:16 +0100
Subject: [PATCH] revert all new sampling functionality

---
 .../aggregate/holistic/reservoir_quantile.cpp |   2 +-
 .../catalog_entry/duck_table_entry.cpp        |   4 -
 .../catalog_entry/table_catalog_entry.cpp     |   4 -
 src/common/enum_util.cpp                      |  18 -
 src/execution/CMakeLists.txt                  |   4 +-
 src/execution/reservoir_sample.cpp            | 324 ++++++
 src/execution/sample/CMakeLists.txt           |   5 -
 .../sample/base_reservoir_sample.cpp          | 136 ---
 src/execution/sample/reservoir_sample.cpp     | 931 ------------------
 src/function/table/system/CMakeLists.txt      |   1 -
 .../table/system/pragma_table_sample.cpp      |  95 --
 src/function/table/system_functions.cpp       |   1 -
 .../catalog_entry/duck_table_entry.hpp        |   2 -
 .../catalog_entry/table_catalog_entry.hpp     |   3 -
 src/include/duckdb/common/enum_util.hpp       |   8 -
 src/include/duckdb/common/random_engine.hpp   |   6 +-
 .../duckdb/common/serializer/serializer.hpp   |   1 -
 src/include/duckdb/common/types/uuid.hpp      |   2 +-
 .../duckdb/execution/physical_operator.hpp    |   2 +-
 .../duckdb/execution/reservoir_sample.hpp     | 197 +---
 .../function/table/system_functions.hpp       |   4 -
 src/include/duckdb/main/client_data.hpp       |   2 +-
 src/include/duckdb/storage/data_table.hpp     |   3 -
 .../duckdb/storage/serialization/nodes.json   |   3 +-
 .../storage/table/row_group_collection.hpp    |   1 -
 .../duckdb/storage/table/table_statistics.hpp |   9 +-
 src/storage/data_table.cpp                    |   6 +-
 src/storage/serialization/serialize_nodes.cpp |   4 +-
 src/storage/table/row_group_collection.cpp    |  18 +-
 src/storage/table/table_statistics.cpp        |  96 +-
 .../can_sample_from_ingested_files.test       |  44 -
 .../sample/reservoir_testing_percentage.test  |  85 --
 .../reservoir_testing_rows_value.test_slow    |   8 +-
 .../sample/same_seed_same_sample.test_slow    |  10 +-
 .../table_samples/basic_sample_tests.test     |  88 --
 ...sample_stores_rows_from_later_on.test_slow |  42 -
 ...table_sample_converts_to_block_sample.test |  59 --
 .../table_sample_is_stored.test_slow          | 150 ---
 .../test_sample_is_destroyed_on_updates.test  | 125 ---
 .../table_samples/test_sample_types.test      |  80 --
 .../test_table_sample_errors.test             |  21 -
 test/sql/sample/test_sample.test_slow         |   6 +
 .../sql/storage/checkpointed_self_append.test |   1 +
 ...ace_drop_column_overflow_strings.test_slow |   2 +-
 test/sql/upsert/test_big_insert.test          |   2 +
 45 files changed, 399 insertions(+), 2216 deletions(-)
 create mode 100644 src/execution/reservoir_sample.cpp
 delete mode 100644 src/execution/sample/CMakeLists.txt
 delete mode 100644 src/execution/sample/base_reservoir_sample.cpp
 delete mode 100644 src/execution/sample/reservoir_sample.cpp
 delete mode 100644 src/function/table/system/pragma_table_sample.cpp
 delete mode 100644 test/sql/sample/can_sample_from_ingested_files.test
 delete mode 100644 test/sql/sample/reservoir_testing_percentage.test
 delete mode 100644 test/sql/sample/table_samples/basic_sample_tests.test
 delete mode 100644 test/sql/sample/table_samples/sample_stores_rows_from_later_on.test_slow
 delete mode 100644 test/sql/sample/table_samples/table_sample_converts_to_block_sample.test
 delete mode 100644 test/sql/sample/table_samples/table_sample_is_stored.test_slow
 delete mode 100644 test/sql/sample/table_samples/test_sample_is_destroyed_on_updates.test
 delete mode 100644 test/sql/sample/table_samples/test_sample_types.test
 delete mode 100644 test/sql/sample/table_samples/test_table_sample_errors.test

diff --git a/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp b/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp
index 8c332500d6e5..e99912762d43 100644
--- a/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp
+++ b/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp
@@ -39,7 +39,7 @@ struct ReservoirQuantileState {
 	void FillReservoir(idx_t sample_size, T element) {
 		if (pos < sample_size) {
 			v[pos++] = element;
-			r_samp->InitializeReservoirWeights(pos, len);
+			r_samp->InitializeReservoir(pos, len);
 		} else {
 			D_ASSERT(r_samp->next_index_to_sample >= r_samp->num_entries_to_skip_b4_next_sample);
 			if (r_samp->next_index_to_sample == r_samp->num_entries_to_skip_b4_next_sample) {
diff --git a/src/catalog/catalog_entry/duck_table_entry.cpp b/src/catalog/catalog_entry/duck_table_entry.cpp
index 4983710d9a99..7473417db132 100644
--- a/src/catalog/catalog_entry/duck_table_entry.cpp
+++ b/src/catalog/catalog_entry/duck_table_entry.cpp
@@ -136,10 +136,6 @@ unique_ptr<BaseStatistics> DuckTableEntry::GetStatistics(ClientContext &context,
 	return storage->GetStatistics(context, column.StorageOid());
 }
 
-unique_ptr<BlockingSample> DuckTableEntry::GetSample() {
-	return storage->GetSample();
-}
-
 unique_ptr<CatalogEntry> DuckTableEntry::AlterEntry(CatalogTransaction transaction, AlterInfo &info) {
 	if (transaction.HasContext()) {
 		return AlterEntry(transaction.GetContext(), info);
diff --git a/src/catalog/catalog_entry/table_catalog_entry.cpp b/src/catalog/catalog_entry/table_catalog_entry.cpp
index 3070b2e30d48..5a1e0f019c2d 100644
--- a/src/catalog/catalog_entry/table_catalog_entry.cpp
+++ b/src/catalog/catalog_entry/table_catalog_entry.cpp
@@ -43,10 +43,6 @@ LogicalIndex TableCatalogEntry::GetColumnIndex(string &column_name, bool if_exis
 	return entry;
 }
 
-unique_ptr<BlockingSample> TableCatalogEntry::GetSample() {
-	return nullptr;
-}
-
 bool TableCatalogEntry::ColumnExists(const string &name) const {
 	return columns.ColumnExists(name);
 }
diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp
index 49e7bb2b9b77..15db5f89adf4 100644
--- a/src/common/enum_util.cpp
+++ b/src/common/enum_util.cpp
@@ -3249,24 +3249,6 @@ SampleType EnumUtil::FromString<SampleType>(const char *value) {
 	return static_cast<SampleType>(StringUtil::StringToEnum(GetSampleTypeValues(), 3, "SampleType", value));
 }
 
-const StringUtil::EnumStringLiteral *GetSamplingStateValues() {
-	static constexpr StringUtil::EnumStringLiteral values[] {
-		{ static_cast<uint32_t>(SamplingState::RANDOM), "RANDOM" },
-		{ static_cast<uint32_t>(SamplingState::RESERVOIR), "RESERVOIR" }
-	};
-	return values;
-}
-
-template<>
-const char* EnumUtil::ToChars<SamplingState>(SamplingState value) {
-	return StringUtil::EnumToString(GetSamplingStateValues(), 2, "SamplingState", static_cast<uint32_t>(value));
-}
-
-template<>
-SamplingState EnumUtil::FromString<SamplingState>(const char *value) {
-	return static_cast<SamplingState>(StringUtil::StringToEnum(GetSamplingStateValues(), 2, "SamplingState", value));
-}
-
 const StringUtil::EnumStringLiteral *GetScanTypeValues() {
 	static constexpr StringUtil::EnumStringLiteral values[] {
 		{ static_cast<uint32_t>(ScanType::TABLE), "TABLE" },
diff --git a/src/execution/CMakeLists.txt b/src/execution/CMakeLists.txt
index 87c2a3bcc16e..ef0d0a3eb50e 100644
--- a/src/execution/CMakeLists.txt
+++ b/src/execution/CMakeLists.txt
@@ -3,7 +3,6 @@ add_subdirectory(nested_loop_join)
 add_subdirectory(operator)
 add_subdirectory(physical_plan)
 add_subdirectory(index)
-add_subdirectory(sample)
 
 add_library_unity(
   duckdb_execution
@@ -18,7 +17,8 @@ add_library_unity(
   perfect_aggregate_hashtable.cpp
   physical_operator.cpp
   physical_plan_generator.cpp
-  radix_partitioned_hashtable.cpp)
+  radix_partitioned_hashtable.cpp
+  reservoir_sample.cpp)
 set(ALL_OBJECT_FILES
     ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_execution>
     PARENT_SCOPE)
diff --git a/src/execution/reservoir_sample.cpp b/src/execution/reservoir_sample.cpp
new file mode 100644
index 000000000000..284e03faef09
--- /dev/null
+++ b/src/execution/reservoir_sample.cpp
@@ -0,0 +1,324 @@
+#include "duckdb/execution/reservoir_sample.hpp"
+#include "duckdb/common/types/data_chunk.hpp"
+#include "duckdb/common/pair.hpp"
+
+namespace duckdb {
+
+void ReservoirChunk::Serialize(Serializer &serializer) const {
+	chunk.Serialize(serializer);
+}
+
+unique_ptr<ReservoirChunk> ReservoirChunk::Deserialize(Deserializer &deserializer) {
+	auto result = make_uniq<ReservoirChunk>();
+	result->chunk.Deserialize(deserializer);
+	return result;
+}
+
+ReservoirSample::ReservoirSample(Allocator &allocator, idx_t sample_count, int64_t seed)
+    : BlockingSample(seed), allocator(allocator), sample_count(sample_count), reservoir_initialized(false) {
+}
+
+ReservoirSample::ReservoirSample(idx_t sample_count, int64_t seed)
+    : ReservoirSample(Allocator::DefaultAllocator(), sample_count, seed) {
+}
+
+void ReservoirSample::AddToReservoir(DataChunk &input) {
+	if (sample_count == 0) {
+		// sample count is 0, means no samples were requested
+		return;
+	}
+	old_base_reservoir_sample.num_entries_seen_total += input.size();
+	// Input: A population V of n weighted items
+	// Output: A reservoir R with a size m
+	// 1: The first m items of V are inserted into R
+	// first we need to check if the reservoir already has "m" elements
+	if (!reservoir_data_chunk || reservoir_data_chunk->size() < sample_count) {
+		if (FillReservoir(input) == 0) {
+			// entire chunk was consumed by reservoir
+			return;
+		}
+	}
+	D_ASSERT(reservoir_data_chunk);
+	D_ASSERT(reservoir_data_chunk->size() == sample_count);
+	// Initialize the weights if they have not been already
+	if (old_base_reservoir_sample.reservoir_weights.empty()) {
+		old_base_reservoir_sample.InitializeReservoir(reservoir_data_chunk->size(), sample_count);
+	}
+	// find the position of next_index_to_sample relative to number of seen entries (num_entries_to_skip_b4_next_sample)
+	idx_t remaining = input.size();
+	idx_t base_offset = 0;
+	while (true) {
+		idx_t offset = old_base_reservoir_sample.next_index_to_sample -
+		               old_base_reservoir_sample.num_entries_to_skip_b4_next_sample;
+		if (offset >= remaining) {
+			// not in this chunk! increment current count and go to the next chunk
+			old_base_reservoir_sample.num_entries_to_skip_b4_next_sample += remaining;
+			return;
+		}
+		// in this chunk! replace the element
+		ReplaceElement(input, base_offset + offset);
+		// shift the chunk forward
+		remaining -= offset;
+		base_offset += offset;
+	}
+}
+
+unique_ptr<DataChunk> ReservoirSample::GetChunk() {
+	if (!reservoir_data_chunk || reservoir_data_chunk->size() == 0) {
+		return nullptr;
+	}
+	auto collected_sample_count = reservoir_data_chunk->size();
+	if (collected_sample_count > STANDARD_VECTOR_SIZE) {
+		// get from the back to avoid creating two selection vectors
+		// one to return the first STANDARD_VECTOR_SIZE
+		// another to replace the reservoir_data_chunk with the first STANDARD VECTOR SIZE missing
+		auto ret = make_uniq<DataChunk>();
+		auto samples_remaining = collected_sample_count - STANDARD_VECTOR_SIZE;
+		auto reservoir_types = reservoir_data_chunk->GetTypes();
+		SelectionVector sel(STANDARD_VECTOR_SIZE);
+		for (idx_t i = samples_remaining; i < collected_sample_count; i++) {
+			sel.set_index(i - samples_remaining, i);
+		}
+		ret->Initialize(allocator, reservoir_types);
+		ret->Slice(*reservoir_data_chunk, sel, STANDARD_VECTOR_SIZE);
+		ret->SetCardinality(STANDARD_VECTOR_SIZE);
+		// reduce capacity and cardinality of the sample data chunk
+		reservoir_data_chunk->SetCardinality(samples_remaining);
+		return ret;
+	}
+	return std::move(reservoir_data_chunk);
+}
+
+void ReservoirSample::ReplaceElement(DataChunk &input, idx_t index_in_chunk, double with_weight) {
+	// replace the entry in the reservoir
+	// 8. The item in R with the minimum key is replaced by item vi
+	D_ASSERT(input.ColumnCount() == reservoir_data_chunk->ColumnCount());
+	for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
+		reservoir_data_chunk->SetValue(col_idx, old_base_reservoir_sample.min_weighted_entry_index,
+		                               input.GetValue(col_idx, index_in_chunk));
+	}
+	old_base_reservoir_sample.ReplaceElement(with_weight);
+}
+
+void ReservoirSample::InitializeReservoir(DataChunk &input) {
+	reservoir_data_chunk = make_uniq<DataChunk>();
+	reservoir_data_chunk->Initialize(allocator, input.GetTypes(), sample_count);
+	for (idx_t col_idx = 0; col_idx < reservoir_data_chunk->ColumnCount(); col_idx++) {
+		FlatVector::Validity(reservoir_data_chunk->data[col_idx]).Initialize(sample_count);
+	}
+	reservoir_initialized = true;
+}
+
+idx_t ReservoirSample::FillReservoir(DataChunk &input) {
+	idx_t chunk_count = input.size();
+	input.Flatten();
+	auto num_added_samples = reservoir_data_chunk ? reservoir_data_chunk->size() : 0;
+	D_ASSERT(num_added_samples <= sample_count);
+
+	// required count is what we still need to add to the reservoir
+	idx_t required_count;
+	if (num_added_samples + chunk_count >= sample_count) {
+		// have to limit the count of the chunk
+		required_count = sample_count - num_added_samples;
+	} else {
+		// we copy the entire chunk
+		required_count = chunk_count;
+	}
+	input.SetCardinality(required_count);
+
+	// initialize the reservoir
+	if (!reservoir_initialized) {
+		InitializeReservoir(input);
+	}
+	reservoir_data_chunk->Append(input, false, nullptr, required_count);
+	old_base_reservoir_sample.InitializeReservoir(required_count, sample_count);
+
+	// check if there are still elements remaining in the Input data chunk that should be
+	// randomly sampled and potentially added. This happens if we are on a boundary
+	// for example, input.size() is 1024, but our sample size is 10
+	if (required_count == chunk_count) {
+		// we are done here
+		return 0;
+	}
+	// we still need to process a part of the chunk
+	// create a selection vector of the remaining elements
+	SelectionVector sel(STANDARD_VECTOR_SIZE);
+	for (idx_t i = required_count; i < chunk_count; i++) {
+		sel.set_index(i - required_count, i);
+	}
+	// slice the input vector and continue
+	input.Slice(sel, chunk_count - required_count);
+	return input.size();
+}
+
+void ReservoirSample::Finalize() {
+	return;
+}
+
+ReservoirSamplePercentage::ReservoirSamplePercentage(Allocator &allocator, double percentage, int64_t seed)
+    : BlockingSample(seed), allocator(allocator), sample_percentage(percentage / 100.0), current_count(0),
+      is_finalized(false) {
+	reservoir_sample_size = idx_t(sample_percentage * RESERVOIR_THRESHOLD);
+	current_sample = make_uniq<ReservoirSample>(allocator, reservoir_sample_size, random.NextRandomInteger());
+}
+
+ReservoirSamplePercentage::ReservoirSamplePercentage(double percentage, int64_t seed)
+    : ReservoirSamplePercentage(Allocator::DefaultAllocator(), percentage, seed) {
+}
+
+void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
+	old_base_reservoir_sample.num_entries_seen_total += input.size();
+	if (current_count + input.size() > RESERVOIR_THRESHOLD) {
+		// we don't have enough space in our current reservoir
+		// first check what we still need to append to the current sample
+		idx_t append_to_current_sample_count = RESERVOIR_THRESHOLD - current_count;
+		idx_t append_to_next_sample = input.size() - append_to_current_sample_count;
+		if (append_to_current_sample_count > 0) {
+			// we have elements remaining, first add them to the current sample
+			if (append_to_next_sample > 0) {
+				// we need to also add to the next sample
+				DataChunk new_chunk;
+				new_chunk.InitializeEmpty(input.GetTypes());
+				new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count);
+				new_chunk.Flatten();
+				current_sample->AddToReservoir(new_chunk);
+			} else {
+				input.Flatten();
+				input.SetCardinality(append_to_current_sample_count);
+				current_sample->AddToReservoir(input);
+			}
+		}
+		if (append_to_next_sample > 0) {
+			// slice the input for the remainder
+			SelectionVector sel(append_to_next_sample);
+			for (idx_t i = append_to_current_sample_count; i < append_to_next_sample + append_to_current_sample_count;
+			     i++) {
+				sel.set_index(i - append_to_current_sample_count, i);
+			}
+			input.Slice(sel, append_to_next_sample);
+		}
+		// now our first sample is filled: append it to the set of finished samples
+		finished_samples.push_back(std::move(current_sample));
+
+		// allocate a new sample, and potentially add the remainder of the current input to that sample
+		current_sample = make_uniq<ReservoirSample>(allocator, reservoir_sample_size, random.NextRandomInteger());
+		if (append_to_next_sample > 0) {
+			current_sample->AddToReservoir(input);
+		}
+		current_count = append_to_next_sample;
+	} else {
+		// we can just append to the current sample
+		current_count += input.size();
+		current_sample->AddToReservoir(input);
+	}
+}
+
+unique_ptr<DataChunk> ReservoirSamplePercentage::GetChunk() {
+	if (!is_finalized) {
+		Finalize();
+	}
+	while (!finished_samples.empty()) {
+		auto &front = finished_samples.front();
+		auto chunk = front->GetChunk();
+		if (chunk && chunk->size() > 0) {
+			return chunk;
+		}
+		// move to the next sample
+		finished_samples.erase(finished_samples.begin());
+	}
+	return nullptr;
+}
+
+void ReservoirSamplePercentage::Finalize() {
+	// need to finalize the current sample, if any
+	// we are finializing, so we are starting to return chunks. Our last chunk has
+	// sample_percentage * RESERVOIR_THRESHOLD entries that hold samples.
+	// if our current count is less than the sample_percentage * RESERVOIR_THRESHOLD
+	// then we have sampled too much for the current_sample and we need to redo the sample
+	// otherwise we can just push the current sample back
+	// Imagine sampling 70% of 100 rows (so 70 rows). We allocate sample_percentage * RESERVOIR_THRESHOLD
+	// -----------------------------------------
+	auto sampled_more_than_required =
+	    static_cast<double>(current_count) > sample_percentage * RESERVOIR_THRESHOLD || finished_samples.empty();
+	if (current_count > 0 && sampled_more_than_required) {
+		// create a new sample
+		auto new_sample_size = idx_t(round(sample_percentage * static_cast<double>(current_count)));
+		auto new_sample = make_uniq<ReservoirSample>(allocator, new_sample_size, random.NextRandomInteger());
+		while (true) {
+			auto chunk = current_sample->GetChunk();
+			if (!chunk || chunk->size() == 0) {
+				break;
+			}
+			new_sample->AddToReservoir(*chunk);
+		}
+		finished_samples.push_back(std::move(new_sample));
+	} else {
+		finished_samples.push_back(std::move(current_sample));
+	}
+	// when finalizing, current_sample is null. All samples are now in finished samples.
+	current_sample = nullptr;
+	is_finalized = true;
+}
+
+BaseReservoirSampling::BaseReservoirSampling(int64_t seed) : random(seed) {
+	next_index_to_sample = 0;
+	min_weight_threshold = 0;
+	min_weighted_entry_index = 0;
+	num_entries_to_skip_b4_next_sample = 0;
+	num_entries_seen_total = 0;
+}
+
+BaseReservoirSampling::BaseReservoirSampling() : BaseReservoirSampling(-1) {
+}
+
+void BaseReservoirSampling::InitializeReservoir(idx_t cur_size, idx_t sample_size) {
+	//! 1: The first m items of V are inserted into R
+	//! first we need to check if the reservoir already has "m" elements
+	if (cur_size == sample_size) {
+		//! 2. For each item vi ∈ R: Calculate a key ki = random(0, 1)
+		//! we then define the threshold to enter the reservoir T_w as the minimum key of R
+		//! we use a priority queue to extract the minimum key in O(1) time
+		for (idx_t i = 0; i < sample_size; i++) {
+			double k_i = random.NextRandom();
+			reservoir_weights.emplace(-k_i, i);
+		}
+		SetNextEntry();
+	}
+}
+
+void BaseReservoirSampling::SetNextEntry() {
+	//! 4. Let r = random(0, 1) and Xw = log(r) / log(T_w)
+	auto &min_key = reservoir_weights.top();
+	double t_w = -min_key.first;
+	double r = random.NextRandom();
+	double x_w = log(r) / log(t_w);
+	//! 5. From the current item vc skip items until item vi , such that:
+	//! 6. wc +wc+1 +···+wi−1 < Xw <= wc +wc+1 +···+wi−1 +wi
+	//! since all our weights are 1 (uniform sampling), we can just determine the amount of elements to skip
+	min_weight_threshold = t_w;
+	min_weighted_entry_index = min_key.second;
+	next_index_to_sample = MaxValue<idx_t>(1, idx_t(round(x_w)));
+	num_entries_to_skip_b4_next_sample = 0;
+}
+
+void BaseReservoirSampling::ReplaceElement(double with_weight) {
+	//! replace the entry in the reservoir
+	//! pop the minimum entry
+	reservoir_weights.pop();
+	//! now update the reservoir
+	//! 8. Let tw = Tw i , r2 = random(tw,1) and vi’s key: ki = (r2)1/wi
+	//! 9. The new threshold Tw is the new minimum key of R
+	//! we generate a random number between (min_weight_threshold, 1)
+	double r2 = random.NextRandom(min_weight_threshold, 1);
+
+	//! if we are merging two reservoir samples use the weight passed
+	if (with_weight >= 0) {
+		r2 = with_weight;
+	}
+	//! now we insert the new weight into the reservoir
+	reservoir_weights.emplace(-r2, min_weighted_entry_index);
+	//! we update the min entry with the new min entry in the reservoir
+	SetNextEntry();
+}
+
+} // namespace duckdb
diff --git a/src/execution/sample/CMakeLists.txt b/src/execution/sample/CMakeLists.txt
deleted file mode 100644
index 6f69a205a0a9..000000000000
--- a/src/execution/sample/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-add_library_unity(duckdb_sample OBJECT base_reservoir_sample.cpp
-                  reservoir_sample.cpp)
-set(ALL_OBJECT_FILES
-    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_sample>
-    PARENT_SCOPE)
diff --git a/src/execution/sample/base_reservoir_sample.cpp b/src/execution/sample/base_reservoir_sample.cpp
deleted file mode 100644
index 0f0fcdf7a387..000000000000
--- a/src/execution/sample/base_reservoir_sample.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#include "duckdb/execution/reservoir_sample.hpp"
-#include <math.h>
-
-namespace duckdb {
-
-double BaseReservoirSampling::GetMinWeightFromTuplesSeen(idx_t rows_seen_total) {
-	// this function was obtained using https://mycurvefit.com. Inputting multiple x, y values into
-	// The
-	switch (rows_seen_total) {
-	case 0:
-		return 0;
-	case 1:
-		return 0.000161;
-	case 2:
-		return 0.530136;
-	case 3:
-		return 0.693454;
-	default: {
-		return (0.99 - 0.355 * std::exp(-0.07 * static_cast<double>(rows_seen_total)));
-	}
-	}
-}
-
-BaseReservoirSampling::BaseReservoirSampling(int64_t seed) : random(seed) {
-	next_index_to_sample = 0;
-	min_weight_threshold = 0;
-	min_weighted_entry_index = 0;
-	num_entries_to_skip_b4_next_sample = 0;
-	num_entries_seen_total = 0;
-}
-
-BaseReservoirSampling::BaseReservoirSampling() : BaseReservoirSampling(1) {
-}
-
-unique_ptr<BaseReservoirSampling> BaseReservoirSampling::Copy() {
-	auto ret = make_uniq<BaseReservoirSampling>(1);
-	ret->reservoir_weights = reservoir_weights;
-	ret->next_index_to_sample = next_index_to_sample;
-	ret->min_weight_threshold = min_weight_threshold;
-	ret->min_weighted_entry_index = min_weighted_entry_index;
-	ret->num_entries_to_skip_b4_next_sample = num_entries_to_skip_b4_next_sample;
-	ret->num_entries_seen_total = num_entries_seen_total;
-	return ret;
-}
-
-void BaseReservoirSampling::InitializeReservoirWeights(idx_t cur_size, idx_t sample_size) {
-	//! 1: The first m items of V are inserted into R
-	//! first we need to check if the reservoir already has "m" elements
-	//! 2. For each item vi ∈ R: Calculate a key ki = random(0, 1)
-	//! we then define the threshold to enter the reservoir T_w as the minimum key of R
-	//! we use a priority queue to extract the minimum key in O(1) time
-	if (cur_size == sample_size) {
-		//! 2. For each item vi ∈ R: Calculate a key ki = random(0, 1)
-		//! we then define the threshold to enter the reservoir T_w as the minimum key of R
-		//! we use a priority queue to extract the minimum key in O(1) time
-		for (idx_t i = 0; i < sample_size; i++) {
-			double k_i = random.NextRandom();
-			reservoir_weights.emplace(-k_i, i);
-		}
-		SetNextEntry();
-	}
-}
-
-void BaseReservoirSampling::SetNextEntry() {
-	D_ASSERT(!reservoir_weights.empty());
-	//! 4. Let r = random(0, 1) and Xw = log(r) / log(T_w)
-	auto &min_key = reservoir_weights.top();
-	double t_w = -min_key.first;
-	double r = random.NextRandom32();
-	double x_w = log(r) / log(t_w);
-	//! 5. From the current item vc skip items until item vi , such that:
-	//! 6. wc +wc+1 +···+wi−1 < Xw <= wc +wc+1 +···+wi−1 +wi
-	//! since all our weights are 1 (uniform sampling), we can just determine the amount of elements to skip
-	min_weight_threshold = t_w;
-	min_weighted_entry_index = min_key.second;
-	next_index_to_sample = MaxValue<idx_t>(1, idx_t(round(x_w)));
-	num_entries_to_skip_b4_next_sample = 0;
-}
-
-void BaseReservoirSampling::ReplaceElementWithIndex(idx_t entry_index, double with_weight, bool pop) {
-
-	if (pop) {
-		reservoir_weights.pop();
-	}
-	double r2 = with_weight;
-	//! now we insert the new weight into the reservoir
-	reservoir_weights.emplace(-r2, entry_index);
-	//! we update the min entry with the new min entry in the reservoir
-	SetNextEntry();
-}
-
-void BaseReservoirSampling::ReplaceElement(double with_weight) {
-	//! replace the entry in the reservoir
-	//! pop the minimum entry
-	reservoir_weights.pop();
-	//! now update the reservoir
-	//! 8. Let tw = Tw i , r2 = random(tw,1) and vi’s key: ki = (r2)1/wi
-	//! 9. The new threshold Tw is the new minimum key of R
-	//! we generate a random number between (min_weight_threshold, 1)
-	double r2 = random.NextRandom(min_weight_threshold, 1);
-
-	//! if we are merging two reservoir samples use the weight passed
-	if (with_weight >= 0) {
-		r2 = with_weight;
-	}
-	//! now we insert the new weight into the reservoir
-	reservoir_weights.emplace(-r2, min_weighted_entry_index);
-	//! we update the min entry with the new min entry in the reservoir
-	SetNextEntry();
-}
-
-void BaseReservoirSampling::UpdateMinWeightThreshold() {
-	if (!reservoir_weights.empty()) {
-		min_weight_threshold = -reservoir_weights.top().first;
-		min_weighted_entry_index = reservoir_weights.top().second;
-		return;
-	}
-	min_weight_threshold = 1;
-}
-
-void BaseReservoirSampling::FillWeights(SelectionVector &sel, idx_t &sel_size) {
-	if (!reservoir_weights.empty()) {
-		return;
-	}
-	D_ASSERT(reservoir_weights.empty());
-	auto num_entries_seen_normalized = num_entries_seen_total / FIXED_SAMPLE_SIZE;
-	auto min_weight = GetMinWeightFromTuplesSeen(num_entries_seen_normalized);
-	for (idx_t i = 0; i < sel_size; i++) {
-		auto weight = random.NextRandom(min_weight, 1);
-		reservoir_weights.emplace(-weight, i);
-	}
-	D_ASSERT(reservoir_weights.size() <= sel_size);
-	SetNextEntry();
-}
-
-} // namespace duckdb
diff --git a/src/execution/sample/reservoir_sample.cpp b/src/execution/sample/reservoir_sample.cpp
deleted file mode 100644
index 58056920eaf8..000000000000
--- a/src/execution/sample/reservoir_sample.cpp
+++ /dev/null
@@ -1,931 +0,0 @@
-#include "duckdb/execution/reservoir_sample.hpp"
-#include "duckdb/common/types/data_chunk.hpp"
-#include "duckdb/common/vector_operations/vector_operations.hpp"
-#include <unordered_set>
-
-namespace duckdb {
-
-std::pair<double, idx_t> BlockingSample::PopFromWeightQueue() {
-	D_ASSERT(base_reservoir_sample && !base_reservoir_sample->reservoir_weights.empty());
-	auto ret = base_reservoir_sample->reservoir_weights.top();
-	base_reservoir_sample->reservoir_weights.pop();
-
-	base_reservoir_sample->UpdateMinWeightThreshold();
-	D_ASSERT(base_reservoir_sample->min_weight_threshold > 0);
-	return ret;
-}
-
-double BlockingSample::GetMinWeightThreshold() {
-	return base_reservoir_sample->min_weight_threshold;
-}
-
-idx_t BlockingSample::GetPriorityQueueSize() {
-	return base_reservoir_sample->reservoir_weights.size();
-}
-
-void BlockingSample::Destroy() {
-	destroyed = true;
-}
-
-void ReservoirChunk::Serialize(Serializer &serializer) const {
-	chunk.Serialize(serializer);
-}
-
-unique_ptr<ReservoirChunk> ReservoirChunk::Deserialize(Deserializer &deserializer) {
-	auto result = make_uniq<ReservoirChunk>();
-	result->chunk.Deserialize(deserializer);
-	return result;
-}
-
-unique_ptr<ReservoirChunk> ReservoirChunk::Copy() const {
-	auto copy = make_uniq<ReservoirChunk>();
-	copy->chunk.Initialize(Allocator::DefaultAllocator(), chunk.GetTypes());
-
-	chunk.Copy(copy->chunk);
-	return copy;
-}
-
-ReservoirSample::ReservoirSample(idx_t sample_count, unique_ptr<ReservoirChunk> reservoir_chunk)
-    : ReservoirSample(Allocator::DefaultAllocator(), sample_count, 1) {
-	if (reservoir_chunk) {
-		this->reservoir_chunk = std::move(reservoir_chunk);
-		sel_size = this->reservoir_chunk->chunk.size();
-		sel = SelectionVector(FIXED_SAMPLE_SIZE);
-		for (idx_t i = 0; i < sel_size; i++) {
-			sel.set_index(i, i);
-		}
-		ExpandSerializedSample();
-	}
-	stats_sample = true;
-}
-
-ReservoirSample::ReservoirSample(Allocator &allocator, idx_t sample_count, int64_t seed)
-    : BlockingSample(seed), sample_count(sample_count), allocator(allocator) {
-	base_reservoir_sample = make_uniq<BaseReservoirSampling>(seed);
-	type = SampleType::RESERVOIR_SAMPLE;
-	reservoir_chunk = nullptr;
-	stats_sample = false;
-	sel = SelectionVector(sample_count);
-	sel_size = 0;
-}
-
-idx_t ReservoirSample::GetSampleCount() {
-	return sample_count;
-}
-
-idx_t ReservoirSample::NumSamplesCollected() const {
-	if (!reservoir_chunk) {
-		return 0;
-	}
-	return reservoir_chunk->chunk.size();
-}
-
-SamplingState ReservoirSample::GetSamplingState() const {
-	if (base_reservoir_sample->reservoir_weights.empty()) {
-		return SamplingState::RANDOM;
-	}
-	return SamplingState::RESERVOIR;
-}
-
-idx_t ReservoirSample::GetActiveSampleCount() const {
-	switch (GetSamplingState()) {
-	case SamplingState::RANDOM:
-		return sel_size;
-	case SamplingState::RESERVOIR:
-		return base_reservoir_sample->reservoir_weights.size();
-	default:
-		throw InternalException("Sampling State is INVALID");
-	}
-}
-
-idx_t ReservoirSample::GetTuplesSeen() const {
-	return base_reservoir_sample->num_entries_seen_total;
-}
-
-DataChunk &ReservoirSample::Chunk() {
-	D_ASSERT(reservoir_chunk);
-	return reservoir_chunk->chunk;
-}
-
-unique_ptr<DataChunk> ReservoirSample::GetChunk() {
-	if (destroyed || !reservoir_chunk || Chunk().size() == 0) {
-		return nullptr;
-	}
-	// cannot destory internal samples.
-	auto ret = make_uniq<DataChunk>();
-
-	SelectionVector ret_sel(STANDARD_VECTOR_SIZE);
-	idx_t collected_samples = GetActiveSampleCount();
-
-	if (collected_samples == 0) {
-		return nullptr;
-	}
-
-	idx_t samples_remaining;
-	idx_t return_chunk_size;
-	if (collected_samples > STANDARD_VECTOR_SIZE) {
-		samples_remaining = collected_samples - STANDARD_VECTOR_SIZE;
-		return_chunk_size = STANDARD_VECTOR_SIZE;
-	} else {
-		samples_remaining = 0;
-		return_chunk_size = collected_samples;
-	}
-
-	for (idx_t i = samples_remaining; i < collected_samples; i++) {
-		// pop samples and reduce size of selection vector.
-		if (GetSamplingState() == SamplingState::RESERVOIR) {
-			auto top = PopFromWeightQueue();
-			ret_sel.set_index(i - samples_remaining, sel.get_index(top.second));
-		} else {
-			ret_sel.set_index(i - samples_remaining, sel.get_index(i));
-		}
-		sel_size -= 1;
-	}
-
-	auto reservoir_types = Chunk().GetTypes();
-
-	ret->Initialize(allocator, reservoir_types, STANDARD_VECTOR_SIZE);
-	ret->Slice(Chunk(), ret_sel, return_chunk_size);
-	ret->SetCardinality(return_chunk_size);
-	return ret;
-}
-
-unique_ptr<ReservoirChunk> ReservoirSample::CreateNewSampleChunk(vector<LogicalType> &types, idx_t size) const {
-	auto new_sample_chunk = make_uniq<ReservoirChunk>();
-	new_sample_chunk->chunk.Initialize(Allocator::DefaultAllocator(), types, size);
-
-	// set the NULL columns correctly
-	for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
-		if (!ValidSampleType(types[col_idx]) && stats_sample) {
-			new_sample_chunk->chunk.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
-			ConstantVector::SetNull(new_sample_chunk->chunk.data[col_idx], true);
-		}
-	}
-	return new_sample_chunk;
-}
-
-void ReservoirSample::Vacuum() {
-	Verify();
-	if (NumSamplesCollected() <= FIXED_SAMPLE_SIZE || !reservoir_chunk || destroyed) {
-		// sample is destroyed or too small to shrink
-		return;
-	}
-
-	auto ret = Copy();
-	auto ret_reservoir = duckdb::unique_ptr_cast<BlockingSample, ReservoirSample>(std::move(ret));
-	reservoir_chunk = std::move(ret_reservoir->reservoir_chunk);
-	sel = std::move(ret_reservoir->sel);
-	sel_size = ret_reservoir->sel_size;
-
-	Verify();
-	// We should only have one sample chunk now.
-	D_ASSERT(Chunk().size() > 0 && Chunk().size() <= sample_count);
-}
-
-unique_ptr<BlockingSample> ReservoirSample::Copy() const {
-
-	auto ret = make_uniq<ReservoirSample>(sample_count);
-	ret->stats_sample = stats_sample;
-
-	ret->base_reservoir_sample = base_reservoir_sample->Copy();
-	ret->destroyed = destroyed;
-
-	if (!reservoir_chunk || destroyed) {
-		return unique_ptr_cast<ReservoirSample, BlockingSample>(std::move(ret));
-	}
-
-	D_ASSERT(reservoir_chunk);
-
-	// create a new sample chunk to store new samples
-	auto types = reservoir_chunk->chunk.GetTypes();
-	// how many values should be copied
-	idx_t values_to_copy = MinValue<idx_t>(GetActiveSampleCount(), sample_count);
-
-	auto new_sample_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
-
-	SelectionVector sel_copy(sel);
-
-	ret->reservoir_chunk = std::move(new_sample_chunk);
-	ret->UpdateSampleAppend(ret->reservoir_chunk->chunk, reservoir_chunk->chunk, sel_copy, values_to_copy);
-	ret->sel = SelectionVector(values_to_copy);
-	for (idx_t i = 0; i < values_to_copy; i++) {
-		ret->sel.set_index(i, i);
-	}
-	ret->sel_size = sel_size;
-	D_ASSERT(ret->reservoir_chunk->chunk.size() <= sample_count);
-	ret->Verify();
-	return unique_ptr_cast<ReservoirSample, BlockingSample>(std::move(ret));
-}
-
-void ReservoirSample::ConvertToReservoirSample() {
-	D_ASSERT(sel_size <= sample_count);
-	base_reservoir_sample->FillWeights(sel, sel_size);
-}
-
-vector<uint32_t> ReservoirSample::GetRandomizedVector(uint32_t range, uint32_t size) const {
-	vector<uint32_t> ret;
-	ret.reserve(range);
-	for (uint32_t i = 0; i < range; i++) {
-		ret.push_back(i);
-	}
-	for (uint32_t i = 0; i < size; i++) {
-		uint32_t random_shuffle = base_reservoir_sample->random.NextRandomInteger32(i, range);
-		if (random_shuffle == i) {
-			// leave the value where it is
-			continue;
-		}
-		uint32_t tmp = ret[random_shuffle];
-		// basically replacing the tuple that was at index actual_sample_indexes[random_shuffle]
-		ret[random_shuffle] = ret[i];
-		ret[i] = tmp;
-	}
-	return ret;
-}
-
-void ReservoirSample::SimpleMerge(ReservoirSample &other) {
-	D_ASSERT(GetPriorityQueueSize() == 0);
-	D_ASSERT(other.GetPriorityQueueSize() == 0);
-	D_ASSERT(GetSamplingState() == SamplingState::RANDOM);
-	D_ASSERT(other.GetSamplingState() == SamplingState::RANDOM);
-
-	if (other.GetActiveSampleCount() == 0 && other.GetTuplesSeen() == 0) {
-		return;
-	}
-
-	if (GetActiveSampleCount() == 0 && GetTuplesSeen() == 0) {
-		sel = SelectionVector(other.sel);
-		sel_size = other.sel_size;
-		base_reservoir_sample->num_entries_seen_total = other.GetTuplesSeen();
-		return;
-	}
-
-	idx_t total_seen = GetTuplesSeen() + other.GetTuplesSeen();
-
-	auto weight_tuples_this = static_cast<double>(GetTuplesSeen()) / static_cast<double>(total_seen);
-	auto weight_tuples_other = static_cast<double>(other.GetTuplesSeen()) / static_cast<double>(total_seen);
-
-	// If weights don't add up to 1, most likely a simple merge occured and no new samples were added.
-	// if that is the case, add the missing weight to the lower weighted sample to adjust.
-	// this is to avoid cases where if you have a 20k row table and add another 20k rows row by row
-	// then eventually the missing weights will add up, and get you a more even distribution
-	if (weight_tuples_this + weight_tuples_other < 1) {
-		weight_tuples_other += 1 - (weight_tuples_other + weight_tuples_this);
-	}
-
-	idx_t keep_from_this = 0;
-	idx_t keep_from_other = 0;
-	D_ASSERT(stats_sample);
-	D_ASSERT(sample_count == FIXED_SAMPLE_SIZE);
-	D_ASSERT(sample_count == other.sample_count);
-	auto sample_count_double = static_cast<double>(sample_count);
-
-	if (weight_tuples_this > weight_tuples_other) {
-		keep_from_this = MinValue<idx_t>(static_cast<idx_t>(round(sample_count_double * weight_tuples_this)),
-		                                 GetActiveSampleCount());
-		keep_from_other = MinValue<idx_t>(sample_count - keep_from_this, other.GetActiveSampleCount());
-	} else {
-		keep_from_other = MinValue<idx_t>(static_cast<idx_t>(round(sample_count_double * weight_tuples_other)),
-		                                  other.GetActiveSampleCount());
-		keep_from_this = MinValue<idx_t>(sample_count - keep_from_other, GetActiveSampleCount());
-	}
-
-	D_ASSERT(keep_from_this <= GetActiveSampleCount());
-	D_ASSERT(keep_from_other <= other.GetActiveSampleCount());
-	D_ASSERT(keep_from_other + keep_from_this <= FIXED_SAMPLE_SIZE);
-	idx_t size_after_merge = MinValue<idx_t>(keep_from_other + keep_from_this, FIXED_SAMPLE_SIZE);
-
-	// Check if appending the other samples to this will go over the sample chunk size
-	if (reservoir_chunk->chunk.size() + keep_from_other > GetReservoirChunkCapacity()) {
-		Vacuum();
-	}
-
-	D_ASSERT(size_after_merge <= other.GetActiveSampleCount() + GetActiveSampleCount());
-	SelectionVector chunk_sel(keep_from_other);
-	auto offset = reservoir_chunk->chunk.size();
-	for (idx_t i = keep_from_this; i < size_after_merge; i++) {
-		if (i >= GetActiveSampleCount()) {
-			D_ASSERT(sel_size >= GetActiveSampleCount());
-			sel.set_index(GetActiveSampleCount(), offset);
-			sel_size += 1;
-		} else {
-			sel.set_index(i, offset);
-		}
-		chunk_sel.set_index(i - keep_from_this, other.sel.get_index(i - keep_from_this));
-		offset += 1;
-	}
-
-	D_ASSERT(GetActiveSampleCount() == size_after_merge);
-
-	// Copy the rows that make it to the sample from other and put them into this.
-	UpdateSampleAppend(reservoir_chunk->chunk, other.reservoir_chunk->chunk, chunk_sel, keep_from_other);
-	base_reservoir_sample->num_entries_seen_total += other.GetTuplesSeen();
-
-	// if THIS has too many samples now, we conver it to a slower sample.
-	if (GetTuplesSeen() >= FIXED_SAMPLE_SIZE * FAST_TO_SLOW_THRESHOLD) {
-		ConvertToReservoirSample();
-	}
-	Verify();
-}
-
-void ReservoirSample::WeightedMerge(ReservoirSample &other_sample) {
-	D_ASSERT(GetSamplingState() == SamplingState::RESERVOIR);
-	D_ASSERT(other_sample.GetSamplingState() == SamplingState::RESERVOIR);
-
-	// Find out how many samples we want to keep.
-	idx_t total_samples = GetActiveSampleCount() + other_sample.GetActiveSampleCount();
-	idx_t total_samples_seen =
-	    base_reservoir_sample->num_entries_seen_total + other_sample.base_reservoir_sample->num_entries_seen_total;
-	idx_t num_samples_to_keep = MinValue<idx_t>(total_samples, MinValue<idx_t>(sample_count, total_samples_seen));
-
-	D_ASSERT(GetActiveSampleCount() <= num_samples_to_keep);
-	D_ASSERT(total_samples <= FIXED_SAMPLE_SIZE * 2);
-
-	// pop from base base_reservoir weights until there are num_samples_to_keep left.
-	vector<idx_t> this_indexes_to_replace;
-	for (idx_t i = num_samples_to_keep; i < total_samples; i++) {
-		auto min_weight_this = base_reservoir_sample->min_weight_threshold;
-		auto min_weight_other = other_sample.base_reservoir_sample->min_weight_threshold;
-		// min weight threshol is always positive
-		if (min_weight_this > min_weight_other) {
-			// pop from other
-			other_sample.base_reservoir_sample->reservoir_weights.pop();
-			other_sample.base_reservoir_sample->UpdateMinWeightThreshold();
-		} else {
-			auto top_this = PopFromWeightQueue();
-			this_indexes_to_replace.push_back(top_this.second);
-			base_reservoir_sample->UpdateMinWeightThreshold();
-		}
-	}
-
-	D_ASSERT(other_sample.GetPriorityQueueSize() + GetPriorityQueueSize() <= FIXED_SAMPLE_SIZE);
-	D_ASSERT(other_sample.GetPriorityQueueSize() + GetPriorityQueueSize() == num_samples_to_keep);
-	D_ASSERT(other_sample.reservoir_chunk->chunk.GetTypes() == reservoir_chunk->chunk.GetTypes());
-
-	// Prepare a selection vector to copy data from the other sample chunk to this sample chunk
-	SelectionVector sel_other(other_sample.GetPriorityQueueSize());
-	D_ASSERT(GetPriorityQueueSize() <= num_samples_to_keep);
-	D_ASSERT(other_sample.GetPriorityQueueSize() >= this_indexes_to_replace.size());
-	idx_t chunk_offset = 0;
-
-	// Now push weights from other.base_reservoir_sample to this
-	// Depending on how many sample values "this" has, we either need to add to the selection vector
-	// Or replace values in "this'" selection vector
-	idx_t i = 0;
-	while (other_sample.GetPriorityQueueSize() > 0) {
-		auto other_top = other_sample.PopFromWeightQueue();
-		idx_t index_for_new_pair = chunk_offset + reservoir_chunk->chunk.size();
-
-		// update the sel used to copy values from other to this
-		sel_other.set_index(chunk_offset, other_top.second);
-		if (i < this_indexes_to_replace.size()) {
-			auto replacement_index = this_indexes_to_replace[i];
-			sel.set_index(replacement_index, index_for_new_pair);
-			other_top.second = replacement_index;
-		} else {
-			sel.set_index(sel_size, index_for_new_pair);
-			other_top.second = sel_size;
-			sel_size += 1;
-		}
-
-		// make sure that the sample indexes are (this.sample_chunk.size() + chunk_offfset)
-		base_reservoir_sample->reservoir_weights.push(other_top);
-		chunk_offset += 1;
-		i += 1;
-	}
-
-	D_ASSERT(GetPriorityQueueSize() == num_samples_to_keep);
-
-	base_reservoir_sample->UpdateMinWeightThreshold();
-	D_ASSERT(base_reservoir_sample->min_weight_threshold > 0);
-	base_reservoir_sample->num_entries_seen_total = GetTuplesSeen() + other_sample.GetTuplesSeen();
-
-	UpdateSampleAppend(reservoir_chunk->chunk, other_sample.reservoir_chunk->chunk, sel_other, chunk_offset);
-	if (reservoir_chunk->chunk.size() > FIXED_SAMPLE_SIZE * (FIXED_SAMPLE_SIZE_MULTIPLIER - 3)) {
-		Vacuum();
-	}
-
-	Verify();
-}
-
-void ReservoirSample::Merge(unique_ptr<BlockingSample> other) {
-	if (destroyed || other->destroyed) {
-		Destroy();
-		return;
-	}
-
-	D_ASSERT(other->type == SampleType::RESERVOIR_SAMPLE);
-	auto &other_sample = other->Cast<ReservoirSample>();
-
-	// if the other sample has not collected anything yet return
-	if (!other_sample.reservoir_chunk || other_sample.reservoir_chunk->chunk.size() == 0) {
-		return;
-	}
-
-	// this has not collected samples, take over the other
-	if (!reservoir_chunk || reservoir_chunk->chunk.size() == 0) {
-		base_reservoir_sample = std::move(other->base_reservoir_sample);
-		reservoir_chunk = std::move(other_sample.reservoir_chunk);
-		sel = SelectionVector(other_sample.sel);
-		sel_size = other_sample.sel_size;
-		Verify();
-		return;
-	}
-	//! Both samples are still in "fast sampling" method
-	if (GetSamplingState() == SamplingState::RANDOM && other_sample.GetSamplingState() == SamplingState::RANDOM) {
-		SimpleMerge(other_sample);
-		return;
-	}
-
-	// One or none of the samples are in "Fast Sampling" method.
-	// When this is the case, switch both to slow sampling
-	ConvertToReservoirSample();
-	other_sample.ConvertToReservoirSample();
-	WeightedMerge(other_sample);
-}
-
-void ReservoirSample::ShuffleSel(SelectionVector &sel, idx_t range, idx_t size) const {
-	auto randomized = GetRandomizedVector(static_cast<uint32_t>(range), static_cast<uint32_t>(size));
-	SelectionVector original_sel(range);
-	for (idx_t i = 0; i < range; i++) {
-		original_sel.set_index(i, sel.get_index(i));
-	}
-	for (idx_t i = 0; i < size; i++) {
-		sel.set_index(i, original_sel.get_index(randomized[i]));
-	}
-}
-
-void ReservoirSample::NormalizeWeights() {
-	vector<std::pair<double, idx_t>> tmp_weights;
-	while (!base_reservoir_sample->reservoir_weights.empty()) {
-		auto top = base_reservoir_sample->reservoir_weights.top();
-		tmp_weights.push_back(std::move(top));
-		base_reservoir_sample->reservoir_weights.pop();
-	}
-	std::sort(tmp_weights.begin(), tmp_weights.end(),
-	          [&](std::pair<double, idx_t> a, std::pair<double, idx_t> b) { return a.second < b.second; });
-	for (idx_t i = 0; i < tmp_weights.size(); i++) {
-		base_reservoir_sample->reservoir_weights.emplace(tmp_weights.at(i).first, i);
-	}
-	base_reservoir_sample->SetNextEntry();
-}
-
-void ReservoirSample::EvictOverBudgetSamples() {
-	Verify();
-	if (!reservoir_chunk || destroyed) {
-		return;
-	}
-
-	// since this is for serialization, we really need to make sure keep a
-	// minimum of 1% of the rows or 2048 rows
-	idx_t num_samples_to_keep =
-	    MinValue<idx_t>(FIXED_SAMPLE_SIZE, static_cast<idx_t>(SAVE_PERCENTAGE * static_cast<double>(GetTuplesSeen())));
-
-	if (num_samples_to_keep <= 0) {
-		reservoir_chunk->chunk.SetCardinality(0);
-		return;
-	}
-
-	if (num_samples_to_keep == sample_count) {
-		return;
-	}
-
-	// if we over sampled, make sure we only keep the highest percentage samples
-	std::unordered_set<idx_t> selections_to_delete;
-
-	while (num_samples_to_keep < GetPriorityQueueSize()) {
-		auto top = PopFromWeightQueue();
-		D_ASSERT(top.second < sel_size);
-		selections_to_delete.emplace(top.second);
-	}
-
-	// set up reservoir chunk for the reservoir sample
-	D_ASSERT(reservoir_chunk->chunk.size() <= sample_count);
-	// create a new sample chunk to store new samples
-	auto types = reservoir_chunk->chunk.GetTypes();
-	D_ASSERT(num_samples_to_keep <= sample_count);
-	D_ASSERT(stats_sample);
-	D_ASSERT(sample_count == FIXED_SAMPLE_SIZE);
-	auto new_reservoir_chunk = CreateNewSampleChunk(types, sample_count);
-
-	// The current selection vector can potentially have 2048 valid mappings.
-	// If we need to save a sample with less rows than that, we need to do the following
-	// 1. Create a new selection vector that doesn't point to the rows we are evicting
-	SelectionVector new_sel(num_samples_to_keep);
-	idx_t offset = 0;
-	for (idx_t i = 0; i < num_samples_to_keep + selections_to_delete.size(); i++) {
-		if (selections_to_delete.find(i) == selections_to_delete.end()) {
-			D_ASSERT(i - offset < num_samples_to_keep);
-			new_sel.set_index(i - offset, sel.get_index(i));
-		} else {
-			offset += 1;
-		}
-	}
-	// 2. Update row_ids in our weights so that they don't store rows ids to
-	//    indexes in the selection vector that have been evicted.
-	if (!selections_to_delete.empty()) {
-		NormalizeWeights();
-	}
-
-	D_ASSERT(reservoir_chunk->chunk.GetTypes() == new_reservoir_chunk->chunk.GetTypes());
-
-	UpdateSampleAppend(new_reservoir_chunk->chunk, reservoir_chunk->chunk, new_sel, num_samples_to_keep);
-	// set the cardinality
-	new_reservoir_chunk->chunk.SetCardinality(num_samples_to_keep);
-	reservoir_chunk = std::move(new_reservoir_chunk);
-	sel_size = num_samples_to_keep;
-	base_reservoir_sample->UpdateMinWeightThreshold();
-}
-
-void ReservoirSample::ExpandSerializedSample() {
-	if (!reservoir_chunk) {
-		return;
-	}
-
-	auto types = reservoir_chunk->chunk.GetTypes();
-	auto new_res_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
-	auto copy_count = reservoir_chunk->chunk.size();
-	SelectionVector tmp_sel = SelectionVector(0, copy_count);
-	UpdateSampleAppend(new_res_chunk->chunk, reservoir_chunk->chunk, tmp_sel, copy_count);
-	new_res_chunk->chunk.SetCardinality(copy_count);
-	std::swap(reservoir_chunk, new_res_chunk);
-}
-
-idx_t ReservoirSample::GetReservoirChunkCapacity() const {
-	return sample_count + (FIXED_SAMPLE_SIZE_MULTIPLIER * FIXED_SAMPLE_SIZE);
-}
-
-idx_t ReservoirSample::FillReservoir(DataChunk &chunk) {
-
-	idx_t ingested_count = 0;
-	if (!reservoir_chunk) {
-		if (chunk.size() > FIXED_SAMPLE_SIZE) {
-			throw InternalException("Creating sample with DataChunk that is larger than the fixed sample size");
-		}
-		auto types = chunk.GetTypes();
-		// create a new sample chunk to store new samples
-		reservoir_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
-	}
-
-	idx_t actual_sample_index_start = GetActiveSampleCount();
-	D_ASSERT(reservoir_chunk->chunk.ColumnCount() == chunk.ColumnCount());
-
-	if (reservoir_chunk->chunk.size() < sample_count) {
-		ingested_count = MinValue<idx_t>(sample_count - reservoir_chunk->chunk.size(), chunk.size());
-		auto random_other_sel =
-		    GetRandomizedVector(static_cast<uint32_t>(ingested_count), static_cast<uint32_t>(ingested_count));
-		SelectionVector sel_for_input_chunk(ingested_count);
-		for (idx_t i = 0; i < ingested_count; i++) {
-			sel.set_index(actual_sample_index_start + i, actual_sample_index_start + i);
-			sel_for_input_chunk.set_index(i, random_other_sel[i]);
-		}
-		UpdateSampleAppend(reservoir_chunk->chunk, chunk, sel_for_input_chunk, ingested_count);
-		sel_size += ingested_count;
-	}
-	D_ASSERT(GetActiveSampleCount() <= sample_count);
-	D_ASSERT(GetActiveSampleCount() >= ingested_count);
-	// always return how many tuples were ingested
-	return ingested_count;
-}
-
-void ReservoirSample::Destroy() {
-	destroyed = true;
-}
-
-SelectionVectorHelper ReservoirSample::GetReplacementIndexes(idx_t sample_chunk_offset,
-                                                             idx_t theoretical_chunk_length) {
-	if (GetSamplingState() == SamplingState::RANDOM) {
-		return GetReplacementIndexesFast(sample_chunk_offset, theoretical_chunk_length);
-	}
-	return GetReplacementIndexesSlow(sample_chunk_offset, theoretical_chunk_length);
-}
-
-SelectionVectorHelper ReservoirSample::GetReplacementIndexesFast(idx_t sample_chunk_offset, idx_t chunk_length) {
-
-	// how much weight to the other tuples have compared to the ones in this chunk?
-	auto weight_tuples_other = static_cast<double>(chunk_length) / static_cast<double>(GetTuplesSeen() + chunk_length);
-	auto num_to_pop = static_cast<uint32_t>(round(weight_tuples_other * static_cast<double>(sample_count)));
-	D_ASSERT(num_to_pop <= sample_count);
-	D_ASSERT(num_to_pop <= sel_size);
-	SelectionVectorHelper ret;
-
-	if (num_to_pop == 0) {
-		ret.sel = SelectionVector(num_to_pop);
-		ret.size = 0;
-		return ret;
-	}
-	std::unordered_map<idx_t, idx_t> replacement_indexes;
-	SelectionVector chunk_sel(num_to_pop);
-
-	auto random_indexes_chunk = GetRandomizedVector(static_cast<uint32_t>(chunk_length), num_to_pop);
-	auto random_sel_indexes = GetRandomizedVector(static_cast<uint32_t>(sel_size), num_to_pop);
-	for (idx_t i = 0; i < num_to_pop; i++) {
-		// update the selection vector for the reservoir sample
-		chunk_sel.set_index(i, random_indexes_chunk[i]);
-		// sel is not guaratneed to be random, so we update the indexes according to our
-		// random sel indexes.
-		sel.set_index(random_sel_indexes[i], sample_chunk_offset + i);
-	}
-
-	D_ASSERT(sel_size == sample_count);
-
-	ret.sel = SelectionVector(chunk_sel);
-	ret.size = num_to_pop;
-	return ret;
-}
-
-SelectionVectorHelper ReservoirSample::GetReplacementIndexesSlow(const idx_t sample_chunk_offset,
-                                                                 const idx_t chunk_length) {
-	idx_t remaining = chunk_length;
-	std::unordered_map<idx_t, idx_t> ret_map;
-	idx_t sample_chunk_index = 0;
-
-	idx_t base_offset = 0;
-
-	while (true) {
-		idx_t offset =
-		    base_reservoir_sample->next_index_to_sample - base_reservoir_sample->num_entries_to_skip_b4_next_sample;
-		if (offset >= remaining) {
-			// not in this chunk! increment current count and go to the next chunk
-			base_reservoir_sample->num_entries_to_skip_b4_next_sample += remaining;
-			break;
-		}
-		// in this chunk! replace the element
-		// ret[index_in_new_chunk] = index_in_sample_chunk (the sample chunk offset will be applied later)
-		// D_ASSERT(sample_chunk_index == ret.size());
-		ret_map[base_offset + offset] = sample_chunk_index;
-		double r2 = base_reservoir_sample->random.NextRandom32(base_reservoir_sample->min_weight_threshold, 1);
-		// replace element in our max_heap
-		// first get the top most pair
-		const auto top = PopFromWeightQueue();
-		const auto index = top.second;
-		const auto index_in_sample_chunk = sample_chunk_offset + sample_chunk_index;
-		sel.set_index(index, index_in_sample_chunk);
-		base_reservoir_sample->ReplaceElementWithIndex(index, r2, false);
-
-		sample_chunk_index += 1;
-		// shift the chunk forward
-		remaining -= offset;
-		base_offset += offset;
-	}
-
-	// create selection vector to return
-	SelectionVector ret_sel(ret_map.size());
-	D_ASSERT(sel_size == sample_count);
-	for (auto &kv : ret_map) {
-		ret_sel.set_index(kv.second, kv.first);
-	}
-	SelectionVectorHelper ret;
-	ret.sel = SelectionVector(ret_sel);
-	ret.size = static_cast<uint32_t>(ret_map.size());
-	return ret;
-}
-
-void ReservoirSample::Finalize() {
-}
-
-bool ReservoirSample::ValidSampleType(const LogicalType &type) {
-	return type.IsNumeric();
-}
-
-void ReservoirSample::UpdateSampleAppend(DataChunk &this_, DataChunk &other, SelectionVector &other_sel,
-                                         idx_t append_count) const {
-	idx_t new_size = this_.size() + append_count;
-	if (other.size() == 0) {
-		return;
-	}
-	D_ASSERT(this_.GetTypes() == other.GetTypes());
-
-	// UpdateSampleAppend(this_, other, other_sel, append_count);
-	D_ASSERT(this_.GetTypes() == other.GetTypes());
-	auto types = reservoir_chunk->chunk.GetTypes();
-
-	for (idx_t i = 0; i < reservoir_chunk->chunk.ColumnCount(); i++) {
-		auto col_type = types[i];
-		if (ValidSampleType(col_type) || !stats_sample) {
-			D_ASSERT(this_.data[i].GetVectorType() == VectorType::FLAT_VECTOR);
-			VectorOperations::Copy(other.data[i], this_.data[i], other_sel, append_count, 0, this_.size());
-		}
-	}
-	this_.SetCardinality(new_size);
-}
-
-void ReservoirSample::AddToReservoir(DataChunk &chunk) {
-	if (destroyed || chunk.size() == 0) {
-		return;
-	}
-
-	idx_t tuples_consumed = FillReservoir(chunk);
-	base_reservoir_sample->num_entries_seen_total += tuples_consumed;
-	D_ASSERT(sample_count == 0 || reservoir_chunk->chunk.size() >= 1);
-
-	if (tuples_consumed == chunk.size()) {
-		return;
-	}
-
-	// the chunk filled the first FIXED_SAMPLE_SIZE chunk but still has tuples remaining
-	// slice the chunk and call AddToReservoir again.
-	if (tuples_consumed != chunk.size() && tuples_consumed != 0) {
-		// Fill reservoir consumed some of the chunk to reach FIXED_SAMPLE_SIZE
-		// now we need to
-		// So we slice it and call AddToReservoir
-		auto slice = make_uniq<DataChunk>();
-		auto samples_remaining = chunk.size() - tuples_consumed;
-		auto types = chunk.GetTypes();
-		SelectionVector input_sel(samples_remaining);
-		for (idx_t i = 0; i < samples_remaining; i++) {
-			input_sel.set_index(i, tuples_consumed + i);
-		}
-		slice->Initialize(Allocator::DefaultAllocator(), types, samples_remaining);
-		slice->Slice(chunk, input_sel, samples_remaining);
-		slice->SetCardinality(samples_remaining);
-		AddToReservoir(*slice);
-		return;
-	}
-
-	// at this point we should have collected at least sample count samples
-	D_ASSERT(GetActiveSampleCount() >= sample_count);
-
-	auto chunk_sel = GetReplacementIndexes(reservoir_chunk->chunk.size(), chunk.size());
-
-	if (chunk_sel.size == 0) {
-		// not adding any samples
-		base_reservoir_sample->num_entries_seen_total += chunk.size();
-		return;
-	}
-	idx_t size = chunk_sel.size;
-	D_ASSERT(size <= chunk.size());
-
-	UpdateSampleAppend(reservoir_chunk->chunk, chunk, chunk_sel.sel, size);
-
-	base_reservoir_sample->num_entries_seen_total += chunk.size();
-	D_ASSERT(base_reservoir_sample->reservoir_weights.size() == 0 ||
-	         base_reservoir_sample->reservoir_weights.size() == sample_count);
-
-	Verify();
-
-	// if we are over the threshold, we ned to swith to slow sampling.
-	if (GetSamplingState() == SamplingState::RANDOM && GetTuplesSeen() >= FIXED_SAMPLE_SIZE * FAST_TO_SLOW_THRESHOLD) {
-		ConvertToReservoirSample();
-	}
-	if (reservoir_chunk->chunk.size() >= (GetReservoirChunkCapacity() - (static_cast<idx_t>(FIXED_SAMPLE_SIZE) * 3))) {
-		Vacuum();
-	}
-}
-
-void ReservoirSample::Verify() {
-#ifdef DEBUG
-	if (destroyed) {
-		return;
-	}
-	if (GetPriorityQueueSize() == 0) {
-		D_ASSERT(GetActiveSampleCount() <= sample_count);
-		D_ASSERT(GetTuplesSeen() >= GetActiveSampleCount());
-		return;
-	}
-	if (NumSamplesCollected() > sample_count) {
-		D_ASSERT(GetPriorityQueueSize() == sample_count);
-	} else if (NumSamplesCollected() <= sample_count && GetPriorityQueueSize() > 0) {
-		// it's possible to collect more samples than your priority queue size.
-		// see sample_converts_to_reservoir_sample.test
-		D_ASSERT(NumSamplesCollected() >= GetPriorityQueueSize());
-	}
-	auto base_reservoir_copy = base_reservoir_sample->Copy();
-	std::unordered_map<idx_t, idx_t> index_count;
-	while (!base_reservoir_copy->reservoir_weights.empty()) {
-		auto &pair = base_reservoir_copy->reservoir_weights.top();
-		if (index_count.find(pair.second) == index_count.end()) {
-			index_count[pair.second] = 1;
-			base_reservoir_copy->reservoir_weights.pop();
-		} else {
-			index_count[pair.second] += 1;
-			base_reservoir_copy->reservoir_weights.pop();
-			throw InternalException("Duplicate selection index in reservoir weights");
-		}
-	}
-	// TODO: Verify the Sel as well. No duplicate indices.
-
-	if (reservoir_chunk) {
-		reservoir_chunk->chunk.Verify();
-	}
-#endif
-}
-
-ReservoirSamplePercentage::ReservoirSamplePercentage(double percentage, int64_t seed, idx_t reservoir_sample_size)
-    : BlockingSample(seed), allocator(Allocator::DefaultAllocator()), sample_percentage(percentage / 100.0),
-      reservoir_sample_size(reservoir_sample_size), current_count(0), is_finalized(false) {
-	current_sample = make_uniq<ReservoirSample>(allocator, reservoir_sample_size, base_reservoir_sample->random());
-	type = SampleType::RESERVOIR_PERCENTAGE_SAMPLE;
-}
-
-ReservoirSamplePercentage::ReservoirSamplePercentage(Allocator &allocator, double percentage, int64_t seed)
-    : BlockingSample(seed), allocator(allocator), sample_percentage(percentage / 100.0), current_count(0),
-      is_finalized(false) {
-	reservoir_sample_size = (idx_t)(sample_percentage * RESERVOIR_THRESHOLD);
-	current_sample = make_uniq<ReservoirSample>(allocator, reservoir_sample_size, base_reservoir_sample->random());
-	type = SampleType::RESERVOIR_PERCENTAGE_SAMPLE;
-}
-
-ReservoirSamplePercentage::ReservoirSamplePercentage(double percentage, int64_t seed)
-    : ReservoirSamplePercentage(Allocator::DefaultAllocator(), percentage, seed) {
-}
-
-void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
-	base_reservoir_sample->num_entries_seen_total += input.size();
-	if (current_count + input.size() > RESERVOIR_THRESHOLD) {
-		// we don't have enough space in our current reservoir
-		// first check what we still need to append to the current sample
-		idx_t append_to_current_sample_count = RESERVOIR_THRESHOLD - current_count;
-		idx_t append_to_next_sample = input.size() - append_to_current_sample_count;
-		if (append_to_current_sample_count > 0) {
-			// we have elements remaining, first add them to the current sample
-			if (append_to_next_sample > 0) {
-				// we need to also add to the next sample
-				DataChunk new_chunk;
-				new_chunk.InitializeEmpty(input.GetTypes());
-				new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count);
-				new_chunk.Flatten();
-				current_sample->AddToReservoir(new_chunk);
-			} else {
-				input.Flatten();
-				input.SetCardinality(append_to_current_sample_count);
-				current_sample->AddToReservoir(input);
-			}
-		}
-		if (append_to_next_sample > 0) {
-			// slice the input for the remainder
-			SelectionVector sel(append_to_next_sample);
-			for (idx_t i = append_to_current_sample_count; i < append_to_next_sample + append_to_current_sample_count;
-			     i++) {
-				sel.set_index(i - append_to_current_sample_count, i);
-			}
-			input.Slice(sel, append_to_next_sample);
-		}
-		// now our first sample is filled: append it to the set of finished samples
-		finished_samples.push_back(std::move(current_sample));
-
-		// allocate a new sample, and potentially add the remainder of the current input to that sample
-		current_sample = make_uniq<ReservoirSample>(allocator, reservoir_sample_size, base_reservoir_sample->random());
-		if (append_to_next_sample > 0) {
-			current_sample->AddToReservoir(input);
-		}
-		current_count = append_to_next_sample;
-	} else {
-		// we can just append to the current sample
-		current_count += input.size();
-		current_sample->AddToReservoir(input);
-	}
-}
-
-unique_ptr<DataChunk> ReservoirSamplePercentage::GetChunk() {
-	// reservoir sample percentage should never stay
-	if (!is_finalized) {
-		Finalize();
-	}
-	while (!finished_samples.empty()) {
-		auto &front = finished_samples.front();
-		auto chunk = front->GetChunk();
-		if (chunk && chunk->size() > 0) {
-			return chunk;
-		}
-		// move to the next sample
-		finished_samples.erase(finished_samples.begin());
-	}
-	return nullptr;
-}
-
-unique_ptr<BlockingSample> ReservoirSamplePercentage::Copy() const {
-	throw InternalException("Cannot call Copy on ReservoirSample Percentage");
-}
-
-void ReservoirSamplePercentage::Finalize() {
-	// need to finalize the current sample, if any
-	// we are finializing, so we are starting to return chunks. Our last chunk has
-	// sample_percentage * RESERVOIR_THRESHOLD entries that hold samples.
-	// if our current count is less than the sample_percentage * RESERVOIR_THRESHOLD
-	// then we have sampled too much for the current_sample and we need to redo the sample
-	// otherwise we can just push the current sample back
-	// Imagine sampling 70% of 100 rows (so 70 rows). We allocate sample_percentage * RESERVOIR_THRESHOLD
-	// -----------------------------------------
-	auto sampled_more_than_required =
-	    static_cast<double>(current_count) > sample_percentage * RESERVOIR_THRESHOLD || finished_samples.empty();
-	if (current_count > 0 && sampled_more_than_required) {
-		// create a new sample
-		auto new_sample_size = static_cast<idx_t>(round(sample_percentage * static_cast<double>(current_count)));
-		auto new_sample = make_uniq<ReservoirSample>(allocator, new_sample_size, base_reservoir_sample->random());
-		while (true) {
-			auto chunk = current_sample->GetChunk();
-			if (!chunk || chunk->size() == 0) {
-				break;
-			}
-			new_sample->AddToReservoir(*chunk);
-		}
-		finished_samples.push_back(std::move(new_sample));
-	} else {
-		finished_samples.push_back(std::move(current_sample));
-	}
-	// when finalizing, current_sample is null. All samples are now in finished samples.
-	current_sample = nullptr;
-	is_finalized = true;
-}
-
-} // namespace duckdb
diff --git a/src/function/table/system/CMakeLists.txt b/src/function/table/system/CMakeLists.txt
index c475b0112f86..1565132755e6 100644
--- a/src/function/table/system/CMakeLists.txt
+++ b/src/function/table/system/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library_unity(
   pragma_metadata_info.cpp
   pragma_storage_info.cpp
   pragma_table_info.cpp
-  pragma_table_sample.cpp
   pragma_user_agent.cpp
   test_all_types.cpp
   test_vector_types.cpp)
diff --git a/src/function/table/system/pragma_table_sample.cpp b/src/function/table/system/pragma_table_sample.cpp
deleted file mode 100644
index 7f4122b92b62..000000000000
--- a/src/function/table/system/pragma_table_sample.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#include "duckdb/function/table/system_functions.hpp"
-
-#include "duckdb/catalog/catalog.hpp"
-#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
-#include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp"
-#include "duckdb/parser/qualified_name.hpp"
-#include "duckdb/parser/constraints/not_null_constraint.hpp"
-#include "duckdb/parser/constraints/unique_constraint.hpp"
-#include "duckdb/planner/expression/bound_parameter_expression.hpp"
-#include "duckdb/planner/binder.hpp"
-
-#include "duckdb/common/exception.hpp"
-#include "duckdb/common/limits.hpp"
-
-#include <algorithm>
-
-namespace duckdb {
-
-struct DuckDBTableSampleFunctionData : public TableFunctionData {
-	explicit DuckDBTableSampleFunctionData(CatalogEntry &entry_p) : entry(entry_p) {
-	}
-	CatalogEntry &entry;
-};
-
-struct DuckDBTableSampleOperatorData : public GlobalTableFunctionState {
-	DuckDBTableSampleOperatorData() : sample_offset(0) {
-		sample = nullptr;
-	}
-	idx_t sample_offset;
-	unique_ptr<BlockingSample> sample;
-};
-
-static unique_ptr<FunctionData> DuckDBTableSampleBind(ClientContext &context, TableFunctionBindInput &input,
-                                                      vector<LogicalType> &return_types, vector<string> &names) {
-
-	// look up the table name in the catalog
-	auto qname = QualifiedName::Parse(input.inputs[0].GetValue<string>());
-	Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema);
-
-	auto &entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, qname.catalog, qname.schema, qname.name);
-	if (entry.type != CatalogType::TABLE_ENTRY) {
-		throw NotImplementedException("Invalid Catalog type passed to table_sample()");
-	}
-	auto &table_entry = entry.Cast<TableCatalogEntry>();
-	auto types = table_entry.GetTypes();
-	for (auto &type : types) {
-		return_types.push_back(type);
-	}
-	for (idx_t i = 0; i < types.size(); i++) {
-		auto logical_index = LogicalIndex(i);
-		auto &col = table_entry.GetColumn(logical_index);
-		names.push_back(col.GetName());
-	}
-
-	return make_uniq<DuckDBTableSampleFunctionData>(entry);
-}
-
-unique_ptr<GlobalTableFunctionState> DuckDBTableSampleInit(ClientContext &context, TableFunctionInitInput &input) {
-	return make_uniq<DuckDBTableSampleOperatorData>();
-}
-
-static void DuckDBTableSampleTable(ClientContext &context, DuckDBTableSampleOperatorData &data,
-                                   TableCatalogEntry &table, DataChunk &output) {
-	// if table has statistics.
-	// copy the sample of statistics into the output chunk
-	if (!data.sample) {
-		data.sample = table.GetSample();
-	}
-	if (data.sample) {
-		auto sample_chunk = data.sample->GetChunk();
-		if (sample_chunk) {
-			sample_chunk->Copy(output, 0);
-			data.sample_offset += sample_chunk->size();
-		}
-	}
-}
-
-static void DuckDBTableSampleFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
-	auto &bind_data = data_p.bind_data->Cast<DuckDBTableSampleFunctionData>();
-	auto &state = data_p.global_state->Cast<DuckDBTableSampleOperatorData>();
-	switch (bind_data.entry.type) {
-	case CatalogType::TABLE_ENTRY:
-		DuckDBTableSampleTable(context, state, bind_data.entry.Cast<TableCatalogEntry>(), output);
-		break;
-	default:
-		throw NotImplementedException("Unimplemented catalog type for pragma_table_sample");
-	}
-}
-
-void DuckDBTableSample::RegisterFunction(BuiltinFunctions &set) {
-	set.AddFunction(TableFunction("duckdb_table_sample", {LogicalType::VARCHAR}, DuckDBTableSampleFunction,
-	                              DuckDBTableSampleBind, DuckDBTableSampleInit));
-}
-
-} // namespace duckdb
diff --git a/src/function/table/system_functions.cpp b/src/function/table/system_functions.cpp
index 972eb7e54a2f..48559545c59d 100644
--- a/src/function/table/system_functions.cpp
+++ b/src/function/table/system_functions.cpp
@@ -37,7 +37,6 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
 	DuckDBSequencesFun::RegisterFunction(*this);
 	DuckDBSettingsFun::RegisterFunction(*this);
 	DuckDBTablesFun::RegisterFunction(*this);
-	DuckDBTableSample::RegisterFunction(*this);
 	DuckDBTemporaryFilesFun::RegisterFunction(*this);
 	DuckDBTypesFun::RegisterFunction(*this);
 	DuckDBVariablesFun::RegisterFunction(*this);
diff --git a/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp b/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp
index fb9d5ae67d14..ce09c4fe54f1 100644
--- a/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp
+++ b/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp
@@ -35,8 +35,6 @@ class DuckTableEntry : public TableCatalogEntry {
 	//! Get statistics of a column (physical or virtual) within the table
 	unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id) override;
 
-	unique_ptr<BlockingSample> GetSample() override;
-
 	unique_ptr<CatalogEntry> Copy(ClientContext &context) const override;
 
 	void SetAsRoot() override;
diff --git a/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp b/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp
index 398e49974b88..b2acb05f1644 100644
--- a/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp
+++ b/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp
@@ -13,7 +13,6 @@
 #include "duckdb/parser/column_list.hpp"
 #include "duckdb/parser/constraint.hpp"
 #include "duckdb/planner/bound_constraint.hpp"
-#include "duckdb/storage/table/table_statistics.hpp"
 #include "duckdb/planner/expression.hpp"
 #include "duckdb/common/case_insensitive_map.hpp"
 #include "duckdb/catalog/catalog_entry/table_column_type.hpp"
@@ -83,8 +82,6 @@ class TableCatalogEntry : public StandardEntry {
 	//! Get statistics of a column (physical or virtual) within the table
 	virtual unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id) = 0;
 
-	virtual unique_ptr<BlockingSample> GetSample();
-
 	//! Returns the column index of the specified column name.
 	//! If the column does not exist:
 	//! If if_column_exists is true, returns DConstants::INVALID_INDEX
diff --git a/src/include/duckdb/common/enum_util.hpp b/src/include/duckdb/common/enum_util.hpp
index 379a23969c55..0e7a928b42c3 100644
--- a/src/include/duckdb/common/enum_util.hpp
+++ b/src/include/duckdb/common/enum_util.hpp
@@ -294,8 +294,6 @@ enum class SampleMethod : uint8_t;
 
 enum class SampleType : uint8_t;
 
-enum class SamplingState : uint8_t;
-
 enum class ScanType : uint8_t;
 
 enum class SecretDisplayType : uint8_t;
@@ -784,9 +782,6 @@ const char* EnumUtil::ToChars<SampleMethod>(SampleMethod value);
 template<>
 const char* EnumUtil::ToChars<SampleType>(SampleType value);
 
-template<>
-const char* EnumUtil::ToChars<SamplingState>(SamplingState value);
-
 template<>
 const char* EnumUtil::ToChars<ScanType>(ScanType value);
 
@@ -1322,9 +1317,6 @@ SampleMethod EnumUtil::FromString<SampleMethod>(const char *value);
 template<>
 SampleType EnumUtil::FromString<SampleType>(const char *value);
 
-template<>
-SamplingState EnumUtil::FromString<SamplingState>(const char *value);
-
 template<>
 ScanType EnumUtil::FromString<ScanType>(const char *value);
 
diff --git a/src/include/duckdb/common/random_engine.hpp b/src/include/duckdb/common/random_engine.hpp
index 8a5a3097e4f7..50db4155c04e 100644
--- a/src/include/duckdb/common/random_engine.hpp
+++ b/src/include/duckdb/common/random_engine.hpp
@@ -18,11 +18,11 @@ namespace duckdb {
 class ClientContext;
 struct RandomState;
 
-class RandomEngine {
-public:
+struct RandomEngine {
 	explicit RandomEngine(int64_t seed = -1);
 	~RandomEngine();
 
+public:
 	//! Generate a random number between min and max
 	double NextRandom(double min, double max);
 
@@ -31,8 +31,8 @@ class RandomEngine {
 	//! Generate a random number between 0 and 1, using 32-bits as a base
 	double NextRandom32();
 	double NextRandom32(double min, double max);
-	uint32_t NextRandomInteger32(uint32_t min, uint32_t max);
 	uint32_t NextRandomInteger();
+	uint32_t NextRandomInteger32(uint32_t min, uint32_t max);
 	uint32_t NextRandomInteger(uint32_t min, uint32_t max);
 	uint64_t NextRandomInteger64();
 
diff --git a/src/include/duckdb/common/serializer/serializer.hpp b/src/include/duckdb/common/serializer/serializer.hpp
index 97aeef51a6f6..58ab8e91a963 100644
--- a/src/include/duckdb/common/serializer/serializer.hpp
+++ b/src/include/duckdb/common/serializer/serializer.hpp
@@ -16,7 +16,6 @@
 #include "duckdb/common/types/uhugeint.hpp"
 #include "duckdb/common/unordered_map.hpp"
 #include "duckdb/common/unordered_set.hpp"
-#include "duckdb/common/queue.hpp"
 #include "duckdb/common/optional_idx.hpp"
 #include "duckdb/common/optionally_owned_ptr.hpp"
 #include "duckdb/common/value_operations/value_operations.hpp"
diff --git a/src/include/duckdb/common/types/uuid.hpp b/src/include/duckdb/common/types/uuid.hpp
index bf5ade17a15f..5573aac633c5 100644
--- a/src/include/duckdb/common/types/uuid.hpp
+++ b/src/include/duckdb/common/types/uuid.hpp
@@ -13,7 +13,7 @@
 
 namespace duckdb {
 class ClientContext;
-class RandomEngine;
+struct RandomEngine;
 
 //! The UUID class contains static operations for the UUID type
 class UUID {
diff --git a/src/include/duckdb/execution/physical_operator.hpp b/src/include/duckdb/execution/physical_operator.hpp
index 50529d594f67..822b55377b74 100644
--- a/src/include/duckdb/execution/physical_operator.hpp
+++ b/src/include/duckdb/execution/physical_operator.hpp
@@ -164,7 +164,7 @@ class PhysicalOperator {
 	virtual void PrepareFinalize(ClientContext &context, GlobalSinkState &sink_state) const;
 	//! The finalize is called when ALL threads are finished execution. It is called only once per pipeline, and is
 	//! entirely single threaded.
-	//! If Finalize returns SinkResultType::Finished, the sink is marked as finished
+	//! If Finalize returns SinkResultType::FINISHED, the sink is marked as finished
 	virtual SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
 	                                  OperatorSinkFinalizeInput &input) const;
 	//! For sinks with RequiresBatchIndex set to true, when a new batch starts being processed this method is called
diff --git a/src/include/duckdb/execution/reservoir_sample.hpp b/src/include/duckdb/execution/reservoir_sample.hpp
index bc815b11f9de..0edc7e073b9a 100644
--- a/src/include/duckdb/execution/reservoir_sample.hpp
+++ b/src/include/duckdb/execution/reservoir_sample.hpp
@@ -12,64 +12,25 @@
 #include "duckdb/common/common.hpp"
 #include "duckdb/common/random_engine.hpp"
 #include "duckdb/common/types/data_chunk.hpp"
-#include "duckdb/common/windows_undefs.hpp"
 
 #include "duckdb/common/queue.hpp"
 
-// Originally intended to be the vector size, but in order to run on
-// vector size = 2, we had to change it.
-#define FIXED_SAMPLE_SIZE 2048
-
 namespace duckdb {
 
 enum class SampleType : uint8_t { BLOCKING_SAMPLE = 0, RESERVOIR_SAMPLE = 1, RESERVOIR_PERCENTAGE_SAMPLE = 2 };
 
-enum class SamplingState : uint8_t { RANDOM = 0, RESERVOIR = 1 };
-
-class ReservoirRNG : public RandomEngine {
-public:
-	// return type must be called result type to be a valid URNG
-	typedef uint32_t result_type;
-
-	explicit ReservoirRNG(int64_t seed) : RandomEngine(seed) {};
-
-	result_type operator()() {
-		return NextRandomInteger();
-	};
-
-	static constexpr result_type min() {
-		return NumericLimits<result_type>::Minimum();
-	};
-	static constexpr result_type max() {
-		return NumericLimits<result_type>::Maximum();
-	};
-};
-
-//! Resevoir sampling is based on the 2005 paper "Weighted Random Sampling" by Efraimidis and Spirakis
 class BaseReservoirSampling {
 public:
 	explicit BaseReservoirSampling(int64_t seed);
 	BaseReservoirSampling();
 
-	void InitializeReservoirWeights(idx_t cur_size, idx_t sample_size);
+	void InitializeReservoir(idx_t cur_size, idx_t sample_size);
 
 	void SetNextEntry();
 
-	void ReplaceElementWithIndex(idx_t entry_index, double with_weight, bool pop = true);
 	void ReplaceElement(double with_weight = -1);
-
-	void UpdateMinWeightThreshold();
-
-	//! Go from the naive sampling to the reservoir sampling
-	//! Naive samping will not collect weights, but when we serialize
-	//! we need to serialize weights again.
-	void FillWeights(SelectionVector &sel, idx_t &sel_size);
-
-	unique_ptr<BaseReservoirSampling> Copy();
-
 	//! The random generator
-	ReservoirRNG random;
-
+	RandomEngine random;
 	//! The next element to sample
 	idx_t next_index_to_sample;
 	//! The reservoir threshold of the current min entry
@@ -87,13 +48,6 @@ class BaseReservoirSampling {
 
 	void Serialize(Serializer &serializer) const;
 	static unique_ptr<BaseReservoirSampling> Deserialize(Deserializer &deserializer);
-
-	static double GetMinWeightFromTuplesSeen(idx_t rows_seen_total);
-	// static unordered_map<idx_t, double> tuples_to_min_weight_map;
-	// Blocking sample is a virtual class. It should be allowed to see the weights and
-	// of tuples in the sample. The blocking sample can then easily maintain statisitcal properties
-	// from the sample point of view.
-	friend class BlockingSample;
 };
 
 class BlockingSample {
@@ -107,31 +61,24 @@ class BlockingSample {
 	bool destroyed;
 
 public:
-	explicit BlockingSample(int64_t seed = -1)
-	    : base_reservoir_sample(make_uniq<BaseReservoirSampling>(seed)), type(SampleType::BLOCKING_SAMPLE),
-	      destroyed(false) {
+	explicit BlockingSample(int64_t seed) : old_base_reservoir_sample(seed), random(old_base_reservoir_sample.random) {
+		base_reservoir_sample = nullptr;
 	}
 	virtual ~BlockingSample() {
 	}
 
 	//! Add a chunk of data to the sample
 	virtual void AddToReservoir(DataChunk &input) = 0;
-	virtual unique_ptr<BlockingSample> Copy() const = 0;
-	virtual void Finalize() = 0;
-	virtual void Destroy();
 
-	//! Fetches a chunk from the sample. destroy = true should only be used when
-	//! querying from a sample defined in a query and not a duckdb_table_sample.
+	virtual void Finalize() = 0;
+	//! Fetches a chunk from the sample. Note that this method is destructive and should only be used after the
+	//! sample is completely built.
 	virtual unique_ptr<DataChunk> GetChunk() = 0;
+	BaseReservoirSampling old_base_reservoir_sample;
 
 	virtual void Serialize(Serializer &serializer) const;
 	static unique_ptr<BlockingSample> Deserialize(Deserializer &deserializer);
 
-	//! Helper functions needed to merge two reservoirs while respecting weights of sampled rows
-	std::pair<double, idx_t> PopFromWeightQueue();
-	double GetMinWeightThreshold();
-	idx_t GetPriorityQueueSize();
-
 public:
 	template <class TARGET>
 	TARGET &Cast() {
@@ -148,6 +95,8 @@ class BlockingSample {
 		}
 		return reinterpret_cast<const TARGET &>(*this);
 	}
+	//! The reservoir sampling
+	RandomEngine &random;
 };
 
 class ReservoirChunk {
@@ -158,126 +107,45 @@ class ReservoirChunk {
 	DataChunk chunk;
 	void Serialize(Serializer &serializer) const;
 	static unique_ptr<ReservoirChunk> Deserialize(Deserializer &deserializer);
-
-	unique_ptr<ReservoirChunk> Copy() const;
-};
-
-struct SelectionVectorHelper {
-	SelectionVector sel;
-	uint32_t size;
 };
 
+//! The reservoir sample class maintains a streaming sample of fixed size "sample_count"
 class ReservoirSample : public BlockingSample {
 public:
 	static constexpr const SampleType TYPE = SampleType::RESERVOIR_SAMPLE;
 
-	constexpr static idx_t FIXED_SAMPLE_SIZE_MULTIPLIER = 10;
-	// size is small enough, then the threshold to switch
-	// MinValue between std vec size and fixed sample size.
-	// During 'fast' sampling, we want every new vector to have the potential
-	// to add to the sample. If the threshold is too far below the standard vector size, then
-	// samples in the sample have a higher weight than new samples coming in.
-	// i.e during vector_size=2, 2 new samples will not be significant compared 2048 samples from 204800 tuples.
-	constexpr static idx_t FAST_TO_SLOW_THRESHOLD = MinValue<idx_t>(STANDARD_VECTOR_SIZE, 60);
-
-	// If the table has less than 204800 rows, this is the percentage
-	// of values we save when serializing/returning a sample.
-	constexpr static double SAVE_PERCENTAGE = 0.01;
-
+public:
 	ReservoirSample(Allocator &allocator, idx_t sample_count, int64_t seed = 1);
-	explicit ReservoirSample(idx_t sample_count, unique_ptr<ReservoirChunk> = nullptr);
-
-	//! methods used to help with serializing and deserializing
-	void EvictOverBudgetSamples();
-	void ExpandSerializedSample();
-
-	SamplingState GetSamplingState() const;
-
-	//! Vacuum the Reservoir Sample so it throws away tuples that are not in the
-	//! reservoir weights or in the selection vector
-	void Vacuum();
-
-	//! Transform To sample based on reservoir sampling paper
-	void ConvertToReservoirSample();
-
-	//! Get the capactiy of the data chunk reserved for storing samples
-	idx_t GetReservoirChunkCapacity() const;
+	explicit ReservoirSample(idx_t sample_count, int64_t seed = 1);
 
-	//! If for_serialization=true then the sample_chunk is not padded with extra spaces for
-	//! future sampling values
-	unique_ptr<BlockingSample> Copy() const override;
-
-	//! create the first chunk called by AddToReservoir()
-	idx_t FillReservoir(DataChunk &chunk);
 	//! Add a chunk of data to the sample
 	void AddToReservoir(DataChunk &input) override;
-	//! Merge two Reservoir Samples. Other must be a reservoir sample
-	void Merge(unique_ptr<BlockingSample> other);
-
-	void ShuffleSel(SelectionVector &sel, idx_t range, idx_t size) const;
-
-	//! Update the sample by pushing new sample rows to the end of the sample_chunk.
-	//! The new sample rows are the tuples rows resulting from applying sel to other
-	void UpdateSampleAppend(DataChunk &this_, DataChunk &other, SelectionVector &other_sel, idx_t append_count) const;
-
-	idx_t GetTuplesSeen() const;
-	idx_t NumSamplesCollected() const;
-	idx_t GetActiveSampleCount() const;
-	static bool ValidSampleType(const LogicalType &type);
-
-	// get the chunk from Reservoir chunk
-	DataChunk &Chunk();
 
 	//! Fetches a chunk from the sample. Note that this method is destructive and should only be used after the
 	//! sample is completely built.
-	// unique_ptr<DataChunk> GetChunkAndDestroy() override;
 	unique_ptr<DataChunk> GetChunk() override;
-	void Destroy() override;
 	void Finalize() override;
-	void Verify();
-
-	idx_t GetSampleCount();
-
-	// map is [index in input chunk] -> [index in sample chunk]. Both are zero-based
-	// [index in sample chunk] is incremented by 1
-	// index in input chunk have random values, however, they are increasing.
-	// The base_reservoir_sampling gets updated however, so the indexes point to (sample_chunk_offset +
-	// index_in_sample_chunk) this data is used to make a selection vector to copy samples from the input chunk to the
-	// sample chunk
-	//! Get indexes from current sample that can be replaced.
-	SelectionVectorHelper GetReplacementIndexes(idx_t sample_chunk_offset, idx_t theoretical_chunk_length);
-
 	void Serialize(Serializer &serializer) const override;
 	static unique_ptr<BlockingSample> Deserialize(Deserializer &deserializer);
 
 private:
-	// when we serialize, we may have collected too many samples since we fill a standard vector size, then
-	// truncate if the table is still <=204800 values. The problem is, in our weights, we store indexes into
-	// the selection vector. If throw away values at selection vector index i = 5 , we need to update all indexes
-	// i > 5. Otherwise we will have indexes in the weights that are greater than the length of our sample.
-	void NormalizeWeights();
-
-	SelectionVectorHelper GetReplacementIndexesSlow(const idx_t sample_chunk_offset, const idx_t chunk_length);
-	SelectionVectorHelper GetReplacementIndexesFast(const idx_t sample_chunk_offset, const idx_t chunk_length);
-	void SimpleMerge(ReservoirSample &other);
-	void WeightedMerge(ReservoirSample &other_sample);
-
-	// Helper methods for Shrink().
-	// Shrink has different logic depending on if the Reservoir sample is still in
-	// "Random" mode or in "reservoir" mode. This function creates a new sample chunk
-	// to copy the old sample chunk into
-	unique_ptr<ReservoirChunk> CreateNewSampleChunk(vector<LogicalType> &types, idx_t size) const;
-
-	// Get a vector where each index is a random int in the range 0, size.
-	// This is used to shuffle selection vector indexes
-	vector<uint32_t> GetRandomizedVector(uint32_t range, uint32_t size) const;
+	//! Replace a single element of the input
+	void ReplaceElement(DataChunk &input, idx_t index_in_chunk, double with_weight = -1);
+	void InitializeReservoir(DataChunk &input);
+	//! Fills the reservoir up until sample_count entries, returns how many entries are still required
+	idx_t FillReservoir(DataChunk &input);
 
-	idx_t sample_count;
+public:
 	Allocator &allocator;
+	//! The size of the reservoir sample.
+	//! when calculating percentages, it is set to reservoir_threshold * percentage
+	//! when explicit number used, sample_count = number
+	idx_t sample_count;
+	bool reservoir_initialized;
+
+	//! The current reservoir
+	unique_ptr<DataChunk> reservoir_data_chunk;
 	unique_ptr<ReservoirChunk> reservoir_chunk;
-	bool stats_sample;
-	SelectionVector sel;
-	idx_t sel_size;
 };
 
 //! The reservoir sample sample_size class maintains a streaming sample of variable size
@@ -287,16 +155,15 @@ class ReservoirSamplePercentage : public BlockingSample {
 public:
 	static constexpr const SampleType TYPE = SampleType::RESERVOIR_PERCENTAGE_SAMPLE;
 
+public:
 	ReservoirSamplePercentage(Allocator &allocator, double percentage, int64_t seed = -1);
-	ReservoirSamplePercentage(double percentage, int64_t seed, idx_t reservoir_sample_size);
 	explicit ReservoirSamplePercentage(double percentage, int64_t seed = -1);
 
 	//! Add a chunk of data to the sample
 	void AddToReservoir(DataChunk &input) override;
 
-	unique_ptr<BlockingSample> Copy() const override;
-
-	//! Fetches a chunk from the sample. If destory = true this method is descructive
+	//! Fetches a chunk from the sample. Note that this method is destructive and should only be used after the
+	//! sample is completely built.
 	unique_ptr<DataChunk> GetChunk() override;
 	void Finalize() override;
 
@@ -315,11 +182,9 @@ class ReservoirSamplePercentage : public BlockingSample {
 
 	//! The set of finished samples of the reservoir sample
 	vector<unique_ptr<ReservoirSample>> finished_samples;
-
 	//! The amount of tuples that have been processed so far (not put in the reservoir, just processed)
 	idx_t current_count = 0;
-	//! Whether or not the stream is finalized. The stream is automatically finalized on the first call to
-	//! GetChunkAndShrink();
+	//! Whether or not the stream is finalized. The stream is automatically finalized on the first call to GetChunk();
 	bool is_finalized;
 };
 
diff --git a/src/include/duckdb/function/table/system_functions.hpp b/src/include/duckdb/function/table/system_functions.hpp
index c339b386d98e..e1a40cf982f2 100644
--- a/src/include/duckdb/function/table/system_functions.hpp
+++ b/src/include/duckdb/function/table/system_functions.hpp
@@ -119,10 +119,6 @@ struct DuckDBTablesFun {
 	static void RegisterFunction(BuiltinFunctions &set);
 };
 
-struct DuckDBTableSample {
-	static void RegisterFunction(BuiltinFunctions &set);
-};
-
 struct DuckDBTemporaryFilesFun {
 	static void RegisterFunction(BuiltinFunctions &set);
 };
diff --git a/src/include/duckdb/main/client_data.hpp b/src/include/duckdb/main/client_data.hpp
index 49c281e2d326..27eeb61788df 100644
--- a/src/include/duckdb/main/client_data.hpp
+++ b/src/include/duckdb/main/client_data.hpp
@@ -27,7 +27,7 @@ class QueryProfiler;
 class PreparedStatementData;
 class SchemaCatalogEntry;
 class HTTPLogger;
-class RandomEngine;
+struct RandomEngine;
 
 struct ClientData {
 	explicit ClientData(ClientContext &context);
diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp
index 6a0b97727384..b05fc6b65a3e 100644
--- a/src/include/duckdb/storage/data_table.hpp
+++ b/src/include/duckdb/storage/data_table.hpp
@@ -188,9 +188,6 @@ class DataTable {
 
 	//! Get statistics of a physical column within the table
 	unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
-
-	//! Get table sample
-	unique_ptr<BlockingSample> GetSample();
 	//! Sets statistics of a physical column within the table
 	void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
 
diff --git a/src/include/duckdb/storage/serialization/nodes.json b/src/include/duckdb/storage/serialization/nodes.json
index 781cdd9c27e2..7e44380715a5 100644
--- a/src/include/duckdb/storage/serialization/nodes.json
+++ b/src/include/duckdb/storage/serialization/nodes.json
@@ -281,7 +281,7 @@
         "type": "unique_ptr<ReservoirChunk>"
       }
     ],
-    "constructor": ["sample_count", "reservoir_chunk"]
+    "constructor": ["sample_count"]
   },
   {
     "class": "ReservoirSamplePercentage",
@@ -331,6 +331,7 @@
     ],
     "pointer_type": "none"
   },
+
   {
     "class": "PivotColumnEntry",
     "members": [
diff --git a/src/include/duckdb/storage/table/row_group_collection.hpp b/src/include/duckdb/storage/table/row_group_collection.hpp
index 19aa6452038c..5c47dcd62179 100644
--- a/src/include/duckdb/storage/table/row_group_collection.hpp
+++ b/src/include/duckdb/storage/table/row_group_collection.hpp
@@ -124,7 +124,6 @@ class RowGroupCollection {
 
 	void CopyStats(TableStatistics &stats);
 	unique_ptr<BaseStatistics> CopyStats(column_t column_id);
-	unique_ptr<BlockingSample> GetSample();
 	void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
 
 	AttachedDatabase &GetAttached();
diff --git a/src/include/duckdb/storage/table/table_statistics.hpp b/src/include/duckdb/storage/table/table_statistics.hpp
index 3bb712f8b2c0..633d469463c2 100644
--- a/src/include/duckdb/storage/table/table_statistics.hpp
+++ b/src/include/duckdb/storage/table/table_statistics.hpp
@@ -48,14 +48,6 @@ class TableStatistics {
 	//! Get a reference to the stats - this requires us to hold the lock.
 	//! The reference can only be safely accessed while the lock is held
 	ColumnStatistics &GetStats(TableStatisticsLock &lock, idx_t i);
-	//! Get a reference to the table sample - this requires us to hold the lock.
-	// BlockingSample &GetTableSampleRef(TableStatisticsLock &lock);
-	//! Take ownership of the sample, needed for merging. Requires the lock
-	unique_ptr<BlockingSample> GetTableSample(TableStatisticsLock &lock);
-	void SetTableSample(TableStatisticsLock &lock, unique_ptr<BlockingSample> sample);
-
-	void DestroyTableSample(TableStatisticsLock &lock) const;
-	void AppendToTableSample(TableStatisticsLock &lock, unique_ptr<BlockingSample> sample);
 
 	bool Empty();
 
@@ -70,6 +62,7 @@ class TableStatistics {
 	//! Column statistics
 	vector<shared_ptr<ColumnStatistics>> column_stats;
 	//! The table sample
+	//! Sample for table
 	unique_ptr<BlockingSample> table_sample;
 };
 
diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp
index fb9baddf9642..c6d0f19bcfae 100644
--- a/src/storage/data_table.cpp
+++ b/src/storage/data_table.cpp
@@ -1512,10 +1512,6 @@ void DataTable::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> d
 	row_groups->SetDistinct(column_id, std::move(distinct_stats));
 }
 
-unique_ptr<BlockingSample> DataTable::GetSample() {
-	return row_groups->GetSample();
-}
-
 //===--------------------------------------------------------------------===//
 // Checkpoint
 //===--------------------------------------------------------------------===//
@@ -1532,8 +1528,8 @@ void DataTable::Checkpoint(TableDataWriter &writer, Serializer &serializer) {
 	TableStatistics global_stats;
 	row_groups->CopyStats(global_stats);
 	row_groups->Checkpoint(writer, global_stats);
+
 	// The row group payload data has been written. Now write:
-	//   sample
 	//   column stats
 	//   row-group pointers
 	//   table pointer
diff --git a/src/storage/serialization/serialize_nodes.cpp b/src/storage/serialization/serialize_nodes.cpp
index 10fe3f949b38..b386d2b9b0db 100644
--- a/src/storage/serialization/serialize_nodes.cpp
+++ b/src/storage/serialization/serialize_nodes.cpp
@@ -599,8 +599,8 @@ void ReservoirSample::Serialize(Serializer &serializer) const {
 
 unique_ptr<BlockingSample> ReservoirSample::Deserialize(Deserializer &deserializer) {
 	auto sample_count = deserializer.ReadPropertyWithDefault<idx_t>(200, "sample_count");
-	auto reservoir_chunk = deserializer.ReadPropertyWithDefault<unique_ptr<ReservoirChunk>>(201, "reservoir_chunk");
-	auto result = duckdb::unique_ptr<ReservoirSample>(new ReservoirSample(sample_count, std::move(reservoir_chunk)));
+	auto result = duckdb::unique_ptr<ReservoirSample>(new ReservoirSample(sample_count));
+	deserializer.ReadPropertyWithDefault<unique_ptr<ReservoirChunk>>(201, "reservoir_chunk", result->reservoir_chunk);
 	return std::move(result);
 }
 
diff --git a/src/storage/table/row_group_collection.cpp b/src/storage/table/row_group_collection.cpp
index 2629800a4be8..b11b6b1afad6 100644
--- a/src/storage/table/row_group_collection.cpp
+++ b/src/storage/table/row_group_collection.cpp
@@ -1,4 +1,5 @@
 #include "duckdb/storage/table/row_group_collection.hpp"
+
 #include "duckdb/common/serializer/binary_deserializer.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/execution/index/bound_index.hpp"
@@ -396,14 +397,11 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
 		}
 	}
 	state.current_row += row_t(total_append_count);
-
 	auto local_stats_lock = state.stats.GetLock();
-
 	for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
 		auto &column_stats = state.stats.GetStats(*local_stats_lock, col_idx);
 		column_stats.UpdateDistinctStatistics(chunk.data[col_idx], chunk.size(), state.hashes);
 	}
-
 	return new_row_group;
 }
 
@@ -423,8 +421,8 @@ void RowGroupCollection::FinalizeAppend(TransactionData transaction, TableAppend
 	state.total_append_count = 0;
 	state.start_row_group = nullptr;
 
-	auto local_stats_lock = state.stats.GetLock();
 	auto global_stats_lock = stats.GetLock();
+	auto local_stats_lock = state.stats.GetLock();
 	for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
 		auto &global_stats = stats.GetStats(*global_stats_lock, col_idx);
 		if (!global_stats.HasDistinctStats()) {
@@ -584,7 +582,6 @@ idx_t RowGroupCollection::Delete(TransactionData transaction, DataTable &table,
 		}
 		delete_count += row_group->Delete(transaction, table, ids + start, pos - start);
 	} while (pos < count);
-
 	return delete_count;
 }
 
@@ -1142,7 +1139,6 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
 
 		result->row_groups->AppendSegment(std::move(new_row_group));
 	}
-
 	return result;
 }
 
@@ -1155,9 +1151,6 @@ shared_ptr<RowGroupCollection> RowGroupCollection::RemoveColumn(idx_t col_idx) {
 	                                                  total_rows.load(), row_group_size);
 	result->stats.InitializeRemoveColumn(stats, col_idx);
 
-	auto result_lock = result->stats.GetLock();
-	result->stats.DestroyTableSample(*result_lock);
-
 	for (auto &current_row_group : row_groups->Segments()) {
 		auto new_row_group = current_row_group.RemoveColumn(*result, col_idx);
 		result->row_groups->AppendSegment(std::move(new_row_group));
@@ -1204,6 +1197,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
 		new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
 		result->row_groups->AppendSegment(std::move(new_row_group));
 	}
+
 	return result;
 }
 
@@ -1250,7 +1244,7 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst
 
 //===--------------------------------------------------------------------===//
 // Statistics
-//===---------------------------------------------------------------r-----===//
+//===--------------------------------------------------------------------===//
 void RowGroupCollection::CopyStats(TableStatistics &other_stats) {
 	stats.CopyStats(other_stats);
 }
@@ -1259,10 +1253,6 @@ unique_ptr<BaseStatistics> RowGroupCollection::CopyStats(column_t column_id) {
 	return stats.CopyStats(column_id);
 }
 
-unique_ptr<BlockingSample> RowGroupCollection::GetSample() {
-	return nullptr;
-}
-
 void RowGroupCollection::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats) {
 	D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
 	auto stats_lock = stats.GetLock();
diff --git a/src/storage/table/table_statistics.cpp b/src/storage/table/table_statistics.cpp
index 61d85319c4a2..b51c445d0dc2 100644
--- a/src/storage/table/table_statistics.cpp
+++ b/src/storage/table/table_statistics.cpp
@@ -1,23 +1,16 @@
 #include "duckdb/storage/table/table_statistics.hpp"
-
-#include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/storage/table/persistent_table_data.hpp"
 #include "duckdb/common/serializer/serializer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
 #include "duckdb/execution/reservoir_sample.hpp"
-#include "duckdb/storage/table/persistent_table_data.hpp"
 
 namespace duckdb {
 
 void TableStatistics::Initialize(const vector<LogicalType> &types, PersistentTableData &data) {
 	D_ASSERT(Empty());
-	D_ASSERT(!table_sample);
 
 	stats_lock = make_shared_ptr<mutex>();
 	column_stats = std::move(data.table_stats.column_stats);
-	if (data.table_stats.table_sample) {
-		table_sample = std::move(data.table_stats.table_sample);
-	} else {
-		table_sample = make_uniq<ReservoirSample>(static_cast<idx_t>(FIXED_SAMPLE_SIZE));
-	}
 	if (column_stats.size() != types.size()) { // LCOV_EXCL_START
 		throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
 	} // LCOV_EXCL_STOP
@@ -25,10 +18,8 @@ void TableStatistics::Initialize(const vector<LogicalType> &types, PersistentTab
 
 void TableStatistics::InitializeEmpty(const vector<LogicalType> &types) {
 	D_ASSERT(Empty());
-	D_ASSERT(!table_sample);
 
 	stats_lock = make_shared_ptr<mutex>();
-	table_sample = make_uniq<ReservoirSample>(static_cast<idx_t>(FIXED_SAMPLE_SIZE));
 	for (auto &type : types) {
 		column_stats.push_back(ColumnStatistics::CreateEmptyStats(type));
 	}
@@ -44,12 +35,6 @@ void TableStatistics::InitializeAddColumn(TableStatistics &parent, const Logical
 		column_stats.push_back(parent.column_stats[i]);
 	}
 	column_stats.push_back(ColumnStatistics::CreateEmptyStats(new_column_type));
-	if (parent.table_sample) {
-		table_sample = std::move(parent.table_sample);
-	}
-	if (table_sample) {
-		table_sample->Destroy();
-	}
 }
 
 void TableStatistics::InitializeRemoveColumn(TableStatistics &parent, idx_t removed_column) {
@@ -63,12 +48,6 @@ void TableStatistics::InitializeRemoveColumn(TableStatistics &parent, idx_t remo
 			column_stats.push_back(parent.column_stats[i]);
 		}
 	}
-	if (parent.table_sample) {
-		table_sample = std::move(parent.table_sample);
-	}
-	if (table_sample) {
-		table_sample->Destroy();
-	}
 }
 
 void TableStatistics::InitializeAlterType(TableStatistics &parent, idx_t changed_idx, const LogicalType &new_type) {
@@ -84,12 +63,6 @@ void TableStatistics::InitializeAlterType(TableStatistics &parent, idx_t changed
 			column_stats.push_back(parent.column_stats[i]);
 		}
 	}
-	if (parent.table_sample) {
-		table_sample = std::move(parent.table_sample);
-	}
-	if (table_sample) {
-		table_sample->Destroy();
-	}
 }
 
 void TableStatistics::InitializeAddConstraint(TableStatistics &parent) {
@@ -106,21 +79,6 @@ void TableStatistics::InitializeAddConstraint(TableStatistics &parent) {
 void TableStatistics::MergeStats(TableStatistics &other) {
 	auto l = GetLock();
 	D_ASSERT(column_stats.size() == other.column_stats.size());
-	if (table_sample) {
-		if (other.table_sample) {
-			D_ASSERT(table_sample->type == SampleType::RESERVOIR_SAMPLE);
-			auto &this_reservoir = table_sample->Cast<ReservoirSample>();
-			D_ASSERT(other.table_sample->type == SampleType::RESERVOIR_SAMPLE);
-			this_reservoir.Merge(std::move(other.table_sample));
-		}
-		// if no other.table sample, do nothig
-	} else {
-		if (other.table_sample) {
-			auto &other_reservoir = other.table_sample->Cast<ReservoirSample>();
-			auto other_table_sample_copy = other_reservoir.Copy();
-			table_sample = std::move(other_table_sample_copy);
-		}
-	}
 	for (idx_t i = 0; i < column_stats.size(); i++) {
 		if (column_stats[i]) {
 			D_ASSERT(other.column_stats[i]);
@@ -142,25 +100,6 @@ ColumnStatistics &TableStatistics::GetStats(TableStatisticsLock &lock, idx_t i)
 	return *column_stats[i];
 }
 
-// BlockingSample &TableStatistics::GetTableSampleRef(TableStatisticsLock &lock) {
-//	D_ASSERT(table_sample);
-//	return *table_sample;
-//}
-
-unique_ptr<BlockingSample> TableStatistics::GetTableSample(TableStatisticsLock &lock) {
-	return std::move(table_sample);
-}
-
-void TableStatistics::SetTableSample(TableStatisticsLock &lock, unique_ptr<BlockingSample> sample) {
-	table_sample = std::move(sample);
-}
-
-void TableStatistics::DestroyTableSample(TableStatisticsLock &lock) const {
-	if (table_sample) {
-		table_sample->Destroy();
-	}
-}
-
 unique_ptr<BaseStatistics> TableStatistics::CopyStats(idx_t i) {
 	lock_guard<mutex> l(*stats_lock);
 	auto result = column_stats[i]->Statistics().Copy();
@@ -181,25 +120,11 @@ void TableStatistics::CopyStats(TableStatisticsLock &lock, TableStatistics &othe
 	for (auto &stats : column_stats) {
 		other.column_stats.push_back(stats->Copy());
 	}
-
-	if (table_sample) {
-		D_ASSERT(table_sample->type == SampleType::RESERVOIR_SAMPLE);
-		auto &res = table_sample->Cast<ReservoirSample>();
-		other.table_sample = res.Copy();
-	}
 }
 
 void TableStatistics::Serialize(Serializer &serializer) const {
 	serializer.WriteProperty(100, "column_stats", column_stats);
-	unique_ptr<BlockingSample> to_serialize = nullptr;
-	if (table_sample) {
-		D_ASSERT(table_sample->type == SampleType::RESERVOIR_SAMPLE);
-		auto &reservoir_sample = table_sample->Cast<ReservoirSample>();
-		to_serialize = unique_ptr_cast<BlockingSample, ReservoirSample>(reservoir_sample.Copy());
-		auto &res_serialize = to_serialize->Cast<ReservoirSample>();
-		res_serialize.EvictOverBudgetSamples();
-	}
-	serializer.WritePropertyWithDefault<unique_ptr<BlockingSample>>(101, "table_sample", to_serialize, nullptr);
+	serializer.WritePropertyWithDefault<unique_ptr<BlockingSample>>(101, "table_sample", table_sample, nullptr);
 }
 
 void TableStatistics::Deserialize(Deserializer &deserializer, ColumnList &columns) {
@@ -217,19 +142,8 @@ void TableStatistics::Deserialize(Deserializer &deserializer, ColumnList &column
 
 		deserializer.Unset<LogicalType>();
 	});
-	table_sample = deserializer.ReadPropertyWithDefault<unique_ptr<BlockingSample>>(101, "table_sample");
-	if (table_sample) {
-		D_ASSERT(table_sample->type == SampleType::RESERVOIR_SAMPLE);
-#ifdef DEBUG
-		if (table_sample) {
-			auto &reservoir_sample = table_sample->Cast<ReservoirSample>();
-			reservoir_sample.Verify();
-		}
-#endif
-	} else {
-		table_sample = make_uniq<ReservoirSample>(static_cast<idx_t>(FIXED_SAMPLE_SIZE));
-		table_sample->Destroy();
-	}
+	table_sample =
+	    deserializer.ReadPropertyWithExplicitDefault<unique_ptr<BlockingSample>>(101, "table_sample", nullptr);
 }
 
 unique_ptr<TableStatisticsLock> TableStatistics::GetLock() {
diff --git a/test/sql/sample/can_sample_from_ingested_files.test b/test/sql/sample/can_sample_from_ingested_files.test
deleted file mode 100644
index 23c2ef50fd52..000000000000
--- a/test/sql/sample/can_sample_from_ingested_files.test
+++ /dev/null
@@ -1,44 +0,0 @@
-# name: test/sql/sample/can_sample_from_ingested_files.test
-# description: Test reservoir sample crash on large data sets
-# group: [sample]
-
-require parquet
-
-statement ok
-PRAGMA enable_verification;
-
-statement ok
-create table all_types as select * exclude(small_enum, medium_enum, large_enum, "union", bit) from test_all_types();
-
-statement ok
-copy all_types to '__TEST_DIR__/sample_all_types.csv' (FORMAT CSV);
-
-statement ok
-Create table all_types_csv_1 as select * from read_csv_auto('__TEST_DIR__/sample_all_types.csv');
-
-statement ok
-Create table all_types_csv_2 as select * from read_csv_auto('__TEST_DIR__/sample_all_types.csv');
-
-query T nosort result_1
-select * from all_types_csv_1;
-
-query T nosort result_1
-select * from all_types_csv_2;
-
-
-statement ok
-copy (SELECT * from all_types) to '__TEST_DIR__/sample_all_types.parquet' (FORMAT PARQUET);
-
-# test parquet
-statement ok
-Create table all_types_parquet_1 as select * from read_parquet('__TEST_DIR__/sample_all_types.parquet');
-
-statement ok
-Create table all_types_parquet_2 as select * from read_parquet('__TEST_DIR__/sample_all_types.parquet');
-
-query T nosort result_parquet
-select * from all_types_parquet_1;
-
-query T nosort result_paruet
-select * from all_types_parquet_2;
-
diff --git a/test/sql/sample/reservoir_testing_percentage.test b/test/sql/sample/reservoir_testing_percentage.test
deleted file mode 100644
index 19217d635461..000000000000
--- a/test/sql/sample/reservoir_testing_percentage.test
+++ /dev/null
@@ -1,85 +0,0 @@
-# name: test/sql/sample/reservoir_testing_percentage.test
-# description: Test SAMPLE keyword
-# group: [sample]
-
-loop i 1 8
-
-statement ok
-pragma threads=${i};
-
-statement ok
-CREATE or replace TABLE t1 as select range a from range(1000);
-
-query I
-SELECT count(*) from t1 using sample 0 percent (reservoir);
-----
-0
-
-query I
-SELECT count(*) from t1 using sample 10 percent (reservoir);
-----
-100
-
-query I
-SELECT count(*) from t1 using sample 20 percent (reservoir);
-----
-200
-
-query I
-SELECT count(*) from t1 using sample 80 percent (reservoir);
-----
-800
-
-query I
-SELECT count(*) from t1 using sample 100 percent (reservoir);
-----
-1000
-
-
-statement ok
-Insert into t1 select range a from range(9000);
-
-query I
-select count(*) from t1 using sample 80 percent (reservoir);
-----
-8000
-
-statement ok
-Insert into t1 select range a from range(90000);
-
-
-statement ok
-Insert into t1 select range a from range(900000);
-
-query I
-select count(*) from t1 using sample 20 percent (reservoir);
-----
-200000
-
-query I
-select count(*) from t1 using sample 30 percent (reservoir);
-----
-300000
-
-query I
-select count(*) from t1 using sample 40 percent (reservoir);
-----
-400000
-
-query I
-select count(*) from t1 using sample 50 percent (reservoir);
-----
-500000
-
-
-query I
-select count(*) from t1 using sample 60 percent (reservoir);
-----
-600000
-
-query I
-select count(*) from t1 using sample 70 percent (reservoir);
-----
-700000
-
-endloop
diff --git a/test/sql/sample/reservoir_testing_rows_value.test_slow b/test/sql/sample/reservoir_testing_rows_value.test_slow
index 3355225b5b8e..a4e355a69044 100644
--- a/test/sql/sample/reservoir_testing_rows_value.test_slow
+++ b/test/sql/sample/reservoir_testing_rows_value.test_slow
@@ -8,7 +8,7 @@ statement ok
 pragma threads=${i};
 
 statement ok
-CREATE or replace TABLE t1 as select range a from range(1000);
+CREATE or replace TABLE t1 as select range a, [1, a, 2] b, a::VARCHAR || 'ducktastic' c, get_current_timestamp() c from range(1000);
 
 query I
 SELECT count(*) from t1 using sample 0;
@@ -30,15 +30,13 @@ SELECT count(*) from t1 using sample 800;
 ----
 800
 
-
 query I
 SELECT count(*) from t1 using sample 1000;
 ----
 1000
 
-
 statement ok
-create or replace table t1 as select * from range(10000);
+CREATE or replace TABLE t1 as select range a, [1, a, 2] b, a::VARCHAR || 'ducktastic' c, get_current_timestamp() c from range(10000);
 
 query I
 select count(*) from t1 using sample 1000;
@@ -61,7 +59,7 @@ select count(*) from t1 using sample 8000;
 8000
 
 statement ok
-Create or replace table t1  as select range a from range(1000000);
+CREATE or replace TABLE t1 as select range a, [1, a, 2] b, a::VARCHAR || 'ducktastic' c, get_current_timestamp() c from range(1000000);
 
 query I
 select count(*) from t1 using sample 200000;
diff --git a/test/sql/sample/same_seed_same_sample.test_slow b/test/sql/sample/same_seed_same_sample.test_slow
index 1970cf1c4d75..c191fcb75e30 100644
--- a/test/sql/sample/same_seed_same_sample.test_slow
+++ b/test/sql/sample/same_seed_same_sample.test_slow
@@ -23,15 +23,15 @@ SELECT * from t1 using sample reservoir(100) repeatable (1) order by a;
 
 endloop
 
-# testing a table with greater cardinality than the standard vector size, and greater than a row group size.
+# testing a table with greater cardinality than the standard vector size
 
 statement ok
 CREATE or replace TABLE t1 as select range a, [1, a, 2] b, a::VARCHAR || 'ducktastic' c, get_current_timestamp() c from range(100000);
 
-# samples are only equal when threads = 1
+loop i 1 8
 
 statement ok
-set threads=1;
+pragma threads=${i};
 
 query III nosort result_2
 SELECT * from t1 using sample reservoir(6000) repeatable (1) order by a;
@@ -42,6 +42,8 @@ query III nosort result_2
 SELECT * from t1 using sample reservoir(6000) repeatable (1) order by a;
 ----
 
+endloop
+
 statement ok
 CREATE or replace TABLE t1 as select range a, [1, a, 2] b, a::VARCHAR || 'ducktastic' c, get_current_timestamp() c from range(1000000);
 
@@ -71,3 +73,5 @@ select count(*) < 10 from (select * from sample1 intersect select * from sample2
 ----
 true
 
+
+
diff --git a/test/sql/sample/table_samples/basic_sample_tests.test b/test/sql/sample/table_samples/basic_sample_tests.test
deleted file mode 100644
index dc22f6497fd0..000000000000
--- a/test/sql/sample/table_samples/basic_sample_tests.test
+++ /dev/null
@@ -1,88 +0,0 @@
-# name: test/sql/sample/table_samples/basic_sample_tests.test
-# group: [table_samples]
-
-mode skip
-
-# currently require fixed vector size since the "randomness" of the sample depends on
-# the vector size. If the vector size decreases, the randomness of the sample decreases
-# This is especially noticeable for small tables and their samples
-require vector_size 2048
-
-statement ok
-PRAGMA enable_verification
-
-load __TEST_DIR__/test_samples_basic.db
-
-query I
-select count(*) from range(100000) using sample (10000);
-----
-10000
-
-query I
-select count(*) from range(100) using sample (10);
-----
-10
-
-query I
-select count(*) from range(205000) using sample (10000);
-----
-10000
-
-statement ok
-create table t1 as select range a from range(204800);
-
-statement ok
-select * from duckdb_table_sample('t1');
-
-statement ok
-create or replace table t1 as select range a from range(1000);
-
-query II
-select avg(a) > 200, avg(a) < 800 from duckdb_table_sample('t1');
-----
-true	true
-
-statement ok
-create or replace table t1 as select range a from range(204800);
-
-# average is not skewed
-query II
-select avg(a) > (0.2*204800), avg(a) < (0.8*204800) from duckdb_table_sample('t1');
-----
-true	true
-
-# about half the samples are below 102400 and half above
-query I
-select count(*) from duckdb_table_sample('t1') where a < 102400;
-----
-1069
-
-query I
-select count(*) from duckdb_table_sample('t1') where a > 102400;
-----
-979
-
-query I
-select count(*) from t1 using sample (200000);
-----
-200000
-
-statement ok
-create or replace table materialized_range as select * from range(100);
-
-statement ok
-create or replace table integers_1 as (select range b from materialized_range);
-
-query I
-select count(b) from duckdb_table_sample('integers_1') where b in (select * from materialized_range);
-----
-1
-
-# sample exists after restart
-restart
-
-query I
-select count(b) from duckdb_table_sample('integers_1') where b in (select * from materialized_range);
-----
-1
-
diff --git a/test/sql/sample/table_samples/sample_stores_rows_from_later_on.test_slow b/test/sql/sample/table_samples/sample_stores_rows_from_later_on.test_slow
deleted file mode 100644
index 08c421ca86df..000000000000
--- a/test/sql/sample/table_samples/sample_stores_rows_from_later_on.test_slow
+++ /dev/null
@@ -1,42 +0,0 @@
-# name: test/sql/sample/table_samples/sample_stores_rows_from_later_on.test_slow
-# description: Test sampling of larger relations
-# group: [table_samples]
-
-mode skip
-
-# required when testing table samples. See basic_sample_test.test
-require vector_size 2048
-
-require noforcestorage
-
-load __TEST_DIR__/test_sample_conversion.db
-
-statement ok
-PRAGMA enable_verification
-
-statement ok
-create table t1 as select 1 a from range(200000);
-
-loop i 1 4805
-
-statement ok
-INSERT INTO t1 VALUES(${i} + 1);
-
-restart
-
-endloop
-
-query I
-select count(*) from duckdb_table_sample('t1');
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('t1') where a > 1;
-----
-48
-
-query I
-select count(*) from (select (floor(range/200000))::INT a from range(204800) using sample reservoir (1%)) t1 where a >= 1;
-----
-48
diff --git a/test/sql/sample/table_samples/table_sample_converts_to_block_sample.test b/test/sql/sample/table_samples/table_sample_converts_to_block_sample.test
deleted file mode 100644
index d67ab90b7469..000000000000
--- a/test/sql/sample/table_samples/table_sample_converts_to_block_sample.test
+++ /dev/null
@@ -1,59 +0,0 @@
-# name: test/sql/sample/table_samples/table_sample_converts_to_block_sample.test
-# description: Test sampling of larger relations
-# group: [table_samples]
-
-mode skip
-
-# required when testing table samples. See basic_sample_test.test
-require vector_size 2048
-
-require noforcestorage
-
-# table samples first collect only 1% of the table, until the table has a cardinality of 2048.
-# then the sample stays at a fixed 2048 values.
-
-load __TEST_DIR__/test_sample_converts_after_load.db
-
-statement ok
-create table materialized_range as select 1 a from range(102400);
-
-# only 1% of 102400
-query I
-select count(*) from duckdb_table_sample('materialized_range');
-----
-1024
-
-restart
-
-statement ok
-insert into materialized_range select 2 a from range(102400);
-
-# collect another 1% of 102400
-query I
-select count(*) from duckdb_table_sample('materialized_range');
-----
-2048
-
-query II
-select a, count(*) from duckdb_table_sample('materialized_range') group by all order by a;
-----
-1	1024
-2	1024
-
-# insert another
-statement ok
-insert into materialized_range select 3 a from range(102400);
-
-# sample remains at 2048 values
-query I
-select count(*) from duckdb_table_sample('materialized_range');
-----
-2048
-
-# 2048 / 3 = 682. so each value should have at least >650
-query II
-select a, count(*) > 650 from duckdb_table_sample('materialized_range') group by all order by a;
-----
-1	1
-2	1
-3	1
\ No newline at end of file
diff --git a/test/sql/sample/table_samples/table_sample_is_stored.test_slow b/test/sql/sample/table_samples/table_sample_is_stored.test_slow
deleted file mode 100644
index b937a05c5a2f..000000000000
--- a/test/sql/sample/table_samples/table_sample_is_stored.test_slow
+++ /dev/null
@@ -1,150 +0,0 @@
-# name: test/sql/sample/table_samples/table_sample_is_stored.test_slow
-# description: Test sampling of larger relations
-# group: [table_samples]
-
-mode skip
-
-# required when testing table samples. See basic_sample_test.test
-require vector_size 2048
-
-require noforcestorage
-
-require icu
-
-load __TEST_DIR__/test_samples.db
-
-statement ok
-PRAGMA enable_verification
-
-statement ok
-create table materialized_range as select * from range(5000000);
-
-statement ok
-create table integers_1 as (select (range + 5) a, range b, get_current_time() as time from materialized_range);
-
-query II nosort result_1
-select a::INT, b from duckdb_table_sample('integers_1') order by all;
-----
-
-statement ok
-create table integers_2 as (select (range + 5) a, range b, get_current_time() as time from materialized_range);
-
-## samples should be the same given the same table and the same contents.
-query II nosort result_1
-select a::INT, b from duckdb_table_sample('integers_2') order by all;
-----
-
-statement ok
-create or replace table integers_1 as (select (range + 5) a, range b from materialized_range);
-
-statement ok
-create or replace table integers_2 as (select (range + 5) a, range b from materialized_range);
-
-# sample only has values in the table it was sampled from
-query I
-select count(*) from (select b from duckdb_table_sample('integers_1') intersect (select b from integers_1));
-----
-2048
-
-query I
-select count(*) from (select b from duckdb_table_sample('integers_2') intersect (select b from integers_2));
-----
-2048
-
-# sample exists after restart
-restart
-
-query I
-select count(*) from duckdb_table_sample('integers_1');
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('integers_2');
-----
-2048
-
-
-query II
-select floor(b / 1000000) as interval, count(*) as frequency from duckdb_table_sample('integers_1') group by interval order by all;
-----
-0.0	453
-1.0	408
-2.0	406
-3.0	404
-4.0	377
-
-
-# adding another interval should subtract an equal number from the rest of the intervals
-statement ok
-insert into integers_1 (select (range + 5) a, range b from range(5000000,6000000));
-
-query II
-select floor(b / 1000000) as interval, count(*) as frequency from duckdb_table_sample('integers_1') group by interval order by all;
-----
-0.0	374
-1.0	334
-2.0	332
-3.0	334
-4.0	311
-5.0	363
-
-# If double the table count is appended, around half the sample should account for the new values.
-statement ok
-insert into integers_1 (select -1, -1 from range(6000000));
-
-query I
-select count(*) from integers_1;
-----
-12000000
-
-
-## about half of the samples should have the pair '-1', 1.
-# on latest storage test its something like 997
-query I
-select count(*) from duckdb_table_sample('integers_1') where a = -1 and b = -1;
-----
-914
-
-restart
-
-# updated sample is also newly serialized
-query I
-select count(*) from duckdb_table_sample('integers_1') where a = -1 and b = -1;
-----
-914
-
-# create a view on top of the sample
-statement ok
-create view sample_view as select * from duckdb_table_sample('integers_1');
-
-# update the sample
-statement ok
-insert into integers_1 (select -2, -2 from range(6000000));
-
-
-# 2048 / 3 = 682 (639 is good)
-query I
-select count(*) from sample_view where a = -2 and b = -2;
-----
-639
-
-restart
-
-query I
-select count(*)  from sample_view where a = -2 and b = -2;
-----
-639
-
-# currently have 18_000_000 values in the table.
-# to try and get 1 value in the sample, we should add
-# 18000000 / 2048 = 8789 values to see 1
-
-statement ok
-insert into integers_1 (select -3, -3 from range(7000));
-
-# 1 value makes it
-query I
-select count(*) from sample_view where a = -3 and b = -3;
-----
-1
diff --git a/test/sql/sample/table_samples/test_sample_is_destroyed_on_updates.test b/test/sql/sample/table_samples/test_sample_is_destroyed_on_updates.test
deleted file mode 100644
index 5e8267435862..000000000000
--- a/test/sql/sample/table_samples/test_sample_is_destroyed_on_updates.test
+++ /dev/null
@@ -1,125 +0,0 @@
-# name: test/sql/sample/table_samples/test_sample_is_destroyed_on_updates.test
-# description: Test sampling of larger relations
-# group: [table_samples]
-
-mode skip
-
-# required when testing table samples. See basic_sample_test.test
-require vector_size 2048
-
-load __TEST_DIR__/test_sample_is_destroyed_on_update.db
-
-statement ok
-create or replace table integers_1 as select range a, range+1 b  from range(102400);
-
-# no sample collected yet. There are only 5
-query I
-select count(*) from duckdb_table_sample('integers_1') order by all;
-----
-1024
-
-statement ok
-delete from integers_1 where a = 3;
-
-# sample no longer exists
-query I
-select count(*) from duckdb_table_sample('integers_1') order by all;
-----
-0
-
-statement ok
-create or replace table integers_1 as select range a, range+1 b  from range(102400);
-
-query I
-select count(*) from duckdb_table_sample('integers_1');
-----
-1024
-
-statement ok
-update integers_1 set a = 5 where a = 1;
-
-query II
-select * from duckdb_table_sample('integers_1');
-----
-
-# test adding columns destroys the sample.
-statement ok
-create or replace table integers_1 as select range a, range+1 b  from range(204800);
-
-query I
-select count(*) from duckdb_table_sample('integers_1');
-----
-2048
-
-statement ok
-Alter table integers_1 add column c DOUBLE;
-
-query III
-select * from duckdb_table_sample('integers_1');
-----
-
-
-# test altering types destroys the sample
-statement ok
-create or replace table integers_1 as select range a, range+1 b  from range(102400);
-
-
-# don't have enough smaples yet.
-query I
-select count(*) from duckdb_table_sample('integers_1');
-----
-1024
-
-statement ok
-Alter table integers_1 alter b TYPE VARCHAR
-
-query II
-select * from duckdb_table_sample('integers_1');
-----
-
-# test dropping a columns
-statement ok
-create or replace table integers_1 as select range a, range+1 b  from range(102400);
-
-query I
-select count(*) from duckdb_table_sample('integers_1');
-----
-1024
-
-statement ok
-Alter table integers_1 drop b;
-
-query I
-select * from duckdb_table_sample('integers_1');
-----
-
-# test sample is destroyed after a restart
-statement ok
-create or replace table integers_1 as select range a, range+1 b  from range(500);
-
-query I
-select count(*) from duckdb_table_sample('integers_1');
-----
-5
-
-statement ok
-Alter table integers_1 drop b;
-
-# sample is destroyed
-query I
-select * from duckdb_table_sample('integers_1');
-----
-
-restart
-
-statement ok
-insert into integers_1 select range a from range(500);
-
-# sample is still destroyed
-query I
-select * from duckdb_table_sample('integers_1');
-----
-
-
-
-
diff --git a/test/sql/sample/table_samples/test_sample_types.test b/test/sql/sample/table_samples/test_sample_types.test
deleted file mode 100644
index 0d656a4abc17..000000000000
--- a/test/sql/sample/table_samples/test_sample_types.test
+++ /dev/null
@@ -1,80 +0,0 @@
-# name: test/sql/sample/table_samples/test_sample_types.test
-# description: Test sampling of larger relations
-# group: [table_samples]
-
-mode skip
-
-# test valid sampling types (for now only integral types)
-
-statement ok
-pragma enable_verification;
-
-statement ok
-create table string_samples as select range::Varchar a from range(204800);
-
-query I
-select count(*) from duckdb_table_sample('string_samples') where a is NULL;
-----
-2048
-
-statement ok
-create table struct_samples as select {'key1': 'quack-a-lack', 'key2': range} a from range(204800);
-
-query I
-select count(*) from duckdb_table_sample('struct_samples') where a is null;
-----
-2048
-
-statement ok
-create table blob_samples as select '\xAA\xAB\xAC'::BLOB a from range(204800);
-
-query I
-select count(*) from duckdb_table_sample('blob_samples') where a is NULL;
-----
-2048
-
-statement ok
-create table integral_samples as select range::BIGINT a, range::DOUBLE b, range::FLOAT c, range::HUGEINT d, INTERVAL 1 YEAR e from range(204800);
-
-query I
-select count(*) from duckdb_table_sample('integral_samples') where a NOT null;
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('integral_samples') where b NOT null;
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('integral_samples') where c NOT null;
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('integral_samples') where d NOT null;
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('integral_samples') where e IS null;
-----
-2048
-
-statement ok
-CREATE or replace TABLE t1 as select range a, [1, a, 2] b, a::VARCHAR || 'ducktastic' c, get_current_timestamp() d from range(1000000);
-
-query I
-select count(*) from duckdb_table_sample('t1') where b is null;
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('t1') where c is null;
-----
-2048
-
-query I
-select count(*) from duckdb_table_sample('t1') where d is null;
-----
-2048
\ No newline at end of file
diff --git a/test/sql/sample/table_samples/test_table_sample_errors.test b/test/sql/sample/table_samples/test_table_sample_errors.test
deleted file mode 100644
index facc8d62a5e8..000000000000
--- a/test/sql/sample/table_samples/test_table_sample_errors.test
+++ /dev/null
@@ -1,21 +0,0 @@
-# name: test/sql/sample/table_samples/test_table_sample_errors.test
-# description: test table sampl[e errors
-# group: [table_samples]
-
-mode skip
-
-statement ok
-create table t1 as select range a from range(204800);
-
-statement ok
-create view v1 as select * from t1;
-
-statement error
-select * from duckdb_table_sample('v1');
-----
-<REGEX>:.*Invalid Catalog type.*
-
-statement error
-select * from duckdb_table_sample('a');
-----
-<REGEX>:.*Catalog Error:.*Table.*does not exist.*
\ No newline at end of file
diff --git a/test/sql/sample/test_sample.test_slow b/test/sql/sample/test_sample.test_slow
index 557b629203e6..d476fcc211d8 100644
--- a/test/sql/sample/test_sample.test_slow
+++ b/test/sql/sample/test_sample.test_slow
@@ -75,6 +75,7 @@ SELECT COUNT(*) FROM range(2000000) USING SAMPLE 2
 ----
 2
 
+
 # test sample with multiple columns
 # we insert the same data in the entire column
 statement ok
@@ -222,8 +223,11 @@ select i from integers using sample (1 rows) repeatable (0);
 query I noresult repeatable_seed_0
 select i from integers using sample (1 rows) repeatable (0);
 ----
+152
 
 
+query I
+select i from integers using sample reservoir(1%) repeatable (0);
 query I noresult repeatable_seed_1
 select i from integers using sample reservoir(1%) repeatable (0) order by i;
 ----
@@ -231,5 +235,7 @@ select i from integers using sample reservoir(1%) repeatable (0) order by i;
 query I noresult repeatable_seed_1
 select i from integers using sample reservoir(1%) repeatable (0) order by i;
 ----
+51
+78
 58
 127
diff --git a/test/sql/storage/checkpointed_self_append.test b/test/sql/storage/checkpointed_self_append.test
index dc3162709d65..cd1f0e806e7c 100644
--- a/test/sql/storage/checkpointed_self_append.test
+++ b/test/sql/storage/checkpointed_self_append.test
@@ -4,6 +4,7 @@
 
 require skip_reload
 
+
 # load the DB from disk
 load __TEST_DIR__/checkpointed_self_append.db
 
diff --git a/test/sql/storage/reclaim_space/reclaim_space_drop_column_overflow_strings.test_slow b/test/sql/storage/reclaim_space/reclaim_space_drop_column_overflow_strings.test_slow
index b1b013597f1d..2b6ae0d0fcac 100644
--- a/test/sql/storage/reclaim_space/reclaim_space_drop_column_overflow_strings.test_slow
+++ b/test/sql/storage/reclaim_space/reclaim_space_drop_column_overflow_strings.test_slow
@@ -90,4 +90,4 @@ SELECT AVG(STRLEN(s)), MIN(STRLEN(S)), MAX(STRLEN(S)), SUM(STRLEN(S)), MIN(S[1])
 ----
 296.955	0	5000	44543527	(empty)	X
 
-endloop
\ No newline at end of file
+endloop
diff --git a/test/sql/upsert/test_big_insert.test b/test/sql/upsert/test_big_insert.test
index 1bba1ba5fd17..2de05a7d27b1 100644
--- a/test/sql/upsert/test_big_insert.test
+++ b/test/sql/upsert/test_big_insert.test
@@ -2,6 +2,8 @@
 # description: Test insert into statements
 # group: [upsert]
 
+# TODO: remove this, behavior should be consistent at all vector sizes
+
 statement ok
 pragma enable_verification;