From 25209fb462a2894fc3ab0a81040048ebb5de375b Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 20 Jan 2025 18:19:27 +0100 Subject: [PATCH 001/142] wip --- .../catalog_entry/duck_table_entry.cpp | 6 +- src/include/duckdb/storage/data_table.hpp | 2 +- .../duckdb/storage/table/row_group.hpp | 2 +- .../storage/table/row_group_collection.hpp | 2 +- .../duckdb/transaction/local_storage.hpp | 6 +- src/storage/data_table.cpp | 4 +- src/storage/local_storage.cpp | 24 +++++--- src/storage/partial_block_manager.cpp | 6 +- src/storage/single_file_block_manager.cpp | 15 +++++ src/storage/table/row_group.cpp | 5 +- src/storage/table/row_group_collection.cpp | 4 +- .../optimistic_write_alter_type.test_slow | 4 -- .../optimistic_write_delete.test | 1 - .../optimistic_write_drop_column.test_slow | 22 +++---- ...der_preserving_odd_sized_batches.test_slow | 3 - ...ace_insert_unique_idx_optimistic.test_slow | 59 ++----------------- ...aim_space_primary_key_optimistic.test_slow | 28 ++------- 17 files changed, 71 insertions(+), 122 deletions(-) diff --git a/src/catalog/catalog_entry/duck_table_entry.cpp b/src/catalog/catalog_entry/duck_table_entry.cpp index 4983710d9a99..b58391d2ab4f 100644 --- a/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/catalog/catalog_entry/duck_table_entry.cpp @@ -22,6 +22,7 @@ #include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/storage/table_storage_info.hpp" +#include "duckdb/transaction/duck_transaction.hpp" namespace duckdb { @@ -885,7 +886,10 @@ void DuckTableEntry::CommitAlter(string &column_name) { break; } } - storage->CommitDropColumn(columns.LogicalToPhysical(LogicalIndex(removed_index.GetIndex())).index); + + auto logical_column_index = LogicalIndex(removed_index.GetIndex()); + auto column_index = columns.LogicalToPhysical(logical_column_index).index; + storage->CommitDropColumn(column_index); } void 
DuckTableEntry::CommitDrop() { diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp index 39795ed1b907..bbc42ed3ffc9 100644 --- a/src/include/duckdb/storage/data_table.hpp +++ b/src/include/duckdb/storage/data_table.hpp @@ -200,7 +200,7 @@ class DataTable { //! Checkpoint the table to the specified table data writer void Checkpoint(TableDataWriter &writer, Serializer &serializer); void CommitDropTable(); - void CommitDropColumn(idx_t index); + void CommitDropColumn(const idx_t column_index); idx_t ColumnCount() const; idx_t GetTotalRows() const; diff --git a/src/include/duckdb/storage/table/row_group.hpp b/src/include/duckdb/storage/table/row_group.hpp index 16a535a4e4f9..40d0873a2ed7 100644 --- a/src/include/duckdb/storage/table/row_group.hpp +++ b/src/include/duckdb/storage/table/row_group.hpp @@ -102,7 +102,7 @@ class RowGroup : public SegmentBase { unique_ptr RemoveColumn(RowGroupCollection &collection, idx_t removed_column); void CommitDrop(); - void CommitDropColumn(idx_t index); + void CommitDropColumn(const idx_t index); void InitializeEmpty(const vector &types); diff --git a/src/include/duckdb/storage/table/row_group_collection.hpp b/src/include/duckdb/storage/table/row_group_collection.hpp index 19aa6452038c..412d8dcdaa61 100644 --- a/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/include/duckdb/storage/table/row_group_collection.hpp @@ -108,7 +108,7 @@ class RowGroupCollection { bool schedule_vacuum); unique_ptr GetCheckpointTask(CollectionCheckpointState &checkpoint_state, idx_t segment_idx); - void CommitDropColumn(idx_t index); + void CommitDropColumn(const idx_t index); void CommitDropTable(); vector GetPartitionStats() const; diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index 453a7ce440ab..20ce212b8639 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp 
@@ -32,8 +32,8 @@ class LocalTableStorage : public enable_shared_from_this { // Create a LocalTableStorage from an ALTER TYPE LocalTableStorage(ClientContext &context, DataTable &table, LocalTableStorage &parent, idx_t changed_idx, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); - // Create a LocalTableStorage from a DROP COLUMN - LocalTableStorage(DataTable &table, LocalTableStorage &parent, idx_t drop_idx); + //! Create a LocalTableStorage from a DROP COLUMN. + LocalTableStorage(DataTable &new_data_table, LocalTableStorage &parent, const idx_t drop_column_index); // Create a LocalTableStorage from an ADD COLUMN LocalTableStorage(ClientContext &context, DataTable &table, LocalTableStorage &parent, ColumnDefinition &new_column, ExpressionExecutor &default_executor); @@ -153,7 +153,7 @@ class LocalStorage { void AddColumn(DataTable &old_dt, DataTable &new_dt, ColumnDefinition &new_column, ExpressionExecutor &default_executor); - void DropColumn(DataTable &old_dt, DataTable &new_dt, idx_t removed_column); + void DropColumn(DataTable &old_dt, DataTable &new_dt, const idx_t drop_column_index); void ChangeType(DataTable &old_dt, DataTable &new_dt, idx_t changed_idx, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 81bf50100b13..630b081eebf5 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -1546,8 +1546,8 @@ void DataTable::Checkpoint(TableDataWriter &writer, Serializer &serializer) { writer.FinalizeTable(global_stats, info.get(), serializer); } -void DataTable::CommitDropColumn(idx_t index) { - row_groups->CommitDropColumn(index); +void DataTable::CommitDropColumn(const idx_t column_index) { + row_groups->CommitDropColumn(column_index); } idx_t DataTable::ColumnCount() const { diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 71b20bd10059..239596b3800c 100644 --- 
a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -62,12 +62,17 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, append_indexes.Move(parent.append_indexes); } -LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t drop_idx) - : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows), - optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), - merged_storage(parent.merged_storage) { - row_groups = parent.row_groups->RemoveColumn(drop_idx); +LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorage &parent, + const idx_t drop_column_index) + : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), + optimistic_writer(new_data_table, parent.optimistic_writer), + optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { + + // Remove the column from the previous local table storage. + row_groups = parent.row_groups->RemoveColumn(drop_column_index); + parent.row_groups->CommitDropColumn(drop_column_index); parent.row_groups.reset(); + append_indexes.Move(parent.append_indexes); } @@ -82,6 +87,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, } LocalTableStorage::~LocalTableStorage() { + D_ASSERT(1); } void LocalTableStorage::InitializeScan(CollectionScanState &state, optional_ptr table_filters) { @@ -248,6 +254,9 @@ void LocalTableStorage::Rollback() { } optimistic_writers.clear(); optimistic_writer.Rollback(); + + // Drop any optimistically written local changes. 
+ row_groups->CommitDropTable(); } //===--------------------------------------------------------------------===// @@ -547,7 +556,6 @@ void LocalStorage::Rollback() { continue; } storage->Rollback(); - entry.second.reset(); } } @@ -598,13 +606,13 @@ void LocalStorage::AddColumn(DataTable &old_dt, DataTable &new_dt, ColumnDefinit table_manager.InsertEntry(new_dt, std::move(new_storage)); } -void LocalStorage::DropColumn(DataTable &old_dt, DataTable &new_dt, idx_t removed_column) { +void LocalStorage::DropColumn(DataTable &old_dt, DataTable &new_dt, const idx_t drop_column_index) { // check if there are any pending appends for the old version of the table auto storage = table_manager.MoveEntry(old_dt); if (!storage) { return; } - auto new_storage = make_shared_ptr(new_dt, *storage, removed_column); + auto new_storage = make_shared_ptr(new_dt, *storage, drop_column_index); table_manager.InsertEntry(new_dt, std::move(new_storage)); } diff --git a/src/storage/partial_block_manager.cpp b/src/storage/partial_block_manager.cpp index 3dbf89760591..79bc4c813fbe 100644 --- a/src/storage/partial_block_manager.cpp +++ b/src/storage/partial_block_manager.cpp @@ -196,9 +196,9 @@ BlockManager &PartialBlockManager::GetBlockManager() const { void PartialBlockManager::Rollback() { ClearBlocks(); - for (auto &block_id : written_blocks) { - block_manager.MarkBlockAsFree(block_id); - } + // for (auto &block_id : written_blocks) { + // block_manager.MarkBlockAsFree(block_id); + // } } } // namespace duckdb diff --git a/src/storage/single_file_block_manager.cpp b/src/storage/single_file_block_manager.cpp index f00d93040686..111ba02a55c9 100644 --- a/src/storage/single_file_block_manager.cpp +++ b/src/storage/single_file_block_manager.cpp @@ -313,6 +313,9 @@ void SingleFileBlockManager::LoadFreeList() { free_list.clear(); for (idx_t i = 0; i < free_list_count; i++) { auto block = reader.Read(); + if (block == 1) { + D_ASSERT(1); + } free_list.insert(block); newly_freed_list.insert(block); } 
@@ -363,6 +366,9 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) { throw InternalException("MarkBlockAsFree called but block %llu was already freed!", block_id); } multi_use_blocks.erase(block_id); + if (block_id == 1) { + D_ASSERT(1); + } free_list.insert(block_id); newly_freed_list.insert(block_id); } @@ -377,6 +383,9 @@ void SingleFileBlockManager::MarkBlockAsUsed(block_id_t block_id) { // i.e. if max_block = 0, and block_id = 3, we need to add blocks 1 and 2 to the free list while (max_block < block_id) { free_list.insert(max_block); + if (max_block == 1) { + D_ASSERT(1); + } max_block++; } max_block++; @@ -410,6 +419,9 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) { // Check for multi-free // TODO: Fix the bug that causes this assert to fire, then uncomment it. // D_ASSERT(modified_blocks.find(block_id) == modified_blocks.end()); + if (block_id == 1) { + D_ASSERT(1); + } D_ASSERT(free_list.find(block_id) == free_list.end()); modified_blocks.insert(block_id); } @@ -640,6 +652,9 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) { for (auto &block : modified_blocks) { free_list.insert(block); + if (block == 1) { + D_ASSERT(1); + } newly_freed_list.insert(block); } modified_blocks.clear(); diff --git a/src/storage/table/row_group.cpp b/src/storage/table/row_group.cpp index d5250387362b..ee07f838c14c 100644 --- a/src/storage/table/row_group.cpp +++ b/src/storage/table/row_group.cpp @@ -385,8 +385,9 @@ void RowGroup::CommitDrop() { } } -void RowGroup::CommitDropColumn(idx_t column_idx) { - GetColumn(column_idx).CommitDropColumn(); +void RowGroup::CommitDropColumn(const idx_t column_idx) { + auto &column = GetColumn(column_idx); + column.CommitDropColumn(); } void RowGroup::NextVector(CollectionScanState &state) { diff --git a/src/storage/table/row_group_collection.cpp b/src/storage/table/row_group_collection.cpp index a167644193fd..f775add2dca8 100644 --- a/src/storage/table/row_group_collection.cpp 
+++ b/src/storage/table/row_group_collection.cpp @@ -1110,9 +1110,9 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl //===--------------------------------------------------------------------===// // CommitDrop //===--------------------------------------------------------------------===// -void RowGroupCollection::CommitDropColumn(idx_t index) { +void RowGroupCollection::CommitDropColumn(const idx_t column_index) { for (auto &row_group : row_groups->Segments()) { - row_group.CommitDropColumn(index); + row_group.CommitDropColumn(column_index); } } diff --git a/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow b/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow index dbebdcece844..fa7847641795 100644 --- a/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow +++ b/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow @@ -2,10 +2,6 @@ # description: Test optimistic write with alter type in transaction-local storage # group: [optimistic_write] -# FIXME: for smaller block sizes (16KB) the database size does not stabilize in the loop, instead, -# FIXME: it grows very slowly (only investigated up to 40 iterations) -require block_size 262144 - load __TEST_DIR__/optimistic_write_alter_type.db statement ok diff --git a/test/sql/storage/optimistic_write/optimistic_write_delete.test b/test/sql/storage/optimistic_write/optimistic_write_delete.test index 5c71995862c8..b96d893fb865 100644 --- a/test/sql/storage/optimistic_write/optimistic_write_delete.test +++ b/test/sql/storage/optimistic_write/optimistic_write_delete.test @@ -2,7 +2,6 @@ # description: Test optimistic write with deletes in transaction-local storage # group: [optimistic_write] -# load the DB from disk load __TEST_DIR__/optimistic_write_delete.db statement ok diff --git a/test/sql/storage/optimistic_write/optimistic_write_drop_column.test_slow 
b/test/sql/storage/optimistic_write/optimistic_write_drop_column.test_slow index c91637cdc596..0427213938c0 100644 --- a/test/sql/storage/optimistic_write/optimistic_write_drop_column.test_slow +++ b/test/sql/storage/optimistic_write/optimistic_write_drop_column.test_slow @@ -2,10 +2,6 @@ # description: Test optimistic write with drop column in transaction-local storage # group: [optimistic_write] -# FIXME: for smaller block sizes (16KB) the database size does not stabilize in the loop, instead, -# FIXME: it grows very slowly (only investigated up to 40 iterations) -require block_size 262144 - load __TEST_DIR__/optimistic_write_drop.db statement ok @@ -15,7 +11,7 @@ statement ok BEGIN TRANSACTION statement ok -INSERT INTO test SELECT i, i+1, i+2 FROM range(1000000) tbl(i) +INSERT INTO test SELECT i, i + 1, i + 2 FROM range(1000000) tbl(i) statement ok ALTER TABLE test DROP COLUMN c @@ -52,12 +48,12 @@ SELECT SUM(a), SUM(b) FROM test ---- 499999500000 500000500000 -require skip_reload +# Ensure that we reclaim space correctly. -# ensure the drop column does not result in leaking blocks +require skip_reload -# for smaller block sizes (16KB) the total blocks alternate between a few values in the loop, -# therefore, we need to compare to a range of total block counts +# For smaller block sizes (16KB) the total blocks alternate between a few values in the loop. +# Therefore, we compare to a range of total block counts. 
statement ok CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size(); @@ -73,7 +69,7 @@ statement ok BEGIN TRANSACTION statement ok -INSERT INTO test SELECT i, i+1, i+2 FROM range(1000000) tbl(i) +INSERT INTO test SELECT i, i + 1, i + 2 FROM range(1000000) tbl(i) statement ok ALTER TABLE test DROP COLUMN c @@ -86,8 +82,8 @@ SELECT SUM(a), SUM(b) FROM test ---- 499999500000 500000500000 -# ensure that the total blocks don't exceed the total blocks after the first iteration -# by more than 1.2 +# Ensure that the total blocks don't exceed the total blocks after the first iteration +# by more than 1.2. query I SELECT CASE WHEN ${i} = 0 THEN True @@ -97,7 +93,7 @@ FROM pragma_database_size() AS current, total_blocks_tbl; ---- 1 -# adjust total_blocks_tbl once to the count after the first iteration +# Adjust total_blocks_tbl once to the count after the first iteration. statement ok UPDATE total_blocks_tbl SET total_blocks = ( diff --git a/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow b/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow index 3474b2e0621c..941885e4dcbf 100644 --- a/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow +++ b/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow @@ -2,9 +2,6 @@ # description: Test parallel order-preserving insert # group: [parallel] -# FIXME: see internal issue 3931. -mode skip - # There are different numbers of distinct blocks for smaller block sizes, # because the segment size is bound by the block size. 
require block_size 262144 diff --git a/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow b/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow index c09499562f34..7cfbd2fa6a56 100644 --- a/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow +++ b/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow @@ -2,42 +2,20 @@ # description: Test space reclamation of optimistic writing with a UNIQUE constraint violation. # group: [parallel] -# FIXME: see internal issue 3931. -mode skip - load __TEST_DIR__/reclaim_space_unique_index.db statement ok SET preserve_insertion_order=false; statement ok -CREATE TABLE integers AS SELECT * FROM range(10000000) t(i); +CREATE TABLE integers AS SELECT * FROM range(1_000_000) t(i); statement ok CREATE TABLE integers2 (i INTEGER); -statement ok -INSERT INTO integers2 VALUES (9999999); - -statement ok -CREATE UNIQUE INDEX idx ON integers2(i); - -# For smaller block sizes (16KB) the total blocks increase (to twice the original amount) in the first -# iteration, and then stay constant. statement ok CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size(); -statement ok -CREATE TYPE test_result AS UNION ( - ok BOOL, - err STRUCT( - old BIGINT, - allowed_max DECIMAL(21,1), - actual BIGINT) -); - -loop i 0 10 - statement ok BEGIN; @@ -45,41 +23,14 @@ statement ok CHECKPOINT; statement ok -INSERT INTO integers2 VALUES (9999998); - -# Invalidate the transaction. +INSERT INTO integers2 VALUES (999_998); -statement error -INSERT INTO integers2 SELECT * FROM integers WHERE i <= 9999998; ----- -:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.* +statement ok +INSERT INTO integers2 SELECT * FROM integers WHERE i <= 999_998; statement ok ROLLBACK -# Ensure that the total blocks don't exceed the total blocks after the first iteration by more than 1.2. 
- -query I -SELECT - CASE WHEN ${i} = 0 THEN True::test_result - WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.4 THEN True::test_result - ELSE { - 'old': total_blocks_tbl.total_blocks, - 'allowed_max': total_blocks_tbl.total_blocks * 1.4, - 'actual': current.total_blocks - }::test_result - END -FROM pragma_database_size() AS current, total_blocks_tbl; ----- -true - -# Adjust the total_blocks_tbl once to the count after the first iteration. - statement ok UPDATE total_blocks_tbl SET total_blocks = ( - SELECT - CASE WHEN ${i} = 0 THEN (SELECT current.total_blocks FROM pragma_database_size() AS current) - ELSE (total_blocks)END - ); - -endloop +SELECT current.total_blocks FROM pragma_database_size() AS current); diff --git a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow index 27753dd7877a..6f97e21dbbee 100644 --- a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow +++ b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow @@ -2,48 +2,30 @@ # description: Test space reclamation of optimistic writing with a PK constraint violation. # group: [parallel] -# FIXME: see internal issue 3931. 
-mode skip - load __TEST_DIR__/reclaim_space_primary_key.db statement ok SET preserve_insertion_order=false; statement ok -CREATE TABLE integers AS SELECT * FROM range(10000000) t(i); +CREATE TABLE integers AS SELECT * FROM range(1000000) t(i); statement ok -CREATE TABLE integers2 (i INTEGER PRIMARY KEY); +CREATE TABLE integers2 (i INTEGER); statement ok -INSERT INTO integers2 VALUES (9999999); - -statement error -INSERT INTO integers2 SELECT * FROM integers; ----- -:Constraint Error.*violates primary key constraint.* +INSERT INTO integers2 VALUES (999999); statement ok CREATE TABLE block_count (count INT); loop i 0 10 -statement error -INSERT INTO integers2 SELECT * FROM integers; ----- -:Constraint Error.*violates primary key constraint.* - statement ok BEGIN; statement ok -INSERT INTO integers2 VALUES (9999998); - -statement error -INSERT INTO integers2 SELECT * FROM integers WHERE i <= 9999998; ----- -:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.* +INSERT INTO integers2 VALUES (999998); statement ok ROLLBACK @@ -54,7 +36,7 @@ SELECT COUNT(*) - ${i} FROM integers2; 1 statement ok -INSERT INTO integers2 VALUES (10000000 + ${i}); +INSERT INTO integers2 VALUES (1000000 + ${i}); statement ok CHECKPOINT; From cd9b27ad4cb96092b8d553e3552dcc2e8f0125da Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Tue, 21 Jan 2025 12:10:21 +0100 Subject: [PATCH 002/142] some tidying --- .../catalog_entry/duck_table_entry.cpp | 1 - .../duckdb/storage/table/row_group.hpp | 2 +- .../storage/table/row_group_collection.hpp | 2 +- .../duckdb/transaction/local_storage.hpp | 7 +-- src/storage/local_storage.cpp | 19 ++++--- src/storage/partial_block_manager.cpp | 3 -- src/storage/single_file_block_manager.cpp | 15 ------ src/storage/table/row_group.cpp | 4 +- .../optimistic_write_alter_type.test_slow | 18 +++---- ...der_preserving_odd_sized_batches.test_slow | 2 + ...ace_insert_unique_idx_optimistic.test_slow | 51 
++++++++++++++++++- ...aim_space_primary_key_optimistic.test_slow | 27 ++++++++-- test/temp.test | 24 +++++++++ 13 files changed, 125 insertions(+), 50 deletions(-) create mode 100644 test/temp.test diff --git a/src/catalog/catalog_entry/duck_table_entry.cpp b/src/catalog/catalog_entry/duck_table_entry.cpp index b58391d2ab4f..d12bc557e7aa 100644 --- a/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/catalog/catalog_entry/duck_table_entry.cpp @@ -22,7 +22,6 @@ #include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/storage/table_storage_info.hpp" -#include "duckdb/transaction/duck_transaction.hpp" namespace duckdb { diff --git a/src/include/duckdb/storage/table/row_group.hpp b/src/include/duckdb/storage/table/row_group.hpp index 40d0873a2ed7..8ceea68a3615 100644 --- a/src/include/duckdb/storage/table/row_group.hpp +++ b/src/include/duckdb/storage/table/row_group.hpp @@ -102,7 +102,7 @@ class RowGroup : public SegmentBase { unique_ptr RemoveColumn(RowGroupCollection &collection, idx_t removed_column); void CommitDrop(); - void CommitDropColumn(const idx_t index); + void CommitDropColumn(const idx_t column_index); void InitializeEmpty(const vector &types); diff --git a/src/include/duckdb/storage/table/row_group_collection.hpp b/src/include/duckdb/storage/table/row_group_collection.hpp index 412d8dcdaa61..9940d80f45c0 100644 --- a/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/include/duckdb/storage/table/row_group_collection.hpp @@ -108,7 +108,7 @@ class RowGroupCollection { bool schedule_vacuum); unique_ptr GetCheckpointTask(CollectionCheckpointState &checkpoint_state, idx_t segment_idx); - void CommitDropColumn(const idx_t index); + void CommitDropColumn(const idx_t column_index); void CommitDropTable(); vector GetPartitionStats() const; diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index 
20ce212b8639..7516adeced72 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp @@ -29,9 +29,10 @@ class LocalTableStorage : public enable_shared_from_this { public: // Create a new LocalTableStorage explicit LocalTableStorage(ClientContext &context, DataTable &table); - // Create a LocalTableStorage from an ALTER TYPE - LocalTableStorage(ClientContext &context, DataTable &table, LocalTableStorage &parent, idx_t changed_idx, - const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr); + //! Create a LocalTableStorage from an ALTER TYPE. + LocalTableStorage(ClientContext &context, DataTable &new_data_table, LocalTableStorage &parent, + const idx_t alter_column_index, const LogicalType &target_type, + const vector &bound_columns, Expression &cast_expr); //! Create a LocalTableStorage from a DROP COLUMN. LocalTableStorage(DataTable &new_data_table, LocalTableStorage &parent, const idx_t drop_column_index); // Create a LocalTableStorage from an ADD COLUMN diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 239596b3800c..20334291e229 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -51,14 +51,18 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &table) }); } -LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, LocalTableStorage &parent, - idx_t changed_idx, const LogicalType &target_type, +LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data_table, LocalTableStorage &parent, + const idx_t alter_column_index, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr) - : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows), - optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), - merged_storage(parent.merged_storage) { - 
row_groups = parent.row_groups->AlterType(context, changed_idx, target_type, bound_columns, cast_expr); + : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), + optimistic_writer(new_data_table, parent.optimistic_writer), + optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { + + // Alter the column type. + row_groups = parent.row_groups->AlterType(context, alter_column_index, target_type, bound_columns, cast_expr); + parent.row_groups->CommitDropColumn(alter_column_index); parent.row_groups.reset(); + append_indexes.Move(parent.append_indexes); } @@ -68,7 +72,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorag optimistic_writer(new_data_table, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { - // Remove the column from the previous local table storage. + // Remove the column from the previous table storage. 
row_groups = parent.row_groups->RemoveColumn(drop_column_index); parent.row_groups->CommitDropColumn(drop_column_index); parent.row_groups.reset(); @@ -87,7 +91,6 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, } LocalTableStorage::~LocalTableStorage() { - D_ASSERT(1); } void LocalTableStorage::InitializeScan(CollectionScanState &state, optional_ptr table_filters) { diff --git a/src/storage/partial_block_manager.cpp b/src/storage/partial_block_manager.cpp index 79bc4c813fbe..991e5fcd4cf5 100644 --- a/src/storage/partial_block_manager.cpp +++ b/src/storage/partial_block_manager.cpp @@ -196,9 +196,6 @@ BlockManager &PartialBlockManager::GetBlockManager() const { void PartialBlockManager::Rollback() { ClearBlocks(); - // for (auto &block_id : written_blocks) { - // block_manager.MarkBlockAsFree(block_id); - // } } } // namespace duckdb diff --git a/src/storage/single_file_block_manager.cpp b/src/storage/single_file_block_manager.cpp index 111ba02a55c9..f00d93040686 100644 --- a/src/storage/single_file_block_manager.cpp +++ b/src/storage/single_file_block_manager.cpp @@ -313,9 +313,6 @@ void SingleFileBlockManager::LoadFreeList() { free_list.clear(); for (idx_t i = 0; i < free_list_count; i++) { auto block = reader.Read(); - if (block == 1) { - D_ASSERT(1); - } free_list.insert(block); newly_freed_list.insert(block); } @@ -366,9 +363,6 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) { throw InternalException("MarkBlockAsFree called but block %llu was already freed!", block_id); } multi_use_blocks.erase(block_id); - if (block_id == 1) { - D_ASSERT(1); - } free_list.insert(block_id); newly_freed_list.insert(block_id); } @@ -383,9 +377,6 @@ void SingleFileBlockManager::MarkBlockAsUsed(block_id_t block_id) { // i.e. 
if max_block = 0, and block_id = 3, we need to add blocks 1 and 2 to the free list while (max_block < block_id) { free_list.insert(max_block); - if (max_block == 1) { - D_ASSERT(1); - } max_block++; } max_block++; @@ -419,9 +410,6 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) { // Check for multi-free // TODO: Fix the bug that causes this assert to fire, then uncomment it. // D_ASSERT(modified_blocks.find(block_id) == modified_blocks.end()); - if (block_id == 1) { - D_ASSERT(1); - } D_ASSERT(free_list.find(block_id) == free_list.end()); modified_blocks.insert(block_id); } @@ -652,9 +640,6 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) { for (auto &block : modified_blocks) { free_list.insert(block); - if (block == 1) { - D_ASSERT(1); - } newly_freed_list.insert(block); } modified_blocks.clear(); diff --git a/src/storage/table/row_group.cpp b/src/storage/table/row_group.cpp index ee07f838c14c..b9add3d67dd7 100644 --- a/src/storage/table/row_group.cpp +++ b/src/storage/table/row_group.cpp @@ -385,8 +385,8 @@ void RowGroup::CommitDrop() { } } -void RowGroup::CommitDropColumn(const idx_t column_idx) { - auto &column = GetColumn(column_idx); +void RowGroup::CommitDropColumn(const idx_t column_index) { + auto &column = GetColumn(column_index); column.CommitDropColumn(); } diff --git a/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow b/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow index fa7847641795..a32e866bad35 100644 --- a/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow +++ b/test/sql/storage/optimistic_write/optimistic_write_alter_type.test_slow @@ -14,7 +14,7 @@ statement ok INSERT INTO test SELECT i FROM range(1000000) tbl(i) statement ok -ALTER TABLE test ALTER a SET TYPE BIGINT USING a+1 +ALTER TABLE test ALTER a SET TYPE BIGINT USING a + 1 statement ok COMMIT @@ -48,12 +48,12 @@ SELECT SUM(a) FROM test ---- 500000500000 -require skip_reload +# 
Ensure that we reclaim space correctly. -# ensure the alter type does not result in leaking blocks +require skip_reload -# for smaller block sizes (16KB) the total blocks alternate between a few values in the loop, -# therefore, we need to compare to a range of total block counts +# For smaller block sizes (16KB) the total blocks alternate between a few values in the loop. +# Therefore, we compare to a range of total block counts. statement ok CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size(); @@ -72,7 +72,7 @@ statement ok INSERT INTO test SELECT i FROM range(1000000) tbl(i) statement ok -ALTER TABLE test ALTER a SET TYPE BIGINT USING a+1 +ALTER TABLE test ALTER a SET TYPE BIGINT USING a + 1 statement ok COMMIT @@ -82,8 +82,8 @@ SELECT SUM(a) FROM test ---- 500000500000 -# ensure that the total blocks don't exceed the total blocks after the first iteration -# by more than 1.2 +# Ensure that the total blocks don't exceed the total blocks after the first iteration +# by more than 1.2. query I SELECT CASE WHEN ${i} = 0 THEN True @@ -93,7 +93,7 @@ FROM pragma_database_size() AS current, total_blocks_tbl; ---- 1 -# adjust total_blocks_tbl once to the count after the first iteration +# Adjust total_blocks_tbl once to the count after the first iteration. statement ok UPDATE total_blocks_tbl SET total_blocks = ( diff --git a/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow b/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow index 941885e4dcbf..4a0dc3c9bd09 100644 --- a/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow +++ b/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow @@ -2,6 +2,8 @@ # description: Test parallel order-preserving insert # group: [parallel] +mode skip + # There are different numbers of distinct blocks for smaller block sizes, # because the segment size is bound by the block size. 
require block_size 262144 diff --git a/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow b/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow index 7cfbd2fa6a56..243f4860b205 100644 --- a/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow +++ b/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow @@ -13,9 +13,29 @@ CREATE TABLE integers AS SELECT * FROM range(1_000_000) t(i); statement ok CREATE TABLE integers2 (i INTEGER); +statement ok +INSERT INTO integers2 VALUES (9999999); + +statement ok +CREATE UNIQUE INDEX idx ON integers2(i); + +# For smaller block sizes (16KB) the total blocks increase (to twice the original amount) in the first +# iteration, and then stay constant. + statement ok CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size(); +statement ok +CREATE TYPE test_result AS UNION ( + ok BOOL, + err STRUCT( + old BIGINT, + allowed_max DECIMAL(21,1), + actual BIGINT) +); + +loop i 0 10 + statement ok BEGIN; @@ -25,12 +45,39 @@ CHECKPOINT; statement ok INSERT INTO integers2 VALUES (999_998); -statement ok +# Invalidate the transaction. + +statement error INSERT INTO integers2 SELECT * FROM integers WHERE i <= 999_998; +---- +:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.* statement ok ROLLBACK +# Ensure that the total blocks don't exceed the total blocks after the first iteration by more than 1.2. + +query I +SELECT + CASE WHEN ${i} = 0 THEN True::test_result + WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.4 THEN True::test_result + ELSE { + 'old': total_blocks_tbl.total_blocks, + 'allowed_max': total_blocks_tbl.total_blocks * 1.4, + 'actual': current.total_blocks + }::test_result + END +FROM pragma_database_size() AS current, total_blocks_tbl; +---- +true + +# Adjust the total_blocks_tbl once to the count after the first iteration. 
+ statement ok UPDATE total_blocks_tbl SET total_blocks = ( -SELECT current.total_blocks FROM pragma_database_size() AS current); + SELECT + CASE WHEN ${i} = 0 THEN (SELECT current.total_blocks FROM pragma_database_size() AS current) + ELSE (total_blocks)END + ); + +endloop diff --git a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow index 6f97e21dbbee..4d08b4a8f6ec 100644 --- a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow +++ b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow @@ -2,30 +2,47 @@ # description: Test space reclamation of optimistic writing with a PK constraint violation. # group: [parallel] +mode skip + load __TEST_DIR__/reclaim_space_primary_key.db statement ok SET preserve_insertion_order=false; statement ok -CREATE TABLE integers AS SELECT * FROM range(1000000) t(i); +CREATE TABLE integers AS SELECT * FROM range(10000000) t(i); statement ok -CREATE TABLE integers2 (i INTEGER); +CREATE TABLE integers2 (i INTEGER PRIMARY KEY); statement ok -INSERT INTO integers2 VALUES (999999); +INSERT INTO integers2 VALUES (9999999); + +statement error +INSERT INTO integers2 SELECT * FROM integers; +---- +:Constraint Error.*violates primary key constraint.* statement ok CREATE TABLE block_count (count INT); loop i 0 10 +statement error +INSERT INTO integers2 SELECT * FROM integers; +---- +:Constraint Error.*violates primary key constraint.* + statement ok BEGIN; statement ok -INSERT INTO integers2 VALUES (999998); +INSERT INTO integers2 VALUES (9999998); + +statement error +INSERT INTO integers2 SELECT * FROM integers WHERE i <= 9999998; +---- +:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.* statement ok ROLLBACK @@ -36,7 +53,7 @@ SELECT COUNT(*) - ${i} FROM integers2; 1 statement ok -INSERT INTO integers2 VALUES (1000000 + ${i}); +INSERT INTO integers2 VALUES (10000000 + ${i}); statement ok 
CHECKPOINT; diff --git a/test/temp.test b/test/temp.test new file mode 100644 index 000000000000..0314c52deb1c --- /dev/null +++ b/test/temp.test @@ -0,0 +1,24 @@ +# name: test/temp.test +# group: [test] + +load __TEST_DIR__/reclaim_space_primary_key.db + +statement ok +SET preserve_insertion_order=false; + +statement ok +CREATE TABLE integers AS SELECT * FROM range(10000000) t(i); + +statement ok +CREATE TABLE integers2 (i INTEGER PRIMARY KEY); + +statement ok +INSERT INTO integers2 VALUES (9999999); + +statement error +INSERT INTO integers2 SELECT * FROM integers; +---- +:Constraint Error.*violates primary key constraint.* + +statement ok +CREATE TABLE block_count (count INT); \ No newline at end of file From 62d6d87b3cf87fcb84249beb48b49887435fbf18 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:38:59 +0100 Subject: [PATCH 003/142] more reclaim space fixes --- .../duckdb/storage/optimistic_data_writer.hpp | 2 +- .../duckdb/storage/partial_block_manager.hpp | 2 +- src/storage/local_storage.cpp | 7 ++-- src/storage/optimistic_data_writer.cpp | 5 +-- src/storage/partial_block_manager.cpp | 9 +++- ...der_preserving_odd_sized_batches.test_slow | 14 +++---- ...aim_space_primary_key_optimistic.test_slow | 41 +++++++++++++++---- test/temp.test | 24 ----------- 8 files changed, 55 insertions(+), 49 deletions(-) delete mode 100644 test/temp.test diff --git a/src/include/duckdb/storage/optimistic_data_writer.hpp b/src/include/duckdb/storage/optimistic_data_writer.hpp index 802d51bad707..c3d04e9c470f 100644 --- a/src/include/duckdb/storage/optimistic_data_writer.hpp +++ b/src/include/duckdb/storage/optimistic_data_writer.hpp @@ -30,7 +30,7 @@ class OptimisticDataWriter { //! Merge the partially written blocks from one optimistic writer into another void Merge(OptimisticDataWriter &other); //! Rollback - void Rollback(); + void Rollback(const bool mark_modified); private: //! 
Prepare a write to disk diff --git a/src/include/duckdb/storage/partial_block_manager.hpp b/src/include/duckdb/storage/partial_block_manager.hpp index b46ea65ad031..dbe6e7029664 100644 --- a/src/include/duckdb/storage/partial_block_manager.hpp +++ b/src/include/duckdb/storage/partial_block_manager.hpp @@ -114,7 +114,7 @@ class PartialBlockManager { void ClearBlocks(); //! Rollback all data written by this partial block manager - void Rollback(); + void Rollback(const bool mark_modified); //! Merge this block manager into another one void Merge(PartialBlockManager &other); diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 20334291e229..9764f878136d 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -253,12 +253,13 @@ void LocalTableStorage::FinalizeOptimisticWriter(OptimisticDataWriter &writer) { void LocalTableStorage::Rollback() { for (auto &writer : optimistic_writers) { - writer->Rollback(); + writer->Rollback(true); } - optimistic_writers.clear(); - optimistic_writer.Rollback(); // Drop any optimistically written local changes. + // The top-level writer writes to the row groups. 
+ optimistic_writers.clear(); + optimistic_writer.Rollback(false); row_groups->CommitDropTable(); } diff --git a/src/storage/optimistic_data_writer.cpp b/src/storage/optimistic_data_writer.cpp index 0a1966c500b4..e8f5bfb70485 100644 --- a/src/storage/optimistic_data_writer.cpp +++ b/src/storage/optimistic_data_writer.cpp @@ -80,13 +80,12 @@ void OptimisticDataWriter::Merge(OptimisticDataWriter &other) { void OptimisticDataWriter::FinalFlush() { if (partial_manager) { partial_manager->FlushPartialBlocks(); - partial_manager.reset(); } } -void OptimisticDataWriter::Rollback() { +void OptimisticDataWriter::Rollback(const bool mark_modified) { if (partial_manager) { - partial_manager->Rollback(); + partial_manager->Rollback(mark_modified); partial_manager.reset(); } } diff --git a/src/storage/partial_block_manager.cpp b/src/storage/partial_block_manager.cpp index 991e5fcd4cf5..5b8d392ea8da 100644 --- a/src/storage/partial_block_manager.cpp +++ b/src/storage/partial_block_manager.cpp @@ -46,6 +46,7 @@ PartialBlockManager::PartialBlockManager(BlockManager &block_manager, PartialBlo // Use the default maximum partial block size with a ratio of 20% free and 80% utilization. 
max_partial_block_size = NumericCast(block_manager.GetBlockSize() / 5 * 4); } + PartialBlockManager::~PartialBlockManager() { } @@ -186,6 +187,7 @@ void PartialBlockManager::ClearBlocks() { void PartialBlockManager::FlushPartialBlocks() { for (auto &e : partially_filled_blocks) { e.second->Flush(e.first); + written_blocks.insert(e.second->state.block_id); } partially_filled_blocks.clear(); } @@ -194,8 +196,13 @@ BlockManager &PartialBlockManager::GetBlockManager() const { return block_manager; } -void PartialBlockManager::Rollback() { +void PartialBlockManager::Rollback(const bool mark_modified) { ClearBlocks(); + if (mark_modified) { + for (auto &block_id : written_blocks) { + block_manager.MarkBlockAsFree(block_id); + } + } } } // namespace duckdb diff --git a/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow b/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow index 4a0dc3c9bd09..b87a2f377424 100644 --- a/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow +++ b/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow @@ -2,8 +2,6 @@ # description: Test parallel order-preserving insert # group: [parallel] -mode skip - # There are different numbers of distinct blocks for smaller block sizes, # because the segment size is bound by the block size. require block_size 262144 @@ -19,7 +17,7 @@ CREATE TABLE integers AS SELECT * FROM range(10000000) tbl(i); ---- 10000000 -# check the block count and median number of rows per row group +# Check the block count and median number of rows per row group. 
query I SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers'); ---- @@ -33,7 +31,6 @@ SELECT MEDIAN(count) FROM pragma_storage_info('integers'); statement ok COPY integers TO '__TEST_DIR__/integers.parquet' (ROW_GROUP_SIZE 77777) -# verify that reading while preserving insertion order creates the same size table statement ok CREATE TABLE integers_parquet AS FROM '__TEST_DIR__/integers.parquet'; @@ -61,11 +58,12 @@ SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers_parquet') true query I -SELECT MEDIAN(count)>100000 FROM pragma_storage_info('integers_parquet'); +SELECT MEDIAN(count) > 100000 FROM pragma_storage_info('integers_parquet'); ---- true -# verify that reading without preserving insertion order creates the same size table +# FIXME: does this even make sense? +# Verify that reading without preserving insertion order creates a same size table. statement ok SET preserve_insertion_order=false @@ -73,11 +71,11 @@ statement ok CREATE TABLE integers_parquet_no_order AS FROM '__TEST_DIR__/integers.parquet' query I -SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers_parquet_no_order'); +SELECT COUNT(DISTINCT block_id) < 12 FROM pragma_storage_info('integers_parquet_no_order'); ---- true query I -SELECT MEDIAN(count)>100000 FROM pragma_storage_info('integers_parquet_no_order'); +SELECT MEDIAN(count) > 100000 FROM pragma_storage_info('integers_parquet_no_order'); ---- true diff --git a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow index 4d08b4a8f6ec..6865f4d0a75d 100644 --- a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow +++ b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow @@ -2,8 +2,6 @@ # description: Test space reclamation of optimistic writing with a PK constraint violation. 
# group: [parallel] -mode skip - load __TEST_DIR__/reclaim_space_primary_key.db statement ok @@ -23,8 +21,20 @@ INSERT INTO integers2 SELECT * FROM integers; ---- :Constraint Error.*violates primary key constraint.* +# For smaller block sizes (16KB) the total blocks increase (to twice the original amount) in the first +# iteration, and then stay constant. + statement ok -CREATE TABLE block_count (count INT); +CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size(); + +statement ok +CREATE TYPE test_result AS UNION ( + ok BOOL, + err STRUCT( + old BIGINT, + allowed_max DECIMAL(21,1), + actual BIGINT) +); loop i 0 10 @@ -58,19 +68,34 @@ INSERT INTO integers2 VALUES (10000000 + ${i}); statement ok CHECKPOINT; -statement ok -INSERT INTO block_count SELECT total_blocks FROM pragma_database_size(); - query I SELECT COUNT(*) - ${i} FROM integers2; ---- 2 -# Ensure there is only a small difference between the MIN and MAX block counts. +# Ensure that the total blocks don't exceed the total blocks after the first iteration by more than 1.2. query I -SELECT (MAX(count) - MIN(count)) < 20 FROM block_count; +SELECT + CASE WHEN ${i} = 0 THEN True::test_result + WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.4 THEN True::test_result + ELSE { + 'old': total_blocks_tbl.total_blocks, + 'allowed_max': total_blocks_tbl.total_blocks * 1.4, + 'actual': current.total_blocks + }::test_result + END +FROM pragma_database_size() AS current, total_blocks_tbl; ---- true +# Adjust the total_blocks_tbl once to the count after the first iteration. 
+ +statement ok +UPDATE total_blocks_tbl SET total_blocks = ( + SELECT + CASE WHEN ${i} = 0 THEN (SELECT current.total_blocks FROM pragma_database_size() AS current) + ELSE (total_blocks)END + ); + endloop diff --git a/test/temp.test b/test/temp.test deleted file mode 100644 index 0314c52deb1c..000000000000 --- a/test/temp.test +++ /dev/null @@ -1,24 +0,0 @@ -# name: test/temp.test -# group: [test] - -load __TEST_DIR__/reclaim_space_primary_key.db - -statement ok -SET preserve_insertion_order=false; - -statement ok -CREATE TABLE integers AS SELECT * FROM range(10000000) t(i); - -statement ok -CREATE TABLE integers2 (i INTEGER PRIMARY KEY); - -statement ok -INSERT INTO integers2 VALUES (9999999); - -statement error -INSERT INTO integers2 SELECT * FROM integers; ----- -:Constraint Error.*violates primary key constraint.* - -statement ok -CREATE TABLE block_count (count INT); \ No newline at end of file From cbfdb4547c405d61b90ef0e9a7547bf0235836e3 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Wed, 22 Jan 2025 17:30:10 +0100 Subject: [PATCH 004/142] removing written blocks and moving the optimistic row group collections into the local table storage --- .../persistent/physical_batch_insert.cpp | 13 ++++++++++ .../operator/persistent/physical_insert.cpp | 18 ++++++++----- .../operator/persistent/physical_insert.hpp | 2 +- src/include/duckdb/storage/data_table.hpp | 4 ++- .../duckdb/storage/optimistic_data_writer.hpp | 2 +- .../duckdb/storage/partial_block_manager.hpp | 7 +----- .../duckdb/transaction/local_storage.hpp | 18 +++++++++---- .../write_overflow_strings_to_disk.cpp | 3 --- src/storage/compression/zstd.cpp | 4 --- src/storage/data_table.cpp | 6 +++++ src/storage/local_storage.cpp | 25 +++++++++++++------ src/storage/optimistic_data_writer.cpp | 5 ++-- src/storage/partial_block_manager.cpp | 21 +--------------- src/storage/table/column_checkpoint_state.cpp | 1 - 14 files changed, 72 insertions(+), 57 
deletions(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 2e546c477282..8e41e7244914 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -188,6 +188,19 @@ class BatchInsertLocalState : public LocalSinkState { optional_ptr writer; unique_ptr constraint_state; + // void CreateNewCollection(ClientContext &context, BatchInsertGlobalState &g_state, const vector + //&insert_types) { auto &data_table = g_state.table; auto table_info = data_table.GetStorage().GetDataTableInfo(); + // auto &io_manager = TableIOManager::Get(data_table.GetStorage()); + // + // // Create the local row group collection. + // auto max_row_id = NumericCast(MAX_ROW_ID); + // auto collection = make_uniq(std::move(table_info), io_manager, insert_types, + //max_row_id); collection->InitializeEmpty(); collection->InitializeAppend(current_append_state); + // + // lock_guard l(g_state.lock); + // auto &local_table_storage = data_table.GetStorage(); + // current_collection = data_table.CreateOptimisticRowGroups(context, std::move(collection)); + // } void CreateNewCollection(DuckTableEntry &table, const vector &insert_types) { auto table_info = table.GetStorage().GetDataTableInfo(); auto &io_manager = TableIOManager::Get(table.GetStorage()); diff --git a/src/execution/operator/persistent/physical_insert.cpp b/src/execution/operator/persistent/physical_insert.cpp index cb21cc26b91c..c7abdf25f9ca 100644 --- a/src/execution/operator/persistent/physical_insert.cpp +++ b/src/execution/operator/persistent/physical_insert.cpp @@ -685,14 +685,20 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, D_ASSERT(!return_chunk); // parallel append if (!lstate.local_collection) { - lock_guard l(gstate.lock); auto table_info = storage.GetDataTableInfo(); auto &io_manager = 
TableIOManager::Get(table.GetStorage()); - lstate.local_collection = make_uniq(std::move(table_info), io_manager, insert_types, - NumericCast(MAX_ROW_ID)); - lstate.local_collection->InitializeEmpty(); - lstate.local_collection->InitializeAppend(lstate.local_append_state); - lstate.writer = &gstate.table.GetStorage().CreateOptimisticWriter(context.client); + + // Create the local row group collection. + auto max_row_id = NumericCast(MAX_ROW_ID); + auto collection = + make_uniq(std::move(table_info), io_manager, insert_types, max_row_id); + collection->InitializeEmpty(); + collection->InitializeAppend(lstate.local_append_state); + + lock_guard l(gstate.lock); + auto &data_table = gstate.table.GetStorage(); + lstate.writer = data_table.CreateOptimisticWriter(context.client); + lstate.local_collection = data_table.CreateOptimisticRowGroups(context.client, std::move(collection)); } OnConflictHandling(table, context, lstate); D_ASSERT(action_type != OnConflictAction::UPDATE); diff --git a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp index 04d202af24d9..acc36a7780c4 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp @@ -57,7 +57,7 @@ class InsertLocalState : public LocalSinkState { DataChunk update_chunk; ExpressionExecutor default_executor; TableAppendState local_append_state; - unique_ptr local_collection; + optional_ptr local_collection; optional_ptr writer; // Rows that have been updated by a DO UPDATE conflict unordered_set updated_rows; diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp index 40f8145ddb8b..5496c29115e2 100644 --- a/src/include/duckdb/storage/data_table.hpp +++ b/src/include/duckdb/storage/data_table.hpp @@ -115,7 +115,9 @@ class DataTable { optional_ptr> column_ids); //! 
Merge a row group collection into the transaction-local storage void LocalMerge(ClientContext &context, RowGroupCollection &collection); - //! Creates an optimistic writer for this table - used for optimistically writing parallel appends + //! Create an optimistic row group collection for this table. Used for optimistically writing parallel appends. + RowGroupCollection &CreateOptimisticRowGroups(ClientContext &context, unique_ptr collection); + //! Create an optimistic writer for this table. Used for optimistically writing parallel appends. OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context); void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer); diff --git a/src/include/duckdb/storage/optimistic_data_writer.hpp b/src/include/duckdb/storage/optimistic_data_writer.hpp index c3d04e9c470f..802d51bad707 100644 --- a/src/include/duckdb/storage/optimistic_data_writer.hpp +++ b/src/include/duckdb/storage/optimistic_data_writer.hpp @@ -30,7 +30,7 @@ class OptimisticDataWriter { //! Merge the partially written blocks from one optimistic writer into another void Merge(OptimisticDataWriter &other); //! Rollback - void Rollback(const bool mark_modified); + void Rollback(); private: //! Prepare a write to disk diff --git a/src/include/duckdb/storage/partial_block_manager.hpp b/src/include/duckdb/storage/partial_block_manager.hpp index dbe6e7029664..c59869976a91 100644 --- a/src/include/duckdb/storage/partial_block_manager.hpp +++ b/src/include/duckdb/storage/partial_block_manager.hpp @@ -114,7 +114,7 @@ class PartialBlockManager { void ClearBlocks(); //! Rollback all data written by this partial block manager - void Rollback(const bool mark_modified); + void Rollback(); //! Merge this block manager into another one void Merge(PartialBlockManager &other); @@ -129,9 +129,6 @@ class PartialBlockManager { //! Returns a reference to the underlying block manager. BlockManager &GetBlockManager() const; - //! 
Registers a block as "written" by this partial block manager - void AddWrittenBlock(block_id_t block); - protected: BlockManager &block_manager; PartialBlockType partial_block_type; @@ -140,8 +137,6 @@ class PartialBlockManager { //! This is a multimap because there might be outstanding partial blocks with //! the same amount of left-over space multimap> partially_filled_blocks; - //! The set of written blocks - unordered_set written_blocks; //! The maximum size (in bytes) at which a partial block will be considered a partial block uint32_t max_partial_block_size; diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index 7516adeced72..3c202a56fde6 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp @@ -43,7 +43,7 @@ class LocalTableStorage : public enable_shared_from_this { reference table_ref; Allocator &allocator; - //! The main chunk collection holding the data + //! The main row group collection. shared_ptr row_groups; //! The set of unique append indexes. TableIndexList append_indexes; @@ -51,10 +51,14 @@ class LocalTableStorage : public enable_shared_from_this { TableIndexList delete_indexes; //! The number of deleted rows idx_t deleted_rows; - //! The main optimistic data writer + + //! The optimistic row group collections associated with this table. + vector> optimistic_row_groups; + //! The main optimistic data writer associated with this table. OptimisticDataWriter optimistic_writer; - //! The set of all optimistic data writers associated with this table + //! The optimistic data writers associated with this table. vector> optimistic_writers; + //! Whether or not storage was merged bool merged_storage = false; //! 
Whether or not the storage was dropped @@ -73,7 +77,9 @@ class LocalTableStorage : public enable_shared_from_this { const vector &table_types, row_t &start_row); void AppendToDeleteIndexes(Vector &row_ids, DataChunk &delete_chunk); - //! Creates an optimistic writer for this table + //! Create an optimistic row group collection for this table. + RowGroupCollection &CreateOptimisticRowGroups(unique_ptr collection); + //! Create an optimistic writer for this table. OptimisticDataWriter &CreateOptimisticWriter(); void FinalizeOptimisticWriter(OptimisticDataWriter &writer); }; @@ -129,7 +135,9 @@ class LocalStorage { static void FinalizeAppend(LocalAppendState &state); //! Merge a row group collection into the transaction-local storage void LocalMerge(DataTable &table, RowGroupCollection &collection); - //! Create an optimistic writer for the specified table + //! Create an optimistic row group collection for this table. + RowGroupCollection &CreateOptimisticRowGroups(DataTable &table, unique_ptr collection); + //! Create an optimistic writer for this table. 
OptimisticDataWriter &CreateOptimisticWriter(DataTable &table); void FinalizeOptimisticWriter(DataTable &table, OptimisticDataWriter &writer); diff --git a/src/storage/checkpoint/write_overflow_strings_to_disk.cpp b/src/storage/checkpoint/write_overflow_strings_to_disk.cpp index c58be310271c..37492f787cbe 100644 --- a/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +++ b/src/storage/checkpoint/write_overflow_strings_to_disk.cpp @@ -87,9 +87,6 @@ void WriteOverflowStringsToDisk::Flush() { // write to disk auto &block_manager = partial_block_manager.GetBlockManager(); block_manager.Write(handle.GetFileBuffer(), block_id); - - auto lock = partial_block_manager.GetLock(); - partial_block_manager.AddWrittenBlock(block_id); } block_id = INVALID_BLOCK; offset = 0; diff --git a/src/storage/compression/zstd.cpp b/src/storage/compression/zstd.cpp index fca90c18099c..b3cac8107343 100644 --- a/src/storage/compression/zstd.cpp +++ b/src/storage/compression/zstd.cpp @@ -474,10 +474,6 @@ class ZSTDCompressionState : public CompressionState { // Write the current page to disk auto &block_manager = partial_block_manager.GetBlockManager(); block_manager.Write(buffer.GetFileBuffer(), block_id); - { - auto lock = partial_block_manager.GetLock(); - partial_block_manager.AddWrittenBlock(block_id); - } } void FlushVector() { diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 384b34330af3..9a2bc0fa988d 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -848,6 +848,12 @@ void DataTable::FinalizeLocalAppend(LocalAppendState &state) { LocalStorage::FinalizeAppend(state); } +RowGroupCollection &DataTable::CreateOptimisticRowGroups(ClientContext &context, + unique_ptr collection) { + auto &local_storage = LocalStorage::Get(context, db); + return local_storage.CreateOptimisticRowGroups(*this, std::move(collection)); +} + OptimisticDataWriter &DataTable::CreateOptimisticWriter(ClientContext &context) { auto &local_storage = 
LocalStorage::Get(context, db); return local_storage.CreateOptimisticWriter(*this); diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 9764f878136d..ea88d506d41e 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -55,6 +55,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data const idx_t alter_column_index, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr) : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), + optimistic_row_groups(std::move(parent.optimistic_row_groups)), optimistic_writer(new_data_table, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { @@ -69,6 +70,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorage &parent, const idx_t drop_column_index) : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), + optimistic_row_groups(std::move(parent.optimistic_row_groups)), optimistic_writer(new_data_table, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { @@ -83,8 +85,10 @@ LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorag LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, LocalTableStorage &parent, ColumnDefinition &new_column, ExpressionExecutor &default_executor) : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows), + optimistic_row_groups(std::move(parent.optimistic_row_groups)), optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { + row_groups = 
parent.row_groups->AddColumn(context, new_column, default_executor); parent.row_groups.reset(); append_indexes.Move(parent.append_indexes); @@ -229,6 +233,11 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen } } +RowGroupCollection &LocalTableStorage::CreateOptimisticRowGroups(unique_ptr collection) { + optimistic_row_groups.push_back(std::move(collection)); + return *optimistic_row_groups.back(); +} + OptimisticDataWriter &LocalTableStorage::CreateOptimisticWriter() { auto writer = make_uniq(table_ref.get()); optimistic_writers.push_back(std::move(writer)); @@ -252,14 +261,10 @@ void LocalTableStorage::FinalizeOptimisticWriter(OptimisticDataWriter &writer) { } void LocalTableStorage::Rollback() { - for (auto &writer : optimistic_writers) { - writer->Rollback(true); + for (auto &collection : optimistic_row_groups) { + collection->CommitDropTable(); } - - // Drop any optimistically written local changes. - // The top-level writer writes to the row groups. 
- optimistic_writers.clear(); - optimistic_writer.Rollback(false); + optimistic_row_groups.clear(); row_groups->CommitDropTable(); } @@ -447,6 +452,12 @@ void LocalStorage::LocalMerge(DataTable &table, RowGroupCollection &collection) storage.merged_storage = true; } +RowGroupCollection &LocalStorage::CreateOptimisticRowGroups(DataTable &table, + unique_ptr collection) { + auto &storage = table_manager.GetOrCreateStorage(context, table); + return storage.CreateOptimisticRowGroups(std::move(collection)); +} + OptimisticDataWriter &LocalStorage::CreateOptimisticWriter(DataTable &table) { auto &storage = table_manager.GetOrCreateStorage(context, table); return storage.CreateOptimisticWriter(); diff --git a/src/storage/optimistic_data_writer.cpp b/src/storage/optimistic_data_writer.cpp index e8f5bfb70485..0a1966c500b4 100644 --- a/src/storage/optimistic_data_writer.cpp +++ b/src/storage/optimistic_data_writer.cpp @@ -80,12 +80,13 @@ void OptimisticDataWriter::Merge(OptimisticDataWriter &other) { void OptimisticDataWriter::FinalFlush() { if (partial_manager) { partial_manager->FlushPartialBlocks(); + partial_manager.reset(); } } -void OptimisticDataWriter::Rollback(const bool mark_modified) { +void OptimisticDataWriter::Rollback() { if (partial_manager) { - partial_manager->Rollback(mark_modified); + partial_manager->Rollback(); partial_manager.reset(); } } diff --git a/src/storage/partial_block_manager.cpp b/src/storage/partial_block_manager.cpp index 5b8d392ea8da..7c23df3da75b 100644 --- a/src/storage/partial_block_manager.cpp +++ b/src/storage/partial_block_manager.cpp @@ -133,7 +133,6 @@ void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation allocation // Flush any block that we're not going to reuse. 
if (block_to_free) { block_to_free->Flush(free_space); - AddWrittenBlock(block_to_free->state.block_id); } } @@ -162,21 +161,9 @@ void PartialBlockManager::Merge(PartialBlockManager &other) { partially_filled_blocks.insert(make_pair(e.first, std::move(e.second))); } } - // copy over the written blocks - for (auto &block_id : other.written_blocks) { - AddWrittenBlock(block_id); - } - other.written_blocks.clear(); other.partially_filled_blocks.clear(); } -void PartialBlockManager::AddWrittenBlock(block_id_t block) { - auto entry = written_blocks.insert(block); - if (!entry.second) { - throw InternalException("Written block already exists"); - } -} - void PartialBlockManager::ClearBlocks() { for (auto &e : partially_filled_blocks) { e.second->Clear(); @@ -187,7 +174,6 @@ void PartialBlockManager::ClearBlocks() { void PartialBlockManager::FlushPartialBlocks() { for (auto &e : partially_filled_blocks) { e.second->Flush(e.first); - written_blocks.insert(e.second->state.block_id); } partially_filled_blocks.clear(); } @@ -196,13 +182,8 @@ BlockManager &PartialBlockManager::GetBlockManager() const { return block_manager; } -void PartialBlockManager::Rollback(const bool mark_modified) { +void PartialBlockManager::Rollback() { ClearBlocks(); - if (mark_modified) { - for (auto &block_id : written_blocks) { - block_manager.MarkBlockAsFree(block_id); - } - } } } // namespace duckdb diff --git a/src/storage/table/column_checkpoint_state.cpp b/src/storage/table/column_checkpoint_state.cpp index a67daa060b76..d2fce922af48 100644 --- a/src/storage/table/column_checkpoint_state.cpp +++ b/src/storage/table/column_checkpoint_state.cpp @@ -71,7 +71,6 @@ void PartialBlockForCheckpoint::Flush(const idx_t free_space_left) { } } } - Clear(); } From 6a807b88c710ba03f405752169f22e05727cc8d0 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 23 Jan 2025 16:14:17 +0100 Subject: [PATCH 005/142] implemented the filter --- src/common/enum_util.cpp | 14 ++-- src/common/enums/metric_type.cpp | 9 ++- 
src/common/enums/optimizer_type.cpp | 1 + .../duckdb/common/enums/metric_type.hpp | 1 + .../duckdb/common/enums/optimizer_type.hpp | 3 +- .../optimizer/remove_useless_projections.hpp | 29 +++++++++ src/optimizer/CMakeLists.txt | 1 + src/optimizer/optimizer.cpp | 8 +++ src/optimizer/remove_useless_projections.cpp | 64 +++++++++++++++++++ test/optimizer/pullup_filters.test | 10 +-- 10 files changed, 126 insertions(+), 14 deletions(-) create mode 100644 src/include/duckdb/optimizer/remove_useless_projections.hpp create mode 100644 src/optimizer/remove_useless_projections.cpp diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index 0ac5bb0c4e7f..8da8057edeb1 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -2401,19 +2401,20 @@ const StringUtil::EnumStringLiteral *GetMetricsTypeValues() { { static_cast(MetricsType::OPTIMIZER_EXTENSION), "OPTIMIZER_EXTENSION" }, { static_cast(MetricsType::OPTIMIZER_MATERIALIZED_CTE), "OPTIMIZER_MATERIALIZED_CTE" }, { static_cast(MetricsType::OPTIMIZER_SUM_REWRITER), "OPTIMIZER_SUM_REWRITER" }, - { static_cast(MetricsType::OPTIMIZER_LATE_MATERIALIZATION), "OPTIMIZER_LATE_MATERIALIZATION" } + { static_cast(MetricsType::OPTIMIZER_LATE_MATERIALIZATION), "OPTIMIZER_LATE_MATERIALIZATION" }, + { static_cast(MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS), "OPTIMIZER_REMOVE_USELESS_PROJECTIONS" } }; return values; } template<> const char* EnumUtil::ToChars(MetricsType value) { - return StringUtil::EnumToString(GetMetricsTypeValues(), 49, "MetricsType", static_cast(value)); + return StringUtil::EnumToString(GetMetricsTypeValues(), 50, "MetricsType", static_cast(value)); } template<> MetricsType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetMetricsTypeValues(), 49, "MetricsType", value)); + return static_cast(StringUtil::StringToEnum(GetMetricsTypeValues(), 50, "MetricsType", value)); } const StringUtil::EnumStringLiteral 
*GetMultiFileReaderColumnMappingModeValues() { @@ -2605,19 +2606,20 @@ const StringUtil::EnumStringLiteral *GetOptimizerTypeValues() { { static_cast(OptimizerType::EXTENSION), "EXTENSION" }, { static_cast(OptimizerType::MATERIALIZED_CTE), "MATERIALIZED_CTE" }, { static_cast(OptimizerType::SUM_REWRITER), "SUM_REWRITER" }, - { static_cast(OptimizerType::LATE_MATERIALIZATION), "LATE_MATERIALIZATION" } + { static_cast(OptimizerType::LATE_MATERIALIZATION), "LATE_MATERIALIZATION" }, + { static_cast(OptimizerType::REMOVE_USELESS_PROJECTIONS), "REMOVE_USELESS_PROJECTIONS" } }; return values; } template<> const char* EnumUtil::ToChars(OptimizerType value) { - return StringUtil::EnumToString(GetOptimizerTypeValues(), 28, "OptimizerType", static_cast(value)); + return StringUtil::EnumToString(GetOptimizerTypeValues(), 29, "OptimizerType", static_cast(value)); } template<> OptimizerType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetOptimizerTypeValues(), 28, "OptimizerType", value)); + return static_cast(StringUtil::StringToEnum(GetOptimizerTypeValues(), 29, "OptimizerType", value)); } const StringUtil::EnumStringLiteral *GetOrderByNullTypeValues() { diff --git a/src/common/enums/metric_type.cpp b/src/common/enums/metric_type.cpp index d97579c23f2a..0477f2a4f672 100644 --- a/src/common/enums/metric_type.cpp +++ b/src/common/enums/metric_type.cpp @@ -66,6 +66,8 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) { return MetricsType::OPTIMIZER_FILTER_PUSHDOWN; case OptimizerType::EMPTY_RESULT_PULLUP: return MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP; + case OptimizerType::REMOVE_USELESS_PROJECTIONS: + return MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS; case OptimizerType::CTE_FILTER_PUSHER: return MetricsType::OPTIMIZER_CTE_FILTER_PUSHER; case OptimizerType::REGEX_RANGE: @@ -153,6 +155,8 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) { return OptimizerType::BUILD_SIDE_PROBE_SIDE; 
case MetricsType::OPTIMIZER_LIMIT_PUSHDOWN: return OptimizerType::LIMIT_PUSHDOWN; + case MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS: + return OptimizerType::REMOVE_USELESS_PROJECTIONS; case MetricsType::OPTIMIZER_TOP_N: return OptimizerType::TOP_N; case MetricsType::OPTIMIZER_COMPRESSED_MATERIALIZATION: @@ -170,9 +174,9 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) { case MetricsType::OPTIMIZER_MATERIALIZED_CTE: return OptimizerType::MATERIALIZED_CTE; case MetricsType::OPTIMIZER_SUM_REWRITER: - return OptimizerType::SUM_REWRITER; + return OptimizerType::SUM_REWRITER; case MetricsType::OPTIMIZER_LATE_MATERIALIZATION: - return OptimizerType::LATE_MATERIALIZATION; + return OptimizerType::LATE_MATERIALIZATION; default: return OptimizerType::INVALID; }; @@ -206,6 +210,7 @@ bool MetricsUtils::IsOptimizerMetric(MetricsType type) { case MetricsType::OPTIMIZER_EXTENSION: case MetricsType::OPTIMIZER_MATERIALIZED_CTE: case MetricsType::OPTIMIZER_SUM_REWRITER: + case MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS: case MetricsType::OPTIMIZER_LATE_MATERIALIZATION: return true; default: diff --git a/src/common/enums/optimizer_type.cpp b/src/common/enums/optimizer_type.cpp index f4d02d68a3b5..fd307260d06d 100644 --- a/src/common/enums/optimizer_type.cpp +++ b/src/common/enums/optimizer_type.cpp @@ -29,6 +29,7 @@ static const DefaultOptimizerType internal_optimizer_types[] = { {"column_lifetime", OptimizerType::COLUMN_LIFETIME}, {"limit_pushdown", OptimizerType::LIMIT_PUSHDOWN}, {"top_n", OptimizerType::TOP_N}, + {"remove_useless_projections", OptimizerType::REMOVE_USELESS_PROJECTIONS}, {"build_side_probe_side", OptimizerType::BUILD_SIDE_PROBE_SIDE}, {"compressed_materialization", OptimizerType::COMPRESSED_MATERIALIZATION}, {"duplicate_groups", OptimizerType::DUPLICATE_GROUPS}, diff --git a/src/include/duckdb/common/enums/metric_type.hpp b/src/include/duckdb/common/enums/metric_type.hpp index 14389bf4a5f9..bd938779900d 100644 --- 
a/src/include/duckdb/common/enums/metric_type.hpp +++ b/src/include/duckdb/common/enums/metric_type.hpp @@ -69,6 +69,7 @@ enum class MetricsType : uint8_t { OPTIMIZER_MATERIALIZED_CTE, OPTIMIZER_SUM_REWRITER, OPTIMIZER_LATE_MATERIALIZATION, + OPTIMIZER_REMOVE_USELESS_PROJECTIONS, }; struct MetricsTypeHashFunction { diff --git a/src/include/duckdb/common/enums/optimizer_type.hpp b/src/include/duckdb/common/enums/optimizer_type.hpp index adabacec225d..e9209d56ba1b 100644 --- a/src/include/duckdb/common/enums/optimizer_type.hpp +++ b/src/include/duckdb/common/enums/optimizer_type.hpp @@ -41,7 +41,8 @@ enum class OptimizerType : uint32_t { EXTENSION, MATERIALIZED_CTE, SUM_REWRITER, - LATE_MATERIALIZATION + LATE_MATERIALIZATION, + REMOVE_USELESS_PROJECTIONS }; string OptimizerTypeToString(OptimizerType type); diff --git a/src/include/duckdb/optimizer/remove_useless_projections.hpp b/src/include/duckdb/optimizer/remove_useless_projections.hpp new file mode 100644 index 000000000000..46665b74ee73 --- /dev/null +++ b/src/include/duckdb/optimizer/remove_useless_projections.hpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/optimizer/remove_useless_projections.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/optimizer/column_binding_replacer.hpp" + +namespace duckdb { + +//! 
The RemoveUselessProjections Optimizer traverses the logical operator tree and removes all projections that just +class RemoveUselessProjections : LogicalOperatorVisitor { +public: + RemoveUselessProjections() { + } + unique_ptr RemoveProjections(unique_ptr plan); + unique_ptr RemoveProjectionsChildren(unique_ptr plan); + void ReplaceBindings(LogicalOperator &plan); + +private: + bool first_projection; + ColumnBindingReplacer replacer; +}; + +} // namespace duckdb diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index a7b881b09925..bed29e1866d8 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -30,6 +30,7 @@ add_library_unity( regex_range_filter.cpp remove_duplicate_groups.cpp remove_unused_columns.cpp + remove_useless_projections.cpp statistics_propagator.cpp limit_pushdown.cpp topn_optimizer.cpp diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 8ac4cdd87da8..d07d4271bd87 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -12,6 +12,7 @@ #include "duckdb/optimizer/cte_filter_pusher.hpp" #include "duckdb/optimizer/deliminator.hpp" #include "duckdb/optimizer/empty_result_pullup.hpp" +#include "duckdb/optimizer/remove_useless_projections.hpp" #include "duckdb/optimizer/expression_heuristics.hpp" #include "duckdb/optimizer/filter_pullup.hpp" #include "duckdb/optimizer/filter_pushdown.hpp" @@ -166,6 +167,13 @@ void Optimizer::RunBuiltInOptimizers() { plan = empty_result_pullup.Optimize(std::move(plan)); }); + // Removes Unnecessary Projections + RunOptimizer(OptimizerType::REMOVE_USELESS_PROJECTIONS, [&]() { + RemoveUselessProjections remover; + plan = remover.RemoveProjections(std::move(plan)); + remover.ReplaceBindings(*plan); + }); + // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { diff --git 
a/src/optimizer/remove_useless_projections.cpp b/src/optimizer/remove_useless_projections.cpp new file mode 100644 index 000000000000..74ae42b30c87 --- /dev/null +++ b/src/optimizer/remove_useless_projections.cpp @@ -0,0 +1,64 @@ +#include "duckdb/optimizer/remove_useless_projections.hpp" +#include "duckdb/common/enums/logical_operator_type.hpp" + +namespace duckdb { + +unique_ptr RemoveUselessProjections::RemoveProjectionsChildren(unique_ptr op) { + for (idx_t i = 0; i < op->children.size(); i++) { + op->children[i] = RemoveProjections(std::move(op->children[i])); + } + return op; +} + +unique_ptr RemoveUselessProjections::RemoveProjections(unique_ptr op) { + if (op->type == LogicalOperatorType::LOGICAL_UNION || op->type == LogicalOperatorType::LOGICAL_EXCEPT || + op->type == LogicalOperatorType::LOGICAL_INTERSECT || op->type == LogicalOperatorType::LOGICAL_RECURSIVE_CTE || + op->type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE) { + // guaranteed to find a projection under this that is meant to keep the column order in the presence of + // an optimization done by build side probe side. + for (idx_t i = 0; i < op->children.size(); i++) { + first_projection = true; + op->children[i] = RemoveProjections(std::move(op->children[i])); + } + return op; + } + if (op->type != LogicalOperatorType::LOGICAL_PROJECTION) { + return RemoveProjectionsChildren(std::move(op)); + } + // operator is a projection. 
Remove if possible + if (first_projection) { + first_projection = false; + return RemoveProjectionsChildren(std::move(op)); + } + auto &proj = op->Cast(); + auto child_bindings = op->children[0]->GetColumnBindings(); + if (proj.GetColumnBindings().size() != child_bindings.size()) { + return op; + } + idx_t binding_index = 0; + for (auto &expr : proj.expressions) { + if (expr->type != ExpressionType::BOUND_COLUMN_REF) { + return op; + } + auto &bound_ref = expr->Cast(); + if (bound_ref.binding != child_bindings[binding_index]) { + return op; + } + binding_index++; + } + D_ASSERT(binding_index == op->GetColumnBindings().size()); + // we have a projection where every expression is a bound column ref, and they are in the same order as the + // bindings of the child. We can remove this projection + binding_index = 0; + for (auto &binding : op->GetColumnBindings()) { + replacer.replacement_bindings.push_back(ReplacementBinding(binding, child_bindings[binding_index])); + binding_index++; + } + return RemoveProjectionsChildren(std::move(op->children[0])); +} + +void RemoveUselessProjections::ReplaceBindings(LogicalOperator &op) { + replacer.VisitOperator(op); +} + +} // namespace duckdb diff --git a/test/optimizer/pullup_filters.test b/test/optimizer/pullup_filters.test index 62a76de2335e..87bd01e99daf 100644 --- a/test/optimizer/pullup_filters.test +++ b/test/optimizer/pullup_filters.test @@ -6,13 +6,13 @@ statement ok PRAGMA explain_output = 'PHYSICAL_ONLY' statement ok -CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 11, 1) t1(i) +CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 11, 1) t1(i); statement ok -CREATE TABLE vals2(k BIGINT, l BIGINT) +CREATE TABLE vals2(k BIGINT, l BIGINT); statement ok -INSERT INTO vals2 SELECT * FROM vals1 +INSERT INTO vals2 SELECT * FROM vals1; ## INNER JOIN: pull up a single filter in cross product from LHS query II @@ -30,13 +30,13 @@ physical_plan :.*=5.*=5.* query II EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 
WHERE i=5 AND k=3) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl2.i AND tbl1.k=tbl2.k ---- -physical_plan :(.*=5.*=3.*=5.*=3.*|.*=3.*=5.*=3.*=5.*) +physical_plan :(.*=5.*=5.*=3.*=3.*|.*=3.*=3.*=5.*=5.*) ## INNER JOIN: pull up two filters in cross product from RHS query II EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2 WHERE i=5 AND k=3) tbl2 WHERE tbl1.i=tbl2.i AND tbl1.k=tbl2.k ---- -physical_plan :(.*=5.*=3.*=5.*=3.*|.*=3.*=5.*=3.*=5.*) +physical_plan :(.*=5.*=5.*=3.*=3.*|.*=3.*=3.*=5.*=5.*) #### LEFT JOIN: pull up a single filter from LHS #### query II From b33572693e23ca00adf34847271572e6b06ca50c Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 23 Jan 2025 16:51:02 +0100 Subject: [PATCH 006/142] added one test --- .../optimizer/remove_useless_projections.hpp | 2 +- .../remove_unnecessary_projections.test | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 test/optimizer/remove_unnecessary_projections.test diff --git a/src/include/duckdb/optimizer/remove_useless_projections.hpp b/src/include/duckdb/optimizer/remove_useless_projections.hpp index 46665b74ee73..595d090630fb 100644 --- a/src/include/duckdb/optimizer/remove_useless_projections.hpp +++ b/src/include/duckdb/optimizer/remove_useless_projections.hpp @@ -15,7 +15,7 @@ namespace duckdb { //! 
The RemoveUselessProjections Optimizer traverses the logical operator tree and removes all projections that just class RemoveUselessProjections : LogicalOperatorVisitor { public: - RemoveUselessProjections() { + RemoveUselessProjections() : first_projection(true) { } unique_ptr RemoveProjections(unique_ptr plan); unique_ptr RemoveProjectionsChildren(unique_ptr plan); diff --git a/test/optimizer/remove_unnecessary_projections.test b/test/optimizer/remove_unnecessary_projections.test new file mode 100644 index 000000000000..2bcab73648ba --- /dev/null +++ b/test/optimizer/remove_unnecessary_projections.test @@ -0,0 +1,23 @@ +# name: test/optimizer/remove_unnecessary_projections.test +# description: Test regex to like Optimization Rules +# group: [optimizer] + +statement ok +pragma disabled_optimizers='statistics_propagation,column_lifetime'; + +statement ok +create table t1 as select range%50 a from range(10000); + +statement ok +create table t2 as select range b from range(100); + +statement ok +create table t3 as select range c from range(10000); + +statement ok +create table t4 as select range d from range(400); + +query II +explain select * from (select * from t1, t2 where a = b) t_left, (select * from t3, t4 where c = d) t_right where a = d; +---- +physical_plan :.*PROJECTION.*PROJECTION.* From 63b53f0b920bfc4c6f40d8bfda5211d85c26a4f2 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Fri, 24 Jan 2025 17:06:24 +0100 Subject: [PATCH 007/142] some refactoring to prepare for batch insert --- .../persistent/physical_batch_insert.cpp | 13 ---- .../operator/persistent/physical_insert.cpp | 68 ++++++++++--------- .../operator/persistent/physical_insert.hpp | 2 +- src/include/duckdb/storage/data_table.hpp | 5 +- .../duckdb/storage/optimistic_data_writer.hpp | 2 +- .../duckdb/transaction/local_storage.hpp | 16 ++++- src/storage/data_table.cpp | 10 ++- src/storage/local_storage.cpp | 39 ++++++++--- 
.../optimistic_write_update.test | 1 - 9 files changed, 90 insertions(+), 66 deletions(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 8e41e7244914..2e546c477282 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -188,19 +188,6 @@ class BatchInsertLocalState : public LocalSinkState { optional_ptr writer; unique_ptr constraint_state; - // void CreateNewCollection(ClientContext &context, BatchInsertGlobalState &g_state, const vector - //&insert_types) { auto &data_table = g_state.table; auto table_info = data_table.GetStorage().GetDataTableInfo(); - // auto &io_manager = TableIOManager::Get(data_table.GetStorage()); - // - // // Create the local row group collection. - // auto max_row_id = NumericCast(MAX_ROW_ID); - // auto collection = make_uniq(std::move(table_info), io_manager, insert_types, - //max_row_id); collection->InitializeEmpty(); collection->InitializeAppend(current_append_state); - // - // lock_guard l(g_state.lock); - // auto &local_table_storage = data_table.GetStorage(); - // current_collection = data_table.CreateOptimisticRowGroups(context, std::move(collection)); - // } void CreateNewCollection(DuckTableEntry &table, const vector &insert_types) { auto table_info = table.GetStorage().GetDataTableInfo(); auto &io_manager = TableIOManager::Get(table.GetStorage()); diff --git a/src/execution/operator/persistent/physical_insert.cpp b/src/execution/operator/persistent/physical_insert.cpp index c7abdf25f9ca..3594ae15e7ff 100644 --- a/src/execution/operator/persistent/physical_insert.cpp +++ b/src/execution/operator/persistent/physical_insert.cpp @@ -83,7 +83,8 @@ InsertGlobalState::InsertGlobalState(ClientContext &context, const vector &types_p, const vector> &bound_defaults, const vector> &bound_constraints) - : default_executor(context, bound_defaults), 
bound_constraints(bound_constraints) { + : default_executor(context, bound_defaults), collection_index(DConstants::INVALID_INDEX), + bound_constraints(bound_constraints) { auto &allocator = Allocator::Get(context); @@ -681,34 +682,35 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, // All of the tuples should have been turned into an update, leaving the chunk empty afterwards D_ASSERT(lstate.update_chunk.size() == 0); } - } else { - D_ASSERT(!return_chunk); - // parallel append - if (!lstate.local_collection) { - auto table_info = storage.GetDataTableInfo(); - auto &io_manager = TableIOManager::Get(table.GetStorage()); - - // Create the local row group collection. - auto max_row_id = NumericCast(MAX_ROW_ID); - auto collection = - make_uniq(std::move(table_info), io_manager, insert_types, max_row_id); - collection->InitializeEmpty(); - collection->InitializeAppend(lstate.local_append_state); - - lock_guard l(gstate.lock); - auto &data_table = gstate.table.GetStorage(); - lstate.writer = data_table.CreateOptimisticWriter(context.client); - lstate.local_collection = data_table.CreateOptimisticRowGroups(context.client, std::move(collection)); - } - OnConflictHandling(table, context, lstate); - D_ASSERT(action_type != OnConflictAction::UPDATE); + return SinkResultType::NEED_MORE_INPUT; + } - auto new_row_group = lstate.local_collection->Append(lstate.insert_chunk, lstate.local_append_state); - if (new_row_group) { - lstate.writer->WriteNewRowGroup(*lstate.local_collection); - } + // parallel append + D_ASSERT(!return_chunk); + auto &data_table = gstate.table.GetStorage(); + if (!lstate.collection_index.IsValid()) { + auto table_info = storage.GetDataTableInfo(); + auto &io_manager = TableIOManager::Get(table.GetStorage()); + + // Create the local row group collection. 
+ auto max_row_id = NumericCast(MAX_ROW_ID); + auto collection = make_uniq(std::move(table_info), io_manager, insert_types, max_row_id); + collection->InitializeEmpty(); + collection->InitializeAppend(lstate.local_append_state); + + lock_guard l(gstate.lock); + lstate.writer = data_table.CreateOptimisticWriter(context.client); + lstate.collection_index = data_table.CreateOptimisticCollection(context.client, std::move(collection)); } + OnConflictHandling(table, context, lstate); + D_ASSERT(action_type != OnConflictAction::UPDATE); + + auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto new_row_group = collection.Append(lstate.insert_chunk, lstate.local_append_state); + if (new_row_group) { + lstate.writer->WriteNewRowGroup(collection); + } return SinkResultType::NEED_MORE_INPUT; } @@ -719,7 +721,7 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato context.thread.profiler.Flush(*this); client_profiler.Flush(context.thread.profiler); - if (!parallel || !lstate.local_collection) { + if (!parallel || !lstate.collection_index.IsValid()) { return SinkCombineResultType::FINISHED; } @@ -729,9 +731,11 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // parallel append: finalize the append TransactionData tdata(0, 0); - lstate.local_collection->FinalizeAppend(tdata, lstate.local_append_state); + auto &data_table = gstate.table.GetStorage(); + auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + collection.FinalizeAppend(tdata, lstate.local_append_state); - auto append_count = lstate.local_collection->GetTotalRows(); + auto append_count = collection.GetTotalRows(); lock_guard lock(gstate.lock); gstate.insert_count += append_count; @@ -739,16 +743,16 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // we have few rows - append to the local storage directly 
storage.InitializeLocalAppend(gstate.append_state, table, context.client, bound_constraints); auto &transaction = DuckTransaction::Get(context.client, table.catalog); - lstate.local_collection->Scan(transaction, [&](DataChunk &insert_chunk) { + collection.Scan(transaction, [&](DataChunk &insert_chunk) { storage.LocalAppend(gstate.append_state, context.client, insert_chunk, false); return true; }); storage.FinalizeLocalAppend(gstate.append_state); } else { // we have written rows to disk optimistically - merge directly into the transaction-local storage - lstate.writer->WriteLastRowGroup(*lstate.local_collection); + lstate.writer->WriteLastRowGroup(collection); lstate.writer->FinalFlush(); - gstate.table.GetStorage().LocalMerge(context.client, *lstate.local_collection); + gstate.table.GetStorage().LocalMerge(context.client, collection); gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer); } diff --git a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp index acc36a7780c4..f12084e39af9 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp @@ -57,7 +57,7 @@ class InsertLocalState : public LocalSinkState { DataChunk update_chunk; ExpressionExecutor default_executor; TableAppendState local_append_state; - optional_ptr local_collection; + PhysicalIndex collection_index; optional_ptr writer; // Rows that have been updated by a DO UPDATE conflict unordered_set updated_rows; diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp index 5496c29115e2..16354f45024b 100644 --- a/src/include/duckdb/storage/data_table.hpp +++ b/src/include/duckdb/storage/data_table.hpp @@ -116,7 +116,10 @@ class DataTable { //! 
Merge a row group collection into the transaction-local storage void LocalMerge(ClientContext &context, RowGroupCollection &collection); //! Create an optimistic row group collection for this table. Used for optimistically writing parallel appends. - RowGroupCollection &CreateOptimisticRowGroups(ClientContext &context, unique_ptr collection); + //! Returns the index into the optimistic_collections vector for newly created collection. + PhysicalIndex CreateOptimisticCollection(ClientContext &context, unique_ptr collection); + //! Returns the optimistic row group collection corresponding to the index. + RowGroupCollection &GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! Create an optimistic writer for this table. Used for optimistically writing parallel appends. OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context); void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer); diff --git a/src/include/duckdb/storage/optimistic_data_writer.hpp b/src/include/duckdb/storage/optimistic_data_writer.hpp index 802d51bad707..cdf96a038264 100644 --- a/src/include/duckdb/storage/optimistic_data_writer.hpp +++ b/src/include/duckdb/storage/optimistic_data_writer.hpp @@ -39,7 +39,7 @@ class OptimisticDataWriter { private: //! The table DataTable &table; - //! The partial block manager (if we created one yet) + //! The partial block manager, if any was created. unique_ptr partial_manager; }; diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index 3c202a56fde6..b5a7398a8446 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp @@ -53,7 +53,7 @@ class LocalTableStorage : public enable_shared_from_this { idx_t deleted_rows; //! The optimistic row group collections associated with this table. - vector> optimistic_row_groups; + vector> optimistic_collections; //! 
The main optimistic data writer associated with this table. OptimisticDataWriter optimistic_writer; //! The optimistic data writers associated with this table. @@ -78,10 +78,16 @@ class LocalTableStorage : public enable_shared_from_this { void AppendToDeleteIndexes(Vector &row_ids, DataChunk &delete_chunk); //! Create an optimistic row group collection for this table. - RowGroupCollection &CreateOptimisticRowGroups(unique_ptr collection); + //! Returns the index into the optimistic_collections vector for newly created collection. + PhysicalIndex CreateOptimisticCollection(unique_ptr collection); + //! Returns the optimistic row group collection corresponding to the index. + RowGroupCollection &GetOptimisticCollection(const PhysicalIndex collection_index); //! Create an optimistic writer for this table. OptimisticDataWriter &CreateOptimisticWriter(); void FinalizeOptimisticWriter(OptimisticDataWriter &writer); + +private: + mutex collections_lock; }; class LocalTableManager { @@ -136,7 +142,10 @@ class LocalStorage { //! Merge a row group collection into the transaction-local storage void LocalMerge(DataTable &table, RowGroupCollection &collection); //! Create an optimistic row group collection for this table. - RowGroupCollection &CreateOptimisticRowGroups(DataTable &table, unique_ptr collection); + //! Returns the index into the optimistic_collections vector for newly created collection. + PhysicalIndex CreateOptimisticCollection(DataTable &table, unique_ptr collection); + //! Returns the optimistic row group collection corresponding to the index. + RowGroupCollection &GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Create an optimistic writer for this table. 
OptimisticDataWriter &CreateOptimisticWriter(DataTable &table); void FinalizeOptimisticWriter(DataTable &table, OptimisticDataWriter &writer); @@ -179,6 +188,7 @@ class LocalStorage { DuckTransaction &transaction; LocalTableManager table_manager; +private: void Flush(DataTable &table, LocalTableStorage &storage, optional_ptr commit_state); }; diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 9a2bc0fa988d..9e194f0d6549 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -848,10 +848,14 @@ void DataTable::FinalizeLocalAppend(LocalAppendState &state) { LocalStorage::FinalizeAppend(state); } -RowGroupCollection &DataTable::CreateOptimisticRowGroups(ClientContext &context, - unique_ptr collection) { +PhysicalIndex DataTable::CreateOptimisticCollection(ClientContext &context, unique_ptr collection) { auto &local_storage = LocalStorage::Get(context, db); - return local_storage.CreateOptimisticRowGroups(*this, std::move(collection)); + return local_storage.CreateOptimisticCollection(*this, std::move(collection)); +} + +RowGroupCollection &DataTable::GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index) { + auto &local_storage = LocalStorage::Get(context, db); + return local_storage.GetOptimisticCollection(*this, collection_index); } OptimisticDataWriter &DataTable::CreateOptimisticWriter(ClientContext &context) { diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index ea88d506d41e..6304147f715b 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -55,7 +55,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data const idx_t alter_column_index, const LogicalType &target_type, const vector &bound_columns, Expression &cast_expr) : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), - optimistic_row_groups(std::move(parent.optimistic_row_groups)), + 
optimistic_collections(std::move(parent.optimistic_collections)), optimistic_writer(new_data_table, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { @@ -70,7 +70,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorage &parent, const idx_t drop_column_index) : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), - optimistic_row_groups(std::move(parent.optimistic_row_groups)), + optimistic_collections(std::move(parent.optimistic_collections)), optimistic_writer(new_data_table, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { @@ -85,7 +85,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorag LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, LocalTableStorage &parent, ColumnDefinition &new_column, ExpressionExecutor &default_executor) : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows), - optimistic_row_groups(std::move(parent.optimistic_row_groups)), + optimistic_collections(std::move(parent.optimistic_collections)), optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { @@ -233,9 +233,15 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen } } -RowGroupCollection &LocalTableStorage::CreateOptimisticRowGroups(unique_ptr collection) { - optimistic_row_groups.push_back(std::move(collection)); - return *optimistic_row_groups.back(); +PhysicalIndex LocalTableStorage::CreateOptimisticCollection(unique_ptr collection) { + lock_guard l(collections_lock); + optimistic_collections.push_back(std::move(collection)); + return 
PhysicalIndex(optimistic_collections.size() - 1); +} + +RowGroupCollection &LocalTableStorage::GetOptimisticCollection(const PhysicalIndex collection_index) { + lock_guard l(collections_lock); + return *optimistic_collections[collection_index.index]; } OptimisticDataWriter &LocalTableStorage::CreateOptimisticWriter() { @@ -261,10 +267,17 @@ void LocalTableStorage::FinalizeOptimisticWriter(OptimisticDataWriter &writer) { } void LocalTableStorage::Rollback() { - for (auto &collection : optimistic_row_groups) { + for (auto &writer : optimistic_writers) { + writer->Rollback(); + } + optimistic_writer.Rollback(); + for (auto &collection : optimistic_collections) { + if (!collection) { + continue; + } collection->CommitDropTable(); } - optimistic_row_groups.clear(); + optimistic_collections.clear(); row_groups->CommitDropTable(); } @@ -452,10 +465,14 @@ void LocalStorage::LocalMerge(DataTable &table, RowGroupCollection &collection) storage.merged_storage = true; } -RowGroupCollection &LocalStorage::CreateOptimisticRowGroups(DataTable &table, - unique_ptr collection) { +PhysicalIndex LocalStorage::CreateOptimisticCollection(DataTable &table, unique_ptr collection) { + auto &storage = table_manager.GetOrCreateStorage(context, table); + return storage.CreateOptimisticCollection(std::move(collection)); +} + +RowGroupCollection &LocalStorage::GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index) { auto &storage = table_manager.GetOrCreateStorage(context, table); - return storage.CreateOptimisticRowGroups(std::move(collection)); + return storage.GetOptimisticCollection(collection_index); } OptimisticDataWriter &LocalStorage::CreateOptimisticWriter(DataTable &table) { diff --git a/test/sql/storage/optimistic_write/optimistic_write_update.test b/test/sql/storage/optimistic_write/optimistic_write_update.test index 41bc6d1f7c6d..12c42a763824 100644 --- a/test/sql/storage/optimistic_write/optimistic_write_update.test +++ 
b/test/sql/storage/optimistic_write/optimistic_write_update.test @@ -2,7 +2,6 @@ # description: Test optimistic write with updates in transaction-local storage # group: [optimistic_write] -# load the DB from disk load __TEST_DIR__/optimistic_write_update.db statement ok From 9203af533d1740fde50b194ca395534fa9cad891 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Sat, 25 Jan 2025 16:13:16 +0100 Subject: [PATCH 008/142] use local table storage for batch insert --- .../persistent/physical_batch_insert.cpp | 222 ++++++++++-------- .../operator/persistent/physical_insert.cpp | 18 +- .../operator/persistent/physical_insert.hpp | 1 + src/include/duckdb/storage/data_table.hpp | 5 +- .../duckdb/transaction/local_storage.hpp | 8 +- src/storage/data_table.cpp | 8 +- src/storage/local_storage.cpp | 21 +- 7 files changed, 171 insertions(+), 112 deletions(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 2e546c477282..a9d8696201f4 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -38,38 +38,47 @@ enum class RowGroupBatchType : uint8_t { FLUSHED, NOT_FLUSHED }; class CollectionMerger { public: - explicit CollectionMerger(ClientContext &context) : context(context) { + explicit CollectionMerger(ClientContext &context, DataTable &data_table) + : context(context), data_table(data_table), batch_type(RowGroupBatchType::NOT_FLUSHED) { } + //! The transaction context. ClientContext &context; - vector> current_collections; - RowGroupBatchType batch_type = RowGroupBatchType::NOT_FLUSHED; + //! The data table. + DataTable &data_table; + //! Indexes to the optimistic row group collection vector of the local table storage for this transaction. + vector collection_indexes; + //! The batch type for merging collections. 
+ RowGroupBatchType batch_type; public: - void AddCollection(unique_ptr collection, RowGroupBatchType type) { - current_collections.push_back(std::move(collection)); + void AddCollection(const PhysicalIndex collection_index, RowGroupBatchType type) { + collection_indexes.push_back(collection_index); if (type == RowGroupBatchType::FLUSHED) { batch_type = RowGroupBatchType::FLUSHED; - if (current_collections.size() > 1) { + if (collection_indexes.size() > 1) { throw InternalException("Cannot merge flushed collections"); } } } bool Empty() { - return current_collections.empty(); + return collection_indexes.empty(); } - unique_ptr Flush(OptimisticDataWriter &writer) { + PhysicalIndex Flush(OptimisticDataWriter &writer) { if (Empty()) { - return nullptr; + return PhysicalIndex(DConstants::INVALID_INDEX); } - unique_ptr new_collection = std::move(current_collections[0]); - if (current_collections.size() > 1) { - // we have gathered multiple collections: create one big collection and merge that - auto &types = new_collection->GetTypes(); + + auto result_collection_index = collection_indexes[0]; + auto result_collection = data_table.GetOptimisticCollection(context, result_collection_index); + D_ASSERT(result_collection); + if (collection_indexes.size() > 1) { + // Merge all collections into one result collection. 
+ auto &types = result_collection->GetTypes(); TableAppendState append_state; - new_collection->InitializeAppend(append_state); + result_collection->InitializeAppend(append_state); DataChunk scan_chunk; scan_chunk.Initialize(context, types); @@ -78,7 +87,8 @@ class CollectionMerger { for (idx_t i = 0; i < types.size(); i++) { column_ids.emplace_back(i); } - for (auto &collection : current_collections) { + for (idx_t i = 1; i < collection_indexes.size(); i++) { + auto collection = data_table.GetOptimisticCollection(context, collection_indexes[i]); if (!collection) { continue; } @@ -92,35 +102,38 @@ class CollectionMerger { if (scan_chunk.size() == 0) { break; } - auto new_row_group = new_collection->Append(scan_chunk, append_state); + auto new_row_group = result_collection->Append(scan_chunk, append_state); if (new_row_group) { - writer.WriteNewRowGroup(*new_collection); + writer.WriteNewRowGroup(*result_collection); } } + data_table.ResetOptimisticCollection(context, collection_indexes[i]); } - new_collection->FinalizeAppend(TransactionData(0, 0), append_state); - writer.WriteLastRowGroup(*new_collection); + result_collection->FinalizeAppend(TransactionData(0, 0), append_state); + writer.WriteLastRowGroup(*result_collection); } else if (batch_type == RowGroupBatchType::NOT_FLUSHED) { - writer.WriteLastRowGroup(*new_collection); + writer.WriteLastRowGroup(*result_collection); } - current_collections.clear(); - return new_collection; + + collection_indexes.clear(); + return result_collection_index; } }; struct RowGroupBatchEntry { - RowGroupBatchEntry(idx_t batch_idx, unique_ptr collection_p, RowGroupBatchType type) - : batch_idx(batch_idx), total_rows(collection_p->GetTotalRows()), unflushed_memory(0), - collection(std::move(collection_p)), type(type) { + RowGroupBatchEntry(RowGroupCollection &collection, const idx_t batch_idx, const PhysicalIndex collection_index, + const RowGroupBatchType type) + : batch_idx(batch_idx), total_rows(collection.GetTotalRows()), 
unflushed_memory(0), + collection_index(collection_index), type(type) { if (type == RowGroupBatchType::NOT_FLUSHED) { - unflushed_memory = collection->GetAllocationSize(); + unflushed_memory = collection.GetAllocationSize(); } } idx_t batch_idx; idx_t total_rows; idx_t unflushed_memory; - unique_ptr collection; + PhysicalIndex collection_index; RowGroupBatchType type; }; @@ -138,7 +151,7 @@ class BatchInsertTask { class BatchInsertGlobalState : public GlobalSinkState { public: - explicit BatchInsertGlobalState(ClientContext &context, DuckTableEntry &table, idx_t minimum_memory_per_thread) + BatchInsertGlobalState(ClientContext &context, DuckTableEntry &table, idx_t minimum_memory_per_thread) : memory_manager(context, minimum_memory_per_thread), table(table), insert_count(0), optimistically_written(false), minimum_memory_per_thread(minimum_memory_per_thread) { row_group_size = table.GetStorage().GetRowGroupSize(); @@ -155,16 +168,14 @@ class BatchInsertGlobalState : public GlobalSinkState { atomic optimistically_written; idx_t minimum_memory_per_thread; - bool ReadyToMerge(idx_t count) const; - void ScheduleMergeTasks(idx_t min_batch_index); - unique_ptr MergeCollections(ClientContext &context, - vector merge_collections, - OptimisticDataWriter &writer); - void AddCollection(ClientContext &context, idx_t batch_index, idx_t min_batch_index, - unique_ptr current_collection, - optional_ptr writer = nullptr); + bool ReadyToMerge(const idx_t count) const; + void ScheduleMergeTasks(ClientContext &context, const idx_t min_batch_index); + PhysicalIndex MergeCollections(ClientContext &context, const vector &merge_collections, + OptimisticDataWriter &writer); + void AddCollection(ClientContext &context, const idx_t batch_index, const idx_t min_batch_index, + const PhysicalIndex collection_index, optional_ptr writer = nullptr); - idx_t MaxThreads(idx_t source_max_threads) override { + idx_t MaxThreads(const idx_t source_max_threads) override { // try to request 4MB per column 
per thread memory_manager.SetMemorySize(source_max_threads * minimum_memory_per_thread); // cap the concurrent threads working on this task based on the amount of available memory @@ -176,7 +187,7 @@ class BatchInsertLocalState : public LocalSinkState { public: BatchInsertLocalState(ClientContext &context, const vector &types, const vector> &bound_defaults) - : default_executor(context, bound_defaults) { + : default_executor(context, bound_defaults), collection_index(DConstants::INVALID_INDEX) { insert_chunk.Initialize(Allocator::Get(context), types); } @@ -184,17 +195,23 @@ class BatchInsertLocalState : public LocalSinkState { ExpressionExecutor default_executor; idx_t current_index; TableAppendState current_append_state; - unique_ptr current_collection; + PhysicalIndex collection_index; optional_ptr writer; unique_ptr constraint_state; - void CreateNewCollection(DuckTableEntry &table, const vector &insert_types) { - auto table_info = table.GetStorage().GetDataTableInfo(); - auto &io_manager = TableIOManager::Get(table.GetStorage()); - current_collection = make_uniq(std::move(table_info), io_manager, insert_types, - NumericCast(MAX_ROW_ID)); - current_collection->InitializeEmpty(); - current_collection->InitializeAppend(current_append_state); + void CreateNewCollection(ClientContext &context, DuckTableEntry &table_entry, + const vector &insert_types) { + auto table_info = table_entry.GetStorage().GetDataTableInfo(); + auto &io_manager = TableIOManager::Get(table_entry.GetStorage()); + + // Create the local row group collection. 
+ auto max_row_id = NumericCast(MAX_ROW_ID); + auto collection = make_uniq(std::move(table_info), io_manager, insert_types, max_row_id); + collection->InitializeEmpty(); + collection->InitializeAppend(current_append_state); + + auto &data_table = table_entry.GetStorage(); + collection_index = data_table.CreateOptimisticCollection(context, std::move(collection)); } }; @@ -210,23 +227,27 @@ class MergeCollectionTask : public BatchInsertTask { vector merge_collections; idx_t merged_batch_index; - void Execute(const PhysicalBatchInsert &op, ClientContext &context, GlobalSinkState &gstate_p, - LocalSinkState &lstate_p) override { - auto &gstate = gstate_p.Cast(); - auto &lstate = lstate_p.Cast(); - // merge together the collections - D_ASSERT(lstate.writer); - auto final_collection = gstate.MergeCollections(context, std::move(merge_collections), *lstate.writer); - // add the merged-together collection to the set of batch indexes - lock_guard l(gstate.lock); - RowGroupBatchEntry new_entry(merged_batch_index, std::move(final_collection), RowGroupBatchType::FLUSHED); + void Execute(const PhysicalBatchInsert &op, ClientContext &context, GlobalSinkState &g_state_p, + LocalSinkState &l_state_p) override { + auto &g_state = g_state_p.Cast(); + auto &l_state = l_state_p.Cast(); + + // Merge the collections. + D_ASSERT(l_state.writer); + auto collection_index = g_state.MergeCollections(context, std::move(merge_collections), *l_state.writer); + + // Add the result collection to the set of batch indexes. 
+ lock_guard l(g_state.lock); + auto result_collection = g_state.table.GetStorage().GetOptimisticCollection(context, collection_index); + RowGroupBatchEntry new_entry(*result_collection, merged_batch_index, collection_index, + RowGroupBatchType::FLUSHED); auto it = std::lower_bound( - gstate.collections.begin(), gstate.collections.end(), new_entry, + g_state.collections.begin(), g_state.collections.end(), new_entry, [&](const RowGroupBatchEntry &a, const RowGroupBatchEntry &b) { return a.batch_idx < b.batch_idx; }); if (it->batch_idx != merged_batch_index) { throw InternalException("Merged batch index was no longer present in collection"); } - it->collection = std::move(new_entry.collection); + it->collection_index = new_entry.collection_index; } }; @@ -239,7 +260,7 @@ struct BatchMergeTask { idx_t total_count; }; -bool BatchInsertGlobalState::ReadyToMerge(idx_t count) const { +bool BatchInsertGlobalState::ReadyToMerge(const idx_t count) const { // we try to merge so the count fits nicely into row groups if (count >= row_group_size / 10 * 9 && count <= row_group_size) { // 90%-100% of row group size @@ -260,9 +281,8 @@ bool BatchInsertGlobalState::ReadyToMerge(idx_t count) const { return false; } -void BatchInsertGlobalState::ScheduleMergeTasks(idx_t min_batch_index) { +void BatchInsertGlobalState::ScheduleMergeTasks(ClientContext &context, const idx_t min_batch_index) { idx_t current_idx; - vector to_be_scheduled_tasks; BatchMergeTask current_task(next_start); @@ -312,13 +332,14 @@ void BatchInsertGlobalState::ScheduleMergeTasks(idx_t min_batch_index) { vector merge_collections; for (idx_t idx = scheduled_task.start_index; idx < scheduled_task.end_index; idx++) { auto &entry = collections[idx]; - if (!entry.collection || entry.type == RowGroupBatchType::FLUSHED) { + if (!entry.collection_index.IsValid() || entry.type == RowGroupBatchType::FLUSHED) { throw InternalException("Adding a row group collection that should not be flushed"); } - RowGroupBatchEntry 
added_entry(collections[scheduled_task.start_index].batch_idx, - std::move(entry.collection), RowGroupBatchType::FLUSHED); + auto collection = table.GetStorage().GetOptimisticCollection(context, entry.collection_index); + RowGroupBatchEntry added_entry(*collection, collections[scheduled_task.start_index].batch_idx, + entry.collection_index, RowGroupBatchType::FLUSHED); added_entry.unflushed_memory = entry.unflushed_memory; - merge_collections.push_back(std::move(added_entry)); + merge_collections.push_back(added_entry); entry.total_rows = scheduled_task.total_count; entry.type = RowGroupBatchType::FLUSHED; } @@ -335,14 +356,14 @@ void BatchInsertGlobalState::ScheduleMergeTasks(idx_t min_batch_index) { } } -unique_ptr BatchInsertGlobalState::MergeCollections(ClientContext &context, - vector merge_collections, - OptimisticDataWriter &writer) { +PhysicalIndex BatchInsertGlobalState::MergeCollections(ClientContext &context, + const vector &merge_collections, + OptimisticDataWriter &writer) { D_ASSERT(!merge_collections.empty()); - CollectionMerger merger(context); + CollectionMerger merger(context, table.GetStorage()); idx_t written_data = 0; for (auto &entry : merge_collections) { - merger.AddCollection(std::move(entry.collection), RowGroupBatchType::NOT_FLUSHED); + merger.AddCollection(entry.collection_index, RowGroupBatchType::NOT_FLUSHED); written_data += entry.unflushed_memory; } optimistically_written = true; @@ -350,22 +371,23 @@ unique_ptr BatchInsertGlobalState::MergeCollections(ClientCo return merger.Flush(writer); } -void BatchInsertGlobalState::AddCollection(ClientContext &context, idx_t batch_index, idx_t min_batch_index, - unique_ptr current_collection, +void BatchInsertGlobalState::AddCollection(ClientContext &context, const idx_t batch_index, const idx_t min_batch_index, + const PhysicalIndex collection_index, optional_ptr writer) { if (batch_index < min_batch_index) { throw InternalException("Batch index of the added collection (%llu) is smaller than 
the min batch index (%llu)", batch_index, min_batch_index); } - auto new_count = current_collection->GetTotalRows(); + auto collection = table.GetStorage().GetOptimisticCollection(context, collection_index); + auto new_count = collection->GetTotalRows(); auto batch_type = new_count < row_group_size ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED; if (batch_type == RowGroupBatchType::FLUSHED && writer) { - writer->WriteLastRowGroup(*current_collection); + writer->WriteLastRowGroup(*collection); } lock_guard l(lock); insert_count += new_count; // add the collection to the batch index - RowGroupBatchEntry new_entry(batch_index, std::move(current_collection), batch_type); + RowGroupBatchEntry new_entry(*collection, batch_index, collection_index, batch_type); if (batch_type == RowGroupBatchType::NOT_FLUSHED) { memory_manager.IncreaseUnflushedMemory(new_entry.unflushed_memory); } @@ -379,9 +401,9 @@ void BatchInsertGlobalState::AddCollection(ClientContext &context, idx_t batch_i "batch indexes are not uniquely distributed over threads", batch_index); } - collections.insert(it, std::move(new_entry)); + collections.insert(it, new_entry); if (writer) { - ScheduleMergeTasks(min_batch_index); + ScheduleMergeTasks(context, min_batch_index); } } @@ -441,15 +463,16 @@ SinkNextBatchType PhysicalBatchInsert::NextBatch(ExecutionContext &context, Oper auto &memory_manager = gstate.memory_manager; auto batch_index = lstate.partition_info.batch_index.GetIndex(); - if (lstate.current_collection) { + if (lstate.collection_index.IsValid()) { if (lstate.current_index == batch_index) { throw InternalException("NextBatch called with the same batch index?"); } // batch index has changed: move the old collection to the global state and create a new collection TransactionData tdata(0, 0); - lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state); + auto collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + 
collection->FinalizeAppend(tdata, lstate.current_append_state); gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), - std::move(lstate.current_collection), lstate.writer); + lstate.collection_index, lstate.writer); bool any_unblocked; { @@ -459,7 +482,7 @@ SinkNextBatchType PhysicalBatchInsert::NextBatch(ExecutionContext &context, Oper if (!any_unblocked) { ExecuteTasks(context.client, gstate, lstate); } - lstate.current_collection.reset(); + lstate.collection_index.index = DConstants::INVALID_INDEX; } lstate.current_index = batch_index; @@ -501,10 +524,10 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c } } } - if (!lstate.current_collection) { + if (!lstate.collection_index.IsValid()) { lock_guard l(gstate.lock); // no collection yet: create a new one - lstate.CreateNewCollection(table, insert_types); + lstate.CreateNewCollection(context.client, table, insert_types); if (!lstate.writer) { lstate.writer = &table.GetStorage().CreateOptimisticWriter(context.client); } @@ -520,10 +543,11 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c auto &storage = table.GetStorage(); storage.VerifyAppendConstraints(*lstate.constraint_state, context.client, lstate.insert_chunk, nullptr, nullptr); - auto new_row_group = lstate.current_collection->Append(lstate.insert_chunk, lstate.current_append_state); + auto collection = table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto new_row_group = collection->Append(lstate.insert_chunk, lstate.current_append_state); if (new_row_group) { // we have already written to disk - flush the next row group as well - lstate.writer->WriteNewRowGroup(*lstate.current_collection); + lstate.writer->WriteNewRowGroup(*collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -541,12 +565,13 @@ SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, Op 
memory_manager.UpdateMinBatchIndex(lstate.partition_info.min_batch_index.GetIndex()); - if (lstate.current_collection) { + if (lstate.collection_index.IsValid()) { TransactionData tdata(0, 0); - lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state); - if (lstate.current_collection->GetTotalRows() > 0) { + auto collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + collection->FinalizeAppend(tdata, lstate.current_append_state); + if (collection->GetTotalRows() > 0) { gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), - std::move(lstate.current_collection)); + lstate.collection_index); } } if (lstate.writer) { @@ -568,6 +593,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, OperatorSinkFinalizeInput &input) const { auto &gstate = input.global_state.Cast(); auto &memory_manager = gstate.memory_manager; + auto &data_table = gstate.table.GetStorage(); if (gstate.optimistically_written || gstate.insert_count >= gstate.row_group_size) { // we have written data to disk optimistically or are inserting a large amount of data @@ -580,9 +606,9 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, if (entry.type == RowGroupBatchType::NOT_FLUSHED) { // this collection has not been flushed: add it to the merge set if (!current_merger) { - current_merger = make_uniq(context); + current_merger = make_uniq(context, data_table); } - current_merger->AddCollection(std::move(entry.collection), entry.type); + current_merger->AddCollection(entry.collection_index, entry.type); memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); } else { // this collection has been flushed: it does not need to be merged @@ -592,8 +618,8 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, mergers.push_back(std::move(current_merger)); current_merger.reset(); } - auto 
larger_merger = make_uniq(context); - larger_merger->AddCollection(std::move(entry.collection), entry.type); + auto larger_merger = make_uniq(context, data_table); + larger_merger->AddCollection(entry.collection_index, entry.type); mergers.push_back(std::move(larger_merger)); } } @@ -602,7 +628,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, } // now that we have created all of the mergers, perform the actual merging - vector> final_collections; + vector final_collections; final_collections.reserve(mergers.size()); auto &writer = storage.CreateOptimisticWriter(context); for (auto &merger : mergers) { @@ -610,8 +636,10 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, } // finally, merge the row groups into the local storage - for (auto &collection : final_collections) { + for (const auto collection_index : final_collections) { + auto collection = data_table.GetOptimisticCollection(context, collection_index); storage.LocalMerge(context, *collection); + data_table.ResetOptimisticCollection(context, collection_index); } storage.FinalizeOptimisticWriter(context, writer); } else { @@ -628,10 +656,12 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, } memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); - entry.collection->Scan(transaction, [&](DataChunk &insert_chunk) { + auto collection = data_table.GetOptimisticCollection(context, entry.collection_index); + collection->Scan(transaction, [&](DataChunk &insert_chunk) { storage.LocalAppend(append_state, context, insert_chunk, false); return true; }); + data_table.ResetOptimisticCollection(context, entry.collection_index); } storage.FinalizeLocalAppend(append_state); } diff --git a/src/execution/operator/persistent/physical_insert.cpp b/src/execution/operator/persistent/physical_insert.cpp index 3594ae15e7ff..1604d3fcbc1d 100644 --- a/src/execution/operator/persistent/physical_insert.cpp +++ 
b/src/execution/operator/persistent/physical_insert.cpp @@ -706,10 +706,10 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, OnConflictHandling(table, context, lstate); D_ASSERT(action_type != OnConflictAction::UPDATE); - auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); - auto new_row_group = collection.Append(lstate.insert_chunk, lstate.local_append_state); + auto collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto new_row_group = collection->Append(lstate.insert_chunk, lstate.local_append_state); if (new_row_group) { - lstate.writer->WriteNewRowGroup(collection); + lstate.writer->WriteNewRowGroup(*collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -732,10 +732,10 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // parallel append: finalize the append TransactionData tdata(0, 0); auto &data_table = gstate.table.GetStorage(); - auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); - collection.FinalizeAppend(tdata, lstate.local_append_state); + auto collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + collection->FinalizeAppend(tdata, lstate.local_append_state); - auto append_count = collection.GetTotalRows(); + auto append_count = collection->GetTotalRows(); lock_guard lock(gstate.lock); gstate.insert_count += append_count; @@ -743,16 +743,16 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // we have few rows - append to the local storage directly storage.InitializeLocalAppend(gstate.append_state, table, context.client, bound_constraints); auto &transaction = DuckTransaction::Get(context.client, table.catalog); - collection.Scan(transaction, [&](DataChunk &insert_chunk) { + collection->Scan(transaction, [&](DataChunk &insert_chunk) { storage.LocalAppend(gstate.append_state, 
context.client, insert_chunk, false); return true; }); storage.FinalizeLocalAppend(gstate.append_state); } else { // we have written rows to disk optimistically - merge directly into the transaction-local storage - lstate.writer->WriteLastRowGroup(collection); + lstate.writer->WriteLastRowGroup(*collection); lstate.writer->FinalFlush(); - gstate.table.GetStorage().LocalMerge(context.client, collection); + gstate.table.GetStorage().LocalMerge(context.client, *collection); gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer); } diff --git a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp index f12084e39af9..c5f4f3ac2807 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp @@ -57,6 +57,7 @@ class InsertLocalState : public LocalSinkState { DataChunk update_chunk; ExpressionExecutor default_executor; TableAppendState local_append_state; + //! An index to the optimistic row group collection vector of the local table storage for this transaction. PhysicalIndex collection_index; optional_ptr writer; // Rows that have been updated by a DO UPDATE conflict diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp index 16354f45024b..ee366441cca1 100644 --- a/src/include/duckdb/storage/data_table.hpp +++ b/src/include/duckdb/storage/data_table.hpp @@ -119,7 +119,10 @@ class DataTable { //! Returns the index into the optimistic_collections vector for newly created collection. PhysicalIndex CreateOptimisticCollection(ClientContext &context, unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. 
- RowGroupCollection &GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); + optional_ptr GetOptimisticCollection(ClientContext &context, + const PhysicalIndex collection_index); + //! Resets the optimistic row group collection corresponding to the index. + void ResetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! Create an optimistic writer for this table. Used for optimistically writing parallel appends. OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context); void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer); diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index b5a7398a8446..58fda0931bdf 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp @@ -81,7 +81,9 @@ class LocalTableStorage : public enable_shared_from_this { //! Returns the index into the optimistic_collections vector for newly created collection. PhysicalIndex CreateOptimisticCollection(unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - RowGroupCollection &GetOptimisticCollection(const PhysicalIndex collection_index); + optional_ptr GetOptimisticCollection(const PhysicalIndex collection_index); + //! Resets the optimistic row group collection corresponding to the index. + void ResetOptimisticCollection(const PhysicalIndex collection_index); //! Create an optimistic writer for this table. OptimisticDataWriter &CreateOptimisticWriter(); void FinalizeOptimisticWriter(OptimisticDataWriter &writer); @@ -145,7 +147,9 @@ class LocalStorage { //! Returns the index into the optimistic_collections vector for newly created collection. PhysicalIndex CreateOptimisticCollection(DataTable &table, unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. 
- RowGroupCollection &GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); + optional_ptr GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); + //! Resets the optimistic row group collection corresponding to the index. + void ResetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Create an optimistic writer for this table. OptimisticDataWriter &CreateOptimisticWriter(DataTable &table); void FinalizeOptimisticWriter(DataTable &table, OptimisticDataWriter &writer); diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 9e194f0d6549..c717c6697da3 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -853,11 +853,17 @@ PhysicalIndex DataTable::CreateOptimisticCollection(ClientContext &context, uniq return local_storage.CreateOptimisticCollection(*this, std::move(collection)); } -RowGroupCollection &DataTable::GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index) { +optional_ptr DataTable::GetOptimisticCollection(ClientContext &context, + const PhysicalIndex collection_index) { auto &local_storage = LocalStorage::Get(context, db); return local_storage.GetOptimisticCollection(*this, collection_index); } +void DataTable::ResetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index) { + auto &local_storage = LocalStorage::Get(context, db); + local_storage.ResetOptimisticCollection(*this, collection_index); +} + OptimisticDataWriter &DataTable::CreateOptimisticWriter(ClientContext &context) { auto &local_storage = LocalStorage::Get(context, db); return local_storage.CreateOptimisticWriter(*this); diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 6304147f715b..3e36e0ba55d8 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -239,9 +239,18 @@ PhysicalIndex LocalTableStorage::CreateOptimisticCollection(unique_ptr 
LocalTableStorage::GetOptimisticCollection(const PhysicalIndex collection_index) { lock_guard l(collections_lock); - return *optimistic_collections[collection_index.index]; + auto &collection = optimistic_collections[collection_index.index]; + if (collection == nullptr) { + return nullptr; + } + return *collection; +} + +void LocalTableStorage::ResetOptimisticCollection(const PhysicalIndex collection_index) { + lock_guard l(collections_lock); + optimistic_collections[collection_index.index].reset(); } OptimisticDataWriter &LocalTableStorage::CreateOptimisticWriter() { @@ -470,11 +479,17 @@ PhysicalIndex LocalStorage::CreateOptimisticCollection(DataTable &table, unique_ return storage.CreateOptimisticCollection(std::move(collection)); } -RowGroupCollection &LocalStorage::GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index) { +optional_ptr LocalStorage::GetOptimisticCollection(DataTable &table, + const PhysicalIndex collection_index) { auto &storage = table_manager.GetOrCreateStorage(context, table); return storage.GetOptimisticCollection(collection_index); } +void LocalStorage::ResetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index) { + auto &storage = table_manager.GetOrCreateStorage(context, table); + storage.ResetOptimisticCollection(collection_index); +} + OptimisticDataWriter &LocalStorage::CreateOptimisticWriter(DataTable &table) { auto &storage = table_manager.GetOrCreateStorage(context, table); return storage.CreateOptimisticWriter(); From ae5e49486d02af4f29079467198242ed89c9bd95 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Sat, 25 Jan 2025 16:32:27 +0100 Subject: [PATCH 009/142] turn into reference --- .../persistent/physical_batch_insert.cpp | 69 +++++++++---------- .../operator/persistent/physical_insert.cpp | 20 +++--- src/include/duckdb/storage/data_table.hpp | 3 +- .../duckdb/transaction/local_storage.hpp | 4 +- src/storage/data_table.cpp | 3 +- 
src/storage/local_storage.cpp | 8 +-- 6 files changed, 49 insertions(+), 58 deletions(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index a9d8696201f4..17ed86ea6767 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -72,13 +72,13 @@ class CollectionMerger { } auto result_collection_index = collection_indexes[0]; - auto result_collection = data_table.GetOptimisticCollection(context, result_collection_index); - D_ASSERT(result_collection); + auto &result_collection = data_table.GetOptimisticCollection(context, result_collection_index); + if (collection_indexes.size() > 1) { // Merge all collections into one result collection. - auto &types = result_collection->GetTypes(); + auto &types = result_collection.GetTypes(); TableAppendState append_state; - result_collection->InitializeAppend(append_state); + result_collection.InitializeAppend(append_state); DataChunk scan_chunk; scan_chunk.Initialize(context, types); @@ -88,13 +88,10 @@ class CollectionMerger { column_ids.emplace_back(i); } for (idx_t i = 1; i < collection_indexes.size(); i++) { - auto collection = data_table.GetOptimisticCollection(context, collection_indexes[i]); - if (!collection) { - continue; - } + auto &collection = data_table.GetOptimisticCollection(context, collection_indexes[i]); TableScanState scan_state; scan_state.Initialize(column_ids); - collection->InitializeScan(scan_state.local_state, column_ids, nullptr); + collection.InitializeScan(scan_state.local_state, column_ids, nullptr); while (true) { scan_chunk.Reset(); @@ -102,17 +99,17 @@ class CollectionMerger { if (scan_chunk.size() == 0) { break; } - auto new_row_group = result_collection->Append(scan_chunk, append_state); + auto new_row_group = result_collection.Append(scan_chunk, append_state); if (new_row_group) { - writer.WriteNewRowGroup(*result_collection); 
+ writer.WriteNewRowGroup(result_collection); } } data_table.ResetOptimisticCollection(context, collection_indexes[i]); } - result_collection->FinalizeAppend(TransactionData(0, 0), append_state); - writer.WriteLastRowGroup(*result_collection); + result_collection.FinalizeAppend(TransactionData(0, 0), append_state); + writer.WriteLastRowGroup(result_collection); } else if (batch_type == RowGroupBatchType::NOT_FLUSHED) { - writer.WriteLastRowGroup(*result_collection); + writer.WriteLastRowGroup(result_collection); } collection_indexes.clear(); @@ -234,12 +231,12 @@ class MergeCollectionTask : public BatchInsertTask { // Merge the collections. D_ASSERT(l_state.writer); - auto collection_index = g_state.MergeCollections(context, std::move(merge_collections), *l_state.writer); + auto collection_index = g_state.MergeCollections(context, merge_collections, *l_state.writer); // Add the result collection to the set of batch indexes. lock_guard l(g_state.lock); - auto result_collection = g_state.table.GetStorage().GetOptimisticCollection(context, collection_index); - RowGroupBatchEntry new_entry(*result_collection, merged_batch_index, collection_index, + auto &result_collection = g_state.table.GetStorage().GetOptimisticCollection(context, collection_index); + RowGroupBatchEntry new_entry(result_collection, merged_batch_index, collection_index, RowGroupBatchType::FLUSHED); auto it = std::lower_bound( g_state.collections.begin(), g_state.collections.end(), new_entry, @@ -328,15 +325,15 @@ void BatchInsertGlobalState::ScheduleMergeTasks(ClientContext &context, const id for (auto &scheduled_task : to_be_scheduled_tasks) { D_ASSERT(scheduled_task.total_count > 0); D_ASSERT(current_idx > scheduled_task.start_index); - idx_t merged_batch_index = collections[scheduled_task.start_index].batch_idx; + auto merged_batch_index = collections[scheduled_task.start_index].batch_idx; vector merge_collections; for (idx_t idx = scheduled_task.start_index; idx < scheduled_task.end_index; idx++) 
{ auto &entry = collections[idx]; if (!entry.collection_index.IsValid() || entry.type == RowGroupBatchType::FLUSHED) { throw InternalException("Adding a row group collection that should not be flushed"); } - auto collection = table.GetStorage().GetOptimisticCollection(context, entry.collection_index); - RowGroupBatchEntry added_entry(*collection, collections[scheduled_task.start_index].batch_idx, + auto &collection = table.GetStorage().GetOptimisticCollection(context, entry.collection_index); + RowGroupBatchEntry added_entry(collection, collections[scheduled_task.start_index].batch_idx, entry.collection_index, RowGroupBatchType::FLUSHED); added_entry.unflushed_memory = entry.unflushed_memory; merge_collections.push_back(added_entry); @@ -378,16 +375,16 @@ void BatchInsertGlobalState::AddCollection(ClientContext &context, const idx_t b throw InternalException("Batch index of the added collection (%llu) is smaller than the min batch index (%llu)", batch_index, min_batch_index); } - auto collection = table.GetStorage().GetOptimisticCollection(context, collection_index); - auto new_count = collection->GetTotalRows(); + auto &collection = table.GetStorage().GetOptimisticCollection(context, collection_index); + auto new_count = collection.GetTotalRows(); auto batch_type = new_count < row_group_size ? 
RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED; if (batch_type == RowGroupBatchType::FLUSHED && writer) { - writer->WriteLastRowGroup(*collection); + writer->WriteLastRowGroup(collection); } lock_guard l(lock); insert_count += new_count; // add the collection to the batch index - RowGroupBatchEntry new_entry(*collection, batch_index, collection_index, batch_type); + RowGroupBatchEntry new_entry(collection, batch_index, collection_index, batch_type); if (batch_type == RowGroupBatchType::NOT_FLUSHED) { memory_manager.IncreaseUnflushedMemory(new_entry.unflushed_memory); } @@ -469,8 +466,8 @@ SinkNextBatchType PhysicalBatchInsert::NextBatch(ExecutionContext &context, Oper } // batch index has changed: move the old collection to the global state and create a new collection TransactionData tdata(0, 0); - auto collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); - collection->FinalizeAppend(tdata, lstate.current_append_state); + auto &collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + collection.FinalizeAppend(tdata, lstate.current_append_state); gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), lstate.collection_index, lstate.writer); @@ -543,11 +540,11 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c auto &storage = table.GetStorage(); storage.VerifyAppendConstraints(*lstate.constraint_state, context.client, lstate.insert_chunk, nullptr, nullptr); - auto collection = table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); - auto new_row_group = collection->Append(lstate.insert_chunk, lstate.current_append_state); + auto &collection = table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + auto new_row_group = collection.Append(lstate.insert_chunk, lstate.current_append_state); if (new_row_group) { // 
we have already written to disk - flush the next row group as well - lstate.writer->WriteNewRowGroup(*collection); + lstate.writer->WriteNewRowGroup(collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -567,9 +564,9 @@ SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, Op if (lstate.collection_index.IsValid()) { TransactionData tdata(0, 0); - auto collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); - collection->FinalizeAppend(tdata, lstate.current_append_state); - if (collection->GetTotalRows() > 0) { + auto &collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); + collection.FinalizeAppend(tdata, lstate.current_append_state); + if (collection.GetTotalRows() > 0) { gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), lstate.collection_index); } @@ -637,8 +634,8 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, // finally, merge the row groups into the local storage for (const auto collection_index : final_collections) { - auto collection = data_table.GetOptimisticCollection(context, collection_index); - storage.LocalMerge(context, *collection); + auto &collection = data_table.GetOptimisticCollection(context, collection_index); + storage.LocalMerge(context, collection); data_table.ResetOptimisticCollection(context, collection_index); } storage.FinalizeOptimisticWriter(context, writer); @@ -656,8 +653,8 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, } memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); - auto collection = data_table.GetOptimisticCollection(context, entry.collection_index); - collection->Scan(transaction, [&](DataChunk &insert_chunk) { + auto &collection = data_table.GetOptimisticCollection(context, entry.collection_index); + collection.Scan(transaction, [&](DataChunk 
&insert_chunk) { storage.LocalAppend(append_state, context, insert_chunk, false); return true; }); diff --git a/src/execution/operator/persistent/physical_insert.cpp b/src/execution/operator/persistent/physical_insert.cpp index 1604d3fcbc1d..415e94ba6e42 100644 --- a/src/execution/operator/persistent/physical_insert.cpp +++ b/src/execution/operator/persistent/physical_insert.cpp @@ -685,7 +685,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, return SinkResultType::NEED_MORE_INPUT; } - // parallel append + // Parallel append. D_ASSERT(!return_chunk); auto &data_table = gstate.table.GetStorage(); if (!lstate.collection_index.IsValid()) { @@ -706,10 +706,10 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, OnConflictHandling(table, context, lstate); D_ASSERT(action_type != OnConflictAction::UPDATE); - auto collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); - auto new_row_group = collection->Append(lstate.insert_chunk, lstate.local_append_state); + auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + auto new_row_group = collection.Append(lstate.insert_chunk, lstate.local_append_state); if (new_row_group) { - lstate.writer->WriteNewRowGroup(*collection); + lstate.writer->WriteNewRowGroup(collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -732,10 +732,10 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // parallel append: finalize the append TransactionData tdata(0, 0); auto &data_table = gstate.table.GetStorage(); - auto collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); - collection->FinalizeAppend(tdata, lstate.local_append_state); + auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); + collection.FinalizeAppend(tdata, lstate.local_append_state); - auto append_count = 
collection->GetTotalRows(); + auto append_count = collection.GetTotalRows(); lock_guard lock(gstate.lock); gstate.insert_count += append_count; @@ -743,16 +743,16 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato // we have few rows - append to the local storage directly storage.InitializeLocalAppend(gstate.append_state, table, context.client, bound_constraints); auto &transaction = DuckTransaction::Get(context.client, table.catalog); - collection->Scan(transaction, [&](DataChunk &insert_chunk) { + collection.Scan(transaction, [&](DataChunk &insert_chunk) { storage.LocalAppend(gstate.append_state, context.client, insert_chunk, false); return true; }); storage.FinalizeLocalAppend(gstate.append_state); } else { // we have written rows to disk optimistically - merge directly into the transaction-local storage - lstate.writer->WriteLastRowGroup(*collection); + lstate.writer->WriteLastRowGroup(collection); lstate.writer->FinalFlush(); - gstate.table.GetStorage().LocalMerge(context.client, *collection); + gstate.table.GetStorage().LocalMerge(context.client, collection); gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer); } diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp index ee366441cca1..a46ec0637206 100644 --- a/src/include/duckdb/storage/data_table.hpp +++ b/src/include/duckdb/storage/data_table.hpp @@ -119,8 +119,7 @@ class DataTable { //! Returns the index into the optimistic_collections vector for newly created collection. PhysicalIndex CreateOptimisticCollection(ClientContext &context, unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - optional_ptr GetOptimisticCollection(ClientContext &context, - const PhysicalIndex collection_index); + RowGroupCollection &GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! 
Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! Create an optimistic writer for this table. Used for optimistically writing parallel appends. diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index 58fda0931bdf..a71ea9eacbaa 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp @@ -81,7 +81,7 @@ class LocalTableStorage : public enable_shared_from_this { //! Returns the index into the optimistic_collections vector for newly created collection. PhysicalIndex CreateOptimisticCollection(unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - optional_ptr GetOptimisticCollection(const PhysicalIndex collection_index); + RowGroupCollection &GetOptimisticCollection(const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(const PhysicalIndex collection_index); //! Create an optimistic writer for this table. @@ -147,7 +147,7 @@ class LocalStorage { //! Returns the index into the optimistic_collections vector for newly created collection. PhysicalIndex CreateOptimisticCollection(DataTable &table, unique_ptr collection); //! Returns the optimistic row group collection corresponding to the index. - optional_ptr GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); + RowGroupCollection &GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Create an optimistic writer for this table. 
diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index c717c6697da3..0620c939ba04 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -853,8 +853,7 @@ PhysicalIndex DataTable::CreateOptimisticCollection(ClientContext &context, uniq return local_storage.CreateOptimisticCollection(*this, std::move(collection)); } -optional_ptr DataTable::GetOptimisticCollection(ClientContext &context, - const PhysicalIndex collection_index) { +RowGroupCollection &DataTable::GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index) { auto &local_storage = LocalStorage::Get(context, db); return local_storage.GetOptimisticCollection(*this, collection_index); } diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 3e36e0ba55d8..929aef5ebb3d 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -239,12 +239,9 @@ PhysicalIndex LocalTableStorage::CreateOptimisticCollection(unique_ptr LocalTableStorage::GetOptimisticCollection(const PhysicalIndex collection_index) { +RowGroupCollection &LocalTableStorage::GetOptimisticCollection(const PhysicalIndex collection_index) { lock_guard l(collections_lock); auto &collection = optimistic_collections[collection_index.index]; - if (collection == nullptr) { - return nullptr; - } return *collection; } @@ -479,8 +476,7 @@ PhysicalIndex LocalStorage::CreateOptimisticCollection(DataTable &table, unique_ return storage.CreateOptimisticCollection(std::move(collection)); } -optional_ptr LocalStorage::GetOptimisticCollection(DataTable &table, - const PhysicalIndex collection_index) { +RowGroupCollection &LocalStorage::GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index) { auto &storage = table_manager.GetOrCreateStorage(context, table); return storage.GetOptimisticCollection(collection_index); } From 9ebf87d7e20752058047b9555e5137e16afb134a Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 27 Jan 2025 
09:14:14 +0100 Subject: [PATCH 010/142] fix broken test --- src/optimizer/join_order/relation_manager.cpp | 15 +++++---------- .../optimizer/remove_unnecessary_projections.test | 8 ++++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index d4f7032d676d..ccd68101e5e1 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -65,19 +65,14 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr:.*PROJECTION.*PROJECTION.* + +statement ok +pragma explain_output='optimized_only'; + +query II +explain select a b from (select b a from (select a b from values (1), (2), (3) t(a))); +---- +logical_opt :.*PROJECTION.*PROJECTION.*PROJECTION.* \ No newline at end of file From 2a2d359d1ed4777002231e875a33ace9ae447b67 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 27 Jan 2025 12:40:46 +0100 Subject: [PATCH 011/142] generate metric enum --- src/common/enums/metric_type.cpp | 15 ++++++++------- src/include/duckdb/common/enums/metric_type.hpp | 2 +- src/optimizer/remove_useless_projections.cpp | 15 ++++++++++----- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/common/enums/metric_type.cpp b/src/common/enums/metric_type.cpp index 0477f2a4f672..24793fdcdecc 100644 --- a/src/common/enums/metric_type.cpp +++ b/src/common/enums/metric_type.cpp @@ -40,6 +40,7 @@ profiler_settings_t MetricsUtils::GetOptimizerMetrics() { MetricsType::OPTIMIZER_MATERIALIZED_CTE, MetricsType::OPTIMIZER_SUM_REWRITER, MetricsType::OPTIMIZER_LATE_MATERIALIZATION, + MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS, }; } @@ -66,8 +67,6 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) { return MetricsType::OPTIMIZER_FILTER_PUSHDOWN; case OptimizerType::EMPTY_RESULT_PULLUP: return MetricsType::OPTIMIZER_EMPTY_RESULT_PULLUP; - case OptimizerType::REMOVE_USELESS_PROJECTIONS: - return 
MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS; case OptimizerType::CTE_FILTER_PUSHER: return MetricsType::OPTIMIZER_CTE_FILTER_PUSHER; case OptimizerType::REGEX_RANGE: @@ -114,6 +113,8 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) { return MetricsType::OPTIMIZER_SUM_REWRITER; case OptimizerType::LATE_MATERIALIZATION: return MetricsType::OPTIMIZER_LATE_MATERIALIZATION; + case OptimizerType::REMOVE_USELESS_PROJECTIONS: + return MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS; default: throw InternalException("OptimizerType %s cannot be converted to a MetricsType", EnumUtil::ToString(type)); }; @@ -155,8 +156,6 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) { return OptimizerType::BUILD_SIDE_PROBE_SIDE; case MetricsType::OPTIMIZER_LIMIT_PUSHDOWN: return OptimizerType::LIMIT_PUSHDOWN; - case MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS: - return OptimizerType::REMOVE_USELESS_PROJECTIONS; case MetricsType::OPTIMIZER_TOP_N: return OptimizerType::TOP_N; case MetricsType::OPTIMIZER_COMPRESSED_MATERIALIZATION: @@ -174,9 +173,11 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) { case MetricsType::OPTIMIZER_MATERIALIZED_CTE: return OptimizerType::MATERIALIZED_CTE; case MetricsType::OPTIMIZER_SUM_REWRITER: - return OptimizerType::SUM_REWRITER; + return OptimizerType::SUM_REWRITER; case MetricsType::OPTIMIZER_LATE_MATERIALIZATION: - return OptimizerType::LATE_MATERIALIZATION; + return OptimizerType::LATE_MATERIALIZATION; + case MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS: + return OptimizerType::REMOVE_USELESS_PROJECTIONS; default: return OptimizerType::INVALID; }; @@ -210,8 +211,8 @@ bool MetricsUtils::IsOptimizerMetric(MetricsType type) { case MetricsType::OPTIMIZER_EXTENSION: case MetricsType::OPTIMIZER_MATERIALIZED_CTE: case MetricsType::OPTIMIZER_SUM_REWRITER: - case MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS: case MetricsType::OPTIMIZER_LATE_MATERIALIZATION: + case 
MetricsType::OPTIMIZER_REMOVE_USELESS_PROJECTIONS: return true; default: return false; diff --git a/src/include/duckdb/common/enums/metric_type.hpp b/src/include/duckdb/common/enums/metric_type.hpp index bd938779900d..825e5c5369e8 100644 --- a/src/include/duckdb/common/enums/metric_type.hpp +++ b/src/include/duckdb/common/enums/metric_type.hpp @@ -69,7 +69,7 @@ enum class MetricsType : uint8_t { OPTIMIZER_MATERIALIZED_CTE, OPTIMIZER_SUM_REWRITER, OPTIMIZER_LATE_MATERIALIZATION, - OPTIMIZER_REMOVE_USELESS_PROJECTIONS, + OPTIMIZER_REMOVE_USELESS_PROJECTIONS, }; struct MetricsTypeHashFunction { diff --git a/src/optimizer/remove_useless_projections.cpp b/src/optimizer/remove_useless_projections.cpp index 74ae42b30c87..736d1bcd4e88 100644 --- a/src/optimizer/remove_useless_projections.cpp +++ b/src/optimizer/remove_useless_projections.cpp @@ -1,4 +1,5 @@ #include "duckdb/optimizer/remove_useless_projections.hpp" +#include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/common/enums/logical_operator_type.hpp" namespace duckdb { @@ -11,17 +12,21 @@ unique_ptr RemoveUselessProjections::RemoveProjectionsChildren( } unique_ptr RemoveUselessProjections::RemoveProjections(unique_ptr op) { - if (op->type == LogicalOperatorType::LOGICAL_UNION || op->type == LogicalOperatorType::LOGICAL_EXCEPT || - op->type == LogicalOperatorType::LOGICAL_INTERSECT || op->type == LogicalOperatorType::LOGICAL_RECURSIVE_CTE || - op->type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE) { - // guaranteed to find a projection under this that is meant to keep the column order in the presence of - // an optimization done by build side probe side. 
+ switch (op->type) { + case LogicalOperatorType::LOGICAL_UNION: + case LogicalOperatorType::LOGICAL_EXCEPT: + case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: + case LogicalOperatorType::LOGICAL_INTERSECT: + case LogicalOperatorType::LOGICAL_MATERIALIZED_CTE: { for (idx_t i = 0; i < op->children.size(); i++) { first_projection = true; op->children[i] = RemoveProjections(std::move(op->children[i])); } return op; } + default: + break; + } if (op->type != LogicalOperatorType::LOGICAL_PROJECTION) { return RemoveProjectionsChildren(std::move(op)); } From 4f17c52a6f83467e1231f8915c1ef401361fd009 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 27 Jan 2025 12:58:20 +0100 Subject: [PATCH 012/142] add missing includes --- src/optimizer/remove_useless_projections.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/optimizer/remove_useless_projections.cpp b/src/optimizer/remove_useless_projections.cpp index 736d1bcd4e88..9ed9d7a93cf2 100644 --- a/src/optimizer/remove_useless_projections.cpp +++ b/src/optimizer/remove_useless_projections.cpp @@ -1,4 +1,5 @@ #include "duckdb/optimizer/remove_useless_projections.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/common/enums/logical_operator_type.hpp" From f024e1d663f7a641a18dad788d3824e88419c597 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 27 Jan 2025 13:11:46 +0100 Subject: [PATCH 013/142] tidying some stuff up --- .../persistent/physical_batch_insert.cpp | 104 ++++++++++-------- src/storage/local_storage.cpp | 2 + 2 files changed, 59 insertions(+), 47 deletions(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 17ed86ea6767..097341d97c02 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ 
b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -148,7 +148,7 @@ class BatchInsertTask { class BatchInsertGlobalState : public GlobalSinkState { public: - BatchInsertGlobalState(ClientContext &context, DuckTableEntry &table, idx_t minimum_memory_per_thread) + BatchInsertGlobalState(ClientContext &context, DuckTableEntry &table, const idx_t minimum_memory_per_thread) : memory_manager(context, minimum_memory_per_thread), table(table), insert_count(0), optimistically_written(false), minimum_memory_per_thread(minimum_memory_per_thread) { row_group_size = table.GetStorage().GetRowGroupSize(); @@ -231,13 +231,15 @@ class MergeCollectionTask : public BatchInsertTask { // Merge the collections. D_ASSERT(l_state.writer); - auto collection_index = g_state.MergeCollections(context, merge_collections, *l_state.writer); + auto result_collection_index = g_state.MergeCollections(context, merge_collections, *l_state.writer); + merge_collections.clear(); - // Add the result collection to the set of batch indexes. lock_guard l(g_state.lock); - auto &result_collection = g_state.table.GetStorage().GetOptimisticCollection(context, collection_index); - RowGroupBatchEntry new_entry(result_collection, merged_batch_index, collection_index, + auto &result_collection = g_state.table.GetStorage().GetOptimisticCollection(context, result_collection_index); + RowGroupBatchEntry new_entry(result_collection, merged_batch_index, result_collection_index, RowGroupBatchType::FLUSHED); + + // Add the result collection to the set of batch indexes. 
auto it = std::lower_bound( g_state.collections.begin(), g_state.collections.end(), new_entry, [&](const RowGroupBatchEntry &a, const RowGroupBatchEntry &b) { return a.batch_idx < b.batch_idx; }); @@ -339,6 +341,7 @@ void BatchInsertGlobalState::ScheduleMergeTasks(ClientContext &context, const id merge_collections.push_back(added_entry); entry.total_rows = scheduled_task.total_count; entry.type = RowGroupBatchType::FLUSHED; + entry.collection_index = PhysicalIndex(DConstants::INVALID_INDEX); } task_manager.AddTask(make_uniq(std::move(merge_collections), merged_batch_index)); } @@ -567,8 +570,9 @@ SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, Op auto &collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); collection.FinalizeAppend(tdata, lstate.current_append_state); if (collection.GetTotalRows() > 0) { - gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), - lstate.collection_index); + auto batch_index = lstate.partition_info.min_batch_index.GetIndex(); + gstate.AddCollection(context.client, lstate.current_index, batch_index, lstate.collection_index); + lstate.collection_index = PhysicalIndex(DConstants::INVALID_INDEX); } } if (lstate.writer) { @@ -588,18 +592,18 @@ SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, Op //===--------------------------------------------------------------------===// SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context, OperatorSinkFinalizeInput &input) const { - auto &gstate = input.global_state.Cast(); - auto &memory_manager = gstate.memory_manager; - auto &data_table = gstate.table.GetStorage(); + auto &g_state = input.global_state.Cast(); + auto &table = g_state.table; + auto &data_table = g_state.table.GetStorage(); + auto &memory_manager = g_state.memory_manager; - if (gstate.optimistically_written || 
gstate.insert_count >= gstate.row_group_size) { + if (g_state.optimistically_written || g_state.insert_count >= g_state.row_group_size) { // we have written data to disk optimistically or are inserting a large amount of data // perform a final pass over all of the row groups and merge them together vector> mergers; unique_ptr current_merger; - auto &storage = gstate.table.GetStorage(); - for (auto &entry : gstate.collections) { + for (auto &entry : g_state.collections) { if (entry.type == RowGroupBatchType::NOT_FLUSHED) { // this collection has not been flushed: add it to the merge set if (!current_merger) { @@ -607,19 +611,22 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, } current_merger->AddCollection(entry.collection_index, entry.type); memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); - } else { - // this collection has been flushed: it does not need to be merged - // create a separate collection merger only for this entry - if (current_merger) { - // we have small collections remaining: flush them - mergers.push_back(std::move(current_merger)); - current_merger.reset(); - } - auto larger_merger = make_uniq(context, data_table); - larger_merger->AddCollection(entry.collection_index, entry.type); - mergers.push_back(std::move(larger_merger)); + continue; + } + + // This collection has been flushed, so it does not need to be merged. + // Create a separate collection merger for it. + if (current_merger) { + // Flush any remaining small allocations. 
+ mergers.push_back(std::move(current_merger)); + current_merger.reset(); } + auto larger_merger = make_uniq(context, data_table); + larger_merger->AddCollection(entry.collection_index, entry.type); + mergers.push_back(std::move(larger_merger)); } + + g_state.collections.clear(); if (current_merger) { mergers.push_back(std::move(current_merger)); } @@ -627,7 +634,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, // now that we have created all of the mergers, perform the actual merging vector final_collections; final_collections.reserve(mergers.size()); - auto &writer = storage.CreateOptimisticWriter(context); + auto &writer = data_table.CreateOptimisticWriter(context); for (auto &merger : mergers) { final_collections.push_back(merger->Flush(writer)); } @@ -635,33 +642,36 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, // finally, merge the row groups into the local storage for (const auto collection_index : final_collections) { auto &collection = data_table.GetOptimisticCollection(context, collection_index); - storage.LocalMerge(context, collection); + data_table.LocalMerge(context, collection); data_table.ResetOptimisticCollection(context, collection_index); } - storage.FinalizeOptimisticWriter(context, writer); - } else { - // we are writing a small amount of data to disk - // append directly to transaction local storage - auto &table = gstate.table; - auto &storage = table.GetStorage(); - LocalAppendState append_state; - storage.InitializeLocalAppend(append_state, table, context, bound_constraints); - auto &transaction = DuckTransaction::Get(context, table.catalog); - for (auto &entry : gstate.collections) { - if (entry.type != RowGroupBatchType::NOT_FLUSHED) { - throw InternalException("Encountered a flushed batch"); - } - memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); - auto &collection = data_table.GetOptimisticCollection(context, entry.collection_index); - 
collection.Scan(transaction, [&](DataChunk &insert_chunk) { - storage.LocalAppend(append_state, context, insert_chunk, false); - return true; - }); - data_table.ResetOptimisticCollection(context, entry.collection_index); + data_table.FinalizeOptimisticWriter(context, writer); + memory_manager.FinalCheck(); + return SinkFinalizeType::READY; + } + + // We are writing a small amount of data to disk. + // Thus, we append directly to the transaction local storage. + LocalAppendState append_state; + data_table.InitializeLocalAppend(append_state, table, context, bound_constraints); + auto &transaction = DuckTransaction::Get(context, table.catalog); + for (auto &entry : g_state.collections) { + if (entry.type != RowGroupBatchType::NOT_FLUSHED) { + throw InternalException("Encountered a flushed batch"); } - storage.FinalizeLocalAppend(append_state); + + memory_manager.ReduceUnflushedMemory(entry.unflushed_memory); + auto &collection = data_table.GetOptimisticCollection(context, entry.collection_index); + collection.Scan(transaction, [&](DataChunk &insert_chunk) { + data_table.LocalAppend(append_state, context, insert_chunk, false); + return true; + }); + data_table.ResetOptimisticCollection(context, entry.collection_index); } + + g_state.collections.clear(); + data_table.FinalizeLocalAppend(append_state); memory_manager.FinalCheck(); return SinkFinalizeType::READY; } diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 929aef5ebb3d..9a1a880e7413 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -276,7 +276,9 @@ void LocalTableStorage::Rollback() { for (auto &writer : optimistic_writers) { writer->Rollback(); } + optimistic_writers.clear(); optimistic_writer.Rollback(); + for (auto &collection : optimistic_collections) { if (!collection) { continue; From 3ff8737a4396748b99ed90ddc65c56121bf956d8 Mon Sep 17 00:00:00 2001 From: Tishj Date: Mon, 27 Jan 2025 16:06:57 +0100 Subject: [PATCH 014/142] this aligns the 
behavior of varchar->list with that of varchar->struct --- src/function/cast/vector_cast_helpers.cpp | 7 +--- test/sql/cast/string_to_list_cast.test | 48 +++++++++++------------ 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index c7aa523eaa0d..9dfcfa3d3b0c 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -141,12 +141,9 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { idx_t struct_lvl = 0; SkipToClose(pos, buf, len, struct_lvl, '}'); } else if (buf[pos] == ',' || buf[pos] == ']') { - idx_t trailing_whitespace = 0; - while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) { - trailing_whitespace++; - } + auto trimmed_pos = StringTrim(buf, start_pos, pos); if (buf[pos] != ']' || start_pos != pos || seen_value) { - state.HandleValue(buf, start_pos, pos - trailing_whitespace); + state.HandleValue(buf, start_pos, trimmed_pos); seen_value = true; } if (buf[pos] == ']') { diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index 45a84603810a..ff1c56ac87e5 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -16,7 +16,7 @@ SELECT '[12,13,14]'::INT[]; query I SELECT '["hello", "world", "!"]'::VARCHAR[]; ---- -["hello", "world", "!"] +[hello, world, !] query I SELECT CAST('[Hello World!]' AS VARCHAR[]); @@ -110,8 +110,8 @@ INSERT INTO stringList VALUES ('["hello","world","!"]'), ('["Amazing","text"]'), query I SELECT col1::VARCHAR[] FROM stringList; ---- -["hello", "world", "!"] -["Amazing", "text"] +[hello, world, !] +[Amazing, text] [Hello World!] 
# --------------------------------------------------- @@ -124,8 +124,8 @@ INSERT INTO nestedStrings VALUES ('[["hello"], ["world"],["!"]]'), ('[["Amazing" query I SELECT col1::VARCHAR[][] FROM nestedStrings; ---- -[["hello"], ["world"], ["!"]] -[["Amazing"], ["text"]] +[[hello], [world], [!]] +[[Amazing], [text]] [[Hello World!]] # --------------------------------------------------- @@ -138,8 +138,8 @@ INSERT INTO superNestedStrings VALUES ('[[[[["hello"]]], [[["world"],["!"]]]]]') query I SELECT col1::VARCHAR[][][][][] FROM superNestedStrings; ---- -[[[[["hello"]]], [[["world"], ["!"]]]]] -[[[[["Amazing"]], [["text"]]]]] +[[[[[hello]]], [[[world], [!]]]]] +[[[[[Amazing]], [[text]]]]] [[[[[Hello World!]]]]] # --------------------------------------------------- @@ -201,39 +201,39 @@ SELECT col1::INT[][][][][][] FROM crazyNested; # Quote handling # --------------------------------------------------- query I -SELECT CAST('[''hello'',''world'', ''!'']' AS VARCHAR[]); +SELECT CAST($$['hello','world', '!']$$ AS VARCHAR[]); ---- -['hello', 'world', '!'] +[hello, world, !] 
query I -SELECT CAST('[''''hello'''',''''world'''', ''''!'''']' AS VARCHAR[]); +SELECT CAST($$[''hello'',''world'', ''!'']$$ AS VARCHAR[]); ---- -[''hello'', ''world'', ''!''] +['hello', 'world', '!'] query I -SELECT CAST('[[ [''🦆, 🦆, 🦆'']], [[duck, db, ''🦆''] ]]' AS VARCHAR[][][]); +SELECT CAST($$[[ ['🦆, 🦆, 🦆']], [[duck, db, '🦆'] ]]$$ AS VARCHAR[][][]); ---- -[[['🦆, 🦆, 🦆']], [[duck, db, '🦆']]] +[[[🦆, 🦆, 🦆]], [[duck, db, 🦆]]] query I -SELECT CAST('["can''t", "you''re", "i''m"]' AS VARCHAR[]); +SELECT CAST($$["can't", "you're", "i'm"]$$ AS VARCHAR[]); ---- -["can't", "you're", "i'm"] +[can't, you're, i'm] query I -SELECT CAST('[can''t, you''re, i''m]' AS VARCHAR[]); +SELECT CAST($$[can't, you're, i'm]$$ AS VARCHAR[]); ---- [can't, you're, i'm] query I -SELECT CAST('["]", "hello", "world"]' AS VARCHAR[]); +SELECT CAST($$["]", "hello", "world"]$$ AS VARCHAR[]); ---- -["]", "hello", "world"] +[], hello, world] query I -SELECT CAST('['']'', "hello", "world"]' AS VARCHAR[]); +SELECT CAST($$[']', "hello", "world"]$$ AS VARCHAR[]); ---- -[']', "hello", "world"] +[], hello, world] # Test for whitespaces @@ -249,9 +249,9 @@ SELECT CAST('[ [ [12, 13,14], [8, 9 ] ],[[ 4 ] ], [[[12, 13, 14], [8, 9]], [[4]], [[2, 1, 0]]] query I -SELECT CAST('[" hello"," '' world", "! "]' AS VARCHAR[]); +SELECT CAST($$[" hello"," ' world", "! "]$$ AS VARCHAR[]); ---- -[" hello", " ' world", "! "] +[ hello, ' world, ! ] query I SELECT CAST('[ hello , world , ! ]' AS VARCHAR[]); @@ -259,9 +259,9 @@ SELECT CAST('[ hello , world , ! ]' AS VARCHAR[]); [hello, world, !] query I -SELECT CAST('[ [ " hello"] ,[" world" ],[ "! " ] ]' AS VARCHAR[][]); +SELECT CAST($$[ [ " hello"] ,[" world" ],[ "! " ] ]$$ AS VARCHAR[][]); ---- -[[" hello"], [" world"], ["! "]] +[[ hello], [ world], [! 
]] # Empty list From edd27473333030b1b7ace05124a40681b4337bcb Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 5 Feb 2025 15:37:28 +0100 Subject: [PATCH 015/142] WIP: support for escaping in string -> list/struct cast, struct isn't finished yet --- src/function/cast/vector_cast_helpers.cpp | 326 +++++++++++++++------- test/sql/cast/string_to_list_cast.test | 2 +- 2 files changed, 226 insertions(+), 102 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 9dfcfa3d3b0c..64f26187e4dc 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -1,32 +1,56 @@ #include "duckdb/function/cast/vector_cast_helpers.hpp" +#include "duckdb/common/typedefs.hpp" + +namespace { + +struct StringCastInputState { +public: + StringCastInputState(const char *buf, idx_t &pos, idx_t &len) : buf(buf), pos(pos), len(len) { + } + +public: + const char *buf; + idx_t &pos; + idx_t &len; + bool escaped = false; +}; + +} // namespace namespace duckdb { // ------- Helper functions for splitting string nested types ------- -static bool IsNull(const char *buf, idx_t start_pos, Vector &child, idx_t row_idx) { - if ((buf[start_pos] == 'N' || buf[start_pos] == 'n') && (buf[start_pos + 1] == 'U' || buf[start_pos + 1] == 'u') && - (buf[start_pos + 2] == 'L' || buf[start_pos + 2] == 'l') && - (buf[start_pos + 3] == 'L' || buf[start_pos + 3] == 'l')) { - FlatVector::SetNull(child, row_idx, true); - return true; +static bool IsNull(StringCastInputState &input_state) { + auto &buf = input_state.buf; + auto &pos = input_state.pos; + if (input_state.pos + 4 != input_state.len) { + return false; } - return false; + return StringUtil::CIEquals(string(buf + pos, buf + pos + 4), "null"); } -inline static void SkipWhitespace(const char *buf, idx_t &pos, idx_t len) { +inline static void SkipWhitespace(StringCastInputState &input_state) { + auto &buf = input_state.buf; + auto &pos = input_state.pos; + auto 
&len = input_state.len; while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { pos++; + input_state.escaped = false; } } -static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) { +static bool SkipToCloseQuotes(StringCastInputState &input_state) { + auto &buf = input_state.buf; + auto &pos = input_state.pos; + auto &len = input_state.len; + auto &escaped = input_state.escaped; + char quote = buf[pos]; pos++; - bool escaped = false; while (pos < len) { if (buf[pos] == '\\') { - escaped = !escaped; + escaped = true; } else { if (buf[pos] == quote && !escaped) { return true; @@ -38,48 +62,45 @@ static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) { return false; } -static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl, char close_bracket) { +static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl, char close_bracket) { + auto &idx = input_state.pos; + auto &buf = input_state.buf; + auto &len = input_state.len; + auto &escaped = input_state.escaped; idx++; vector brackets; brackets.push_back(close_bracket); while (idx < len) { - if (buf[idx] == '"' || buf[idx] == '\'') { - if (!SkipToCloseQuotes(idx, buf, len)) { - return false; - } - } else if (buf[idx] == '{') { - brackets.push_back('}'); - } else if (buf[idx] == '[') { - brackets.push_back(']'); - lvl++; - } else if (buf[idx] == brackets.back()) { - if (buf[idx] == ']') { - lvl--; - } - brackets.pop_back(); - if (brackets.empty()) { - return true; + if (!escaped) { + if (buf[idx] == '"' || buf[idx] == '\'') { + if (!SkipToCloseQuotes(input_state)) { + return false; + } + } else if (buf[idx] == '{') { + brackets.push_back('}'); + } else if (buf[idx] == '[') { + brackets.push_back(']'); + lvl++; + } else if (buf[idx] == brackets.back()) { + if (buf[idx] == ']') { + lvl--; + } + brackets.pop_back(); + if (brackets.empty()) { + return true; + } + } else if (buf[idx] == '\\') { + escaped = true; } + } else { + escaped = false; } idx++; } return 
false; } -static idx_t StringTrim(const char *buf, idx_t &start_pos, idx_t pos) { - idx_t trailing_whitespace = 0; - while (pos > start_pos && StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) { - trailing_whitespace++; - } - if ((buf[start_pos] == '"' && buf[pos - trailing_whitespace - 1] == '"') || - (buf[start_pos] == '\'' && buf[pos - trailing_whitespace - 1] == '\'')) { - start_pos++; - trailing_whitespace++; - } - return (pos - trailing_whitespace); -} - struct CountPartOperation { idx_t count = 0; @@ -94,25 +115,45 @@ struct CountPartOperation { // ------- LIST SPLIT ------- struct SplitStringListOperation { - SplitStringListOperation(string_t *child_data, idx_t &child_start, Vector &child) - : child_data(child_data), child_start(child_start), child(child) { +public: + SplitStringListOperation(string_t *child_data, idx_t &entry_count, Vector &child) + : child_data(child_data), entry_count(entry_count), child(child) { } - string_t *child_data; - idx_t &child_start; - Vector &child; - - void HandleValue(const char *buf, idx_t start_pos, idx_t pos) { - if ((pos - start_pos) == 4 && IsNull(buf, start_pos, child, child_start)) { - child_start++; +public: + void HandleValue(const char *buf, idx_t start, idx_t end) { + StringCastInputState temp_state(buf, start, end); + if (IsNull(temp_state)) { + FlatVector::SetNull(child, entry_count, true); + entry_count++; return; } - if (start_pos > pos) { - pos = start_pos; + D_ASSERT(start <= end); + auto length = end - start; + auto allocated_string = StringVector::EmptyString(child, length); + auto string_data = allocated_string.GetDataWriteable(); + uint32_t copied_count = 0; + bool escaped = false; + for (idx_t i = 0; i < length; i++) { + if (!escaped) { + if (buf[start + i] == '\\') { + escaped = true; + } else { + string_data[copied_count++] = buf[start + i]; + } + } else { + string_data[copied_count++] = buf[start + i]; + escaped = false; + } } - child_data[child_start] = StringVector::AddString(child, 
buf + start_pos, pos - start_pos); - child_start++; + child_data[entry_count] = string_t((const char *)string_data, copied_count); // NOLINT + entry_count++; } + +private: + string_t *child_data; + idx_t &entry_count; + Vector &child; }; template @@ -121,42 +162,94 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { idx_t len = input.GetSize(); idx_t lvl = 1; idx_t pos = 0; - bool seen_value = false; - SkipWhitespace(buf, pos, len); + StringCastInputState input_state(buf, pos, len); + + SkipWhitespace(input_state); if (pos == len || buf[pos] != '[') { + //! Does not have a valid list start return false; } - SkipWhitespace(buf, ++pos, len); - idx_t start_pos = pos; + //! Skip the '[' + pos++; + SkipWhitespace(input_state); + optional_idx start_pos; + idx_t end_pos; + bool seen_value = false; while (pos < len) { if (buf[pos] == '[') { - if (!SkipToClose(pos, buf, len, ++lvl, ']')) { - return false; + if (!start_pos.IsValid()) { + start_pos = pos; + } + //! Start of a LIST + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if ((buf[pos] == '"' || buf[pos] == '\'')) { + if (!input_state.escaped) { + if (!start_pos.IsValid()) { + //! Trim the start quote + start_pos = pos + 1; + } + if (!SkipToCloseQuotes(input_state)) { + return false; + } + end_pos = pos - 1; + } else { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; } - } else if ((buf[pos] == '"' || buf[pos] == '\'') && pos == start_pos) { - SkipToCloseQuotes(pos, buf, len); } else if (buf[pos] == '{') { - idx_t struct_lvl = 0; - SkipToClose(pos, buf, len, struct_lvl, '}'); + if (!start_pos.IsValid()) { + start_pos = pos; + } + //! 
Start of a STRUCT + if (!input_state.escaped) { + idx_t struct_lvl = 0; + if (!SkipToClose(input_state, struct_lvl, '}')) { + return false; + } + } + end_pos = pos; } else if (buf[pos] == ',' || buf[pos] == ']') { - auto trimmed_pos = StringTrim(buf, start_pos, pos); - if (buf[pos] != ']' || start_pos != pos || seen_value) { - state.HandleValue(buf, start_pos, trimmed_pos); + if (buf[pos] != ']' || start_pos.IsValid() || seen_value) { + if (!start_pos.IsValid()) { + state.HandleValue(buf, 0, 0); + } else { + auto start = start_pos.GetIndex(); + auto end = (end_pos + 1) - start; + auto substr = std::string(buf + start, end); + state.HandleValue(buf, start, end_pos + 1); + } seen_value = true; } if (buf[pos] == ']') { lvl--; break; } - SkipWhitespace(buf, ++pos, len); - start_pos = pos; + pos++; + SkipWhitespace(input_state); + start_pos = optional_idx(); continue; + } else if (buf[pos] == '\\') { + input_state.escaped = true; + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; } pos++; } - SkipWhitespace(buf, ++pos, len); + pos++; + SkipWhitespace(input_state); return (pos == len && lvl == 0); } @@ -187,7 +280,8 @@ struct SplitStringMapOperation { Vector &varchar_val; bool HandleKey(const char *buf, idx_t start_pos, idx_t pos) { - if ((pos - start_pos) == 4 && IsNull(buf, start_pos, varchar_key, child_start)) { + StringCastInputState temp_state(buf, start_pos, pos); + if (IsNull(temp_state)) { FlatVector::SetNull(varchar_val, child_start, true); child_start++; return false; @@ -197,7 +291,9 @@ struct SplitStringMapOperation { } void HandleValue(const char *buf, idx_t start_pos, idx_t pos) { - if ((pos - start_pos) == 4 && IsNull(buf, start_pos, varchar_val, child_start)) { + StringCastInputState temp_state(buf, start_pos, pos); + if (IsNull(temp_state)) { + FlatVector::SetNull(varchar_val, child_start, true); child_start++; return; } @@ -207,21 +303,30 @@ struct SplitStringMapOperation { }; 
template -static bool FindKeyOrValueMap(const char *buf, idx_t len, idx_t &pos, OP &state, bool key) { - auto start_pos = pos; +static bool FindKeyOrValueMap(StringCastInputState &input_state, OP &state, bool key) { + auto start_pos = input_state.pos; idx_t lvl = 0; + + auto &buf = input_state.buf; + auto &len = input_state.len; + auto &pos = input_state.pos; + while (pos < len) { if (buf[pos] == '"' || buf[pos] == '\'') { - SkipToCloseQuotes(pos, buf, len); + SkipToCloseQuotes(input_state); } else if (buf[pos] == '{') { - SkipToClose(pos, buf, len, lvl, '}'); + SkipToClose(input_state, lvl, '}'); } else if (buf[pos] == '[') { - SkipToClose(pos, buf, len, lvl, ']'); + SkipToClose(input_state, lvl, ']'); } else if (key && buf[pos] == '=') { - idx_t end_pos = StringTrim(buf, start_pos, pos); + // TODO: process the string + // idx_t end_pos = StringTrim(buf, start_pos, pos); + idx_t end_pos = pos; return state.HandleKey(buf, start_pos, end_pos); // put string in KEY_child_vector } else if (!key && (buf[pos] == ',' || buf[pos] == '}')) { - idx_t end_pos = StringTrim(buf, start_pos, pos); + // TODO: process the string + // idx_t end_pos = StringTrim(buf, start_pos, pos); + idx_t end_pos = pos; state.HandleValue(buf, start_pos, end_pos); // put string in VALUE_child_vector return true; } @@ -235,28 +340,33 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { const char *buf = input.GetData(); idx_t len = input.GetSize(); idx_t pos = 0; + StringCastInputState input_state(buf, pos, len); - SkipWhitespace(buf, pos, len); + SkipWhitespace(input_state); if (pos == len || buf[pos] != '{') { return false; } - SkipWhitespace(buf, ++pos, len); + pos++; + SkipWhitespace(input_state); if (pos == len) { return false; } if (buf[pos] == '}') { - SkipWhitespace(buf, ++pos, len); + pos++; + SkipWhitespace(input_state); return (pos == len); } while (pos < len) { - if (!FindKeyOrValueMap(buf, len, pos, state, true)) { + if (!FindKeyOrValueMap(input_state, state, 
true)) { return false; } - SkipWhitespace(buf, ++pos, len); - if (!FindKeyOrValueMap(buf, len, pos, state, false)) { + pos++; + SkipWhitespace(input_state); + if (!FindKeyOrValueMap(input_state, state, false)) { return false; } - SkipWhitespace(buf, ++pos, len); + pos++; + SkipWhitespace(input_state); } return true; } @@ -284,24 +394,31 @@ static bool FindKeyStruct(const char *buf, idx_t len, idx_t &pos) { return false; } -static bool FindValueStruct(const char *buf, idx_t len, idx_t &pos, Vector &varchar_child, idx_t &row_idx, +static bool FindValueStruct(StringCastInputState &input_state, Vector &varchar_child, idx_t &row_idx, ValidityMask &child_mask) { - auto start_pos = pos; + auto start_pos = input_state.pos; idx_t lvl = 0; + + auto &len = input_state.len; + auto &pos = input_state.pos; + auto &buf = input_state.buf; while (pos < len) { if (buf[pos] == '"' || buf[pos] == '\'') { - SkipToCloseQuotes(pos, buf, len); + SkipToCloseQuotes(input_state); } else if (buf[pos] == '{') { - SkipToClose(pos, buf, len, lvl, '}'); + SkipToClose(input_state, lvl, '}'); } else if (buf[pos] == '[') { - SkipToClose(pos, buf, len, lvl, ']'); + SkipToClose(input_state, lvl, ']'); } else if (buf[pos] == ',' || buf[pos] == '}') { - idx_t end_pos = StringTrim(buf, start_pos, pos); - if ((end_pos - start_pos) == 4 && IsNull(buf, start_pos, varchar_child, row_idx)) { + // TODO: start_pos at first non-whitespace character + StringCastInputState temp_state(buf, start_pos, pos); + if (IsNull(temp_state)) { + FlatVector::SetNull(varchar_child, row_idx, true); return true; } + // TODO: copy the unescaped portion of the string FlatVector::GetData(varchar_child)[row_idx] = - StringVector::AddString(varchar_child, buf + start_pos, end_pos - start_pos); + StringVector::AddString(varchar_child, buf + start_pos, pos - start_pos); child_mask.SetValid(row_idx); // any child not set to valid will remain invalid return true; } @@ -318,11 +435,14 @@ bool VectorStringToStruct::SplitStruct(const 
string_t &input, vector= key_end) { // empty key name unsupported return false; @@ -343,14 +465,16 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vectorsecond; - SkipWhitespace(buf, ++pos, len); - if (!FindValueStruct(buf, len, pos, *varchar_vectors[child_idx], row_idx, child_masks[child_idx].get())) { + pos++; + SkipWhitespace(input_state); + if (!FindValueStruct(input_state, *varchar_vectors[child_idx], row_idx, child_masks[child_idx].get())) { return false; } - SkipWhitespace(buf, ++pos, len); + pos++; + SkipWhitespace(input_state); } } - SkipWhitespace(buf, pos, len); + SkipWhitespace(input_state); return (pos == len); } diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index ff1c56ac87e5..8d9d8fee1879 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -221,7 +221,7 @@ SELECT CAST($$["can't", "you're", "i'm"]$$ AS VARCHAR[]); [can't, you're, i'm] query I -SELECT CAST($$[can't, you're, i'm]$$ AS VARCHAR[]); +SELECT CAST($$[can\'t, you\'re, i\'m]$$ AS VARCHAR[]); ---- [can't, you're, i'm] From 49558e9ef2ce67ddd76ba15733b5f04c8de742a4 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 5 Feb 2025 17:31:50 +0100 Subject: [PATCH 016/142] removing unescaped quotes, perhaps a little too aggressively, still WIP --- src/function/cast/vector_cast_helpers.cpp | 23 +++++++++++------------ test/sql/cast/string_to_list_cast.test | 8 ++++---- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 64f26187e4dc..48719a2ec77d 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -138,7 +138,7 @@ struct SplitStringListOperation { if (!escaped) { if (buf[start + i] == '\\') { escaped = true; - } else { + } else if (buf[start + i] != '\'' && buf[start + i] != '"') { string_data[copied_count++] = buf[start + i]; } } else { @@ 
-191,21 +191,15 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } end_pos = pos; } else if ((buf[pos] == '"' || buf[pos] == '\'')) { + if (!start_pos.IsValid()) { + start_pos = pos; + } if (!input_state.escaped) { - if (!start_pos.IsValid()) { - //! Trim the start quote - start_pos = pos + 1; - } if (!SkipToCloseQuotes(input_state)) { return false; } - end_pos = pos - 1; - } else { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; } + end_pos = pos; } else if (buf[pos] == '{') { if (!start_pos.IsValid()) { start_pos = pos; @@ -239,7 +233,12 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { start_pos = optional_idx(); continue; } else if (buf[pos] == '\\') { - input_state.escaped = true; + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + input_state.escaped = true; + } } else if (!StringUtil::CharacterIsSpace(buf[pos])) { if (!start_pos.IsValid()) { start_pos = pos; diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index 8d9d8fee1879..504f416973c1 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -206,7 +206,7 @@ SELECT CAST($$['hello','world', '!']$$ AS VARCHAR[]); [hello, world, !] query I -SELECT CAST($$[''hello'',''world'', ''!'']$$ AS VARCHAR[]); +SELECT CAST($$[\'hello\',\'world\', \'!\']$$ AS VARCHAR[]); ---- ['hello', 'world', '!'] @@ -216,7 +216,7 @@ SELECT CAST($$[[ ['🦆, 🦆, 🦆']], [[duck, db, '🦆'] ]]$$ AS VARCHAR[][][ [[[🦆, 🦆, 🦆]], [[duck, db, 🦆]]] query I -SELECT CAST($$["can't", "you're", "i'm"]$$ AS VARCHAR[]); +SELECT CAST($$[can\'t, you\'re, i\'m]$$ AS VARCHAR[]); ---- [can't, you're, i'm] @@ -249,7 +249,7 @@ SELECT CAST('[ [ [12, 13,14], [8, 9 ] ],[[ 4 ] ], [[[12, 13, 14], [8, 9]], [[4]], [[2, 1, 0]]] query I -SELECT CAST($$[" hello"," ' world", "! "]$$ AS VARCHAR[]); +SELECT CAST($$[" hello"," \' world", "! "]$$ AS VARCHAR[]); ---- [ hello, ' world, ! 
] @@ -259,7 +259,7 @@ SELECT CAST('[ hello , world , ! ]' AS VARCHAR[]); [hello, world, !] query I -SELECT CAST($$[ [ " hello"] ,[" world" ],[ "! " ] ]$$ AS VARCHAR[][]); +SELECT CAST($$[ [ \" hello\"] ,[\" world\" ],[ \"! \" ] ]$$ AS VARCHAR[][]); ---- [[ hello], [ world], [! ]] From 8866325e21854f44fd0dbcc52007c12afe8734ae Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 5 Feb 2025 18:17:01 +0100 Subject: [PATCH 017/142] leave the escapes in deeper list levels alone --- src/function/cast/vector_cast_helpers.cpp | 36 ++++++++++++++++++++--- test/sql/cast/string_to_list_cast.test | 2 +- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 48719a2ec77d..7eca9420aba6 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -1,5 +1,6 @@ #include "duckdb/function/cast/vector_cast_helpers.hpp" #include "duckdb/common/typedefs.hpp" +#include "duckdb/common/stack.hpp" namespace { @@ -134,15 +135,42 @@ struct SplitStringListOperation { auto string_data = allocated_string.GetDataWriteable(); uint32_t copied_count = 0; bool escaped = false; + + bool quoted = false; + char quote_char; + stack scopes; for (idx_t i = 0; i < length; i++) { + auto current_char = buf[start + i]; if (!escaped) { - if (buf[start + i] == '\\') { + if (scopes.empty() && current_char == '\\') { + //! Start of escape escaped = true; - } else if (buf[start + i] != '\'' && buf[start + i] != '"') { - string_data[copied_count++] = buf[start + i]; + continue; + } + if (scopes.empty() && (current_char == '\'' || current_char == '"')) { + if (quoted && current_char == quote_char) { + quoted = false; + //! Skip the ending quote + continue; + } else if (!quoted) { + quoted = true; + quote_char = current_char; + //! Skip the starting quote + continue; + } + } + if (!quoted && !scopes.empty() && current_char == scopes.top()) { + //! 
Close scope + scopes.pop(); + } + if (!quoted && (current_char == '[' || current_char == '{')) { + //! New scope + scopes.push(current_char == '[' ? ']' : '}'); } + //! Regular character + string_data[copied_count++] = current_char; } else { - string_data[copied_count++] = buf[start + i]; + string_data[copied_count++] = current_char; escaped = false; } } diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index 504f416973c1..c6e58cd99338 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -259,7 +259,7 @@ SELECT CAST('[ hello , world , ! ]' AS VARCHAR[]); [hello, world, !] query I -SELECT CAST($$[ [ \" hello\"] ,[\" world\" ],[ \"! \" ] ]$$ AS VARCHAR[][]); +SELECT CAST($$[ [ " hello"] ,[" world" ],[ "! " ] ]$$ AS VARCHAR[][]); ---- [[ hello], [ world], [! ]] From 595e4a8d8a42a2d7a130db9d4fe3b61fc69eccb2 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 6 Feb 2025 09:51:46 +0100 Subject: [PATCH 018/142] add escaped doublequote to test --- test/sql/cast/string_to_list_cast.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index c6e58cd99338..c64a950651cf 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -249,9 +249,9 @@ SELECT CAST('[ [ [12, 13,14], [8, 9 ] ],[[ 4 ] ], [[[12, 13, 14], [8, 9]], [[4]], [[2, 1, 0]]] query I -SELECT CAST($$[" hello"," \' world", "! "]$$ AS VARCHAR[]); +SELECT CAST($$[" hello"," \"' world", "! "]$$ AS VARCHAR[]); ---- -[ hello, ' world, ! ] +[ hello, "' world, ! ] query I SELECT CAST('[ hello , world , ! 
]' AS VARCHAR[]); From bb5ca2ec8eb2843ac33a52250e2493efe6ba7ca0 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 6 Feb 2025 11:37:09 +0100 Subject: [PATCH 019/142] more WIP, worked on supporting the same escaping in MAP --- src/function/cast/vector_cast_helpers.cpp | 283 ++++++++++++++------- test/sql/cast/string_to_map_cast.test_slow | 8 +- 2 files changed, 194 insertions(+), 97 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 7eca9420aba6..ba4eeaa67427 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -85,6 +85,9 @@ static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl, char clos lvl++; } else if (buf[idx] == brackets.back()) { if (buf[idx] == ']') { + if (lvl == 0) { + return false; + } lvl--; } brackets.pop_back(); @@ -114,6 +117,55 @@ struct CountPartOperation { } }; +static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t end) { + D_ASSERT(start <= end); + auto length = end - start; + auto allocated_string = StringVector::EmptyString(vec, length); + auto string_data = allocated_string.GetDataWriteable(); + uint32_t copied_count = 0; + bool escaped = false; + + bool quoted = false; + char quote_char; + stack scopes; + for (idx_t i = 0; i < length; i++) { + auto current_char = buf[start + i]; + if (!escaped) { + if (scopes.empty() && current_char == '\\') { + //! Start of escape + escaped = true; + continue; + } + if (scopes.empty() && (current_char == '\'' || current_char == '"')) { + if (quoted && current_char == quote_char) { + quoted = false; + //! Skip the ending quote + continue; + } else if (!quoted) { + quoted = true; + quote_char = current_char; + //! Skip the starting quote + continue; + } + } + if (!quoted && !scopes.empty() && current_char == scopes.top()) { + //! Close scope + scopes.pop(); + } + if (!quoted && (current_char == '[' || current_char == '{')) { + //! 
New scope + scopes.push(current_char == '[' ? ']' : '}'); + } + //! Regular character + string_data[copied_count++] = current_char; + } else { + string_data[copied_count++] = current_char; + escaped = false; + } + } + return string_t((const char *)string_data, copied_count); // NOLINT +} + // ------- LIST SPLIT ------- struct SplitStringListOperation { public: @@ -129,52 +181,7 @@ struct SplitStringListOperation { entry_count++; return; } - D_ASSERT(start <= end); - auto length = end - start; - auto allocated_string = StringVector::EmptyString(child, length); - auto string_data = allocated_string.GetDataWriteable(); - uint32_t copied_count = 0; - bool escaped = false; - - bool quoted = false; - char quote_char; - stack scopes; - for (idx_t i = 0; i < length; i++) { - auto current_char = buf[start + i]; - if (!escaped) { - if (scopes.empty() && current_char == '\\') { - //! Start of escape - escaped = true; - continue; - } - if (scopes.empty() && (current_char == '\'' || current_char == '"')) { - if (quoted && current_char == quote_char) { - quoted = false; - //! Skip the ending quote - continue; - } else if (!quoted) { - quoted = true; - quote_char = current_char; - //! Skip the starting quote - continue; - } - } - if (!quoted && !scopes.empty() && current_char == scopes.top()) { - //! Close scope - scopes.pop(); - } - if (!quoted && (current_char == '[' || current_char == '{')) { - //! New scope - scopes.push(current_char == '[' ? ']' : '}'); - } - //! 
Regular character - string_data[copied_count++] = current_char; - } else { - string_data[copied_count++] = current_char; - escaped = false; - } - } - child_data[entry_count] = string_t((const char *)string_data, copied_count); // NOLINT + child_data[entry_count] = HandleString(child, buf, start, end); entry_count++; } @@ -206,6 +213,11 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { idx_t end_pos; bool seen_value = false; while (pos < len) { + if (pos == len) { + return false; + } + bool set_escaped = false; + if (buf[pos] == '[') { if (!start_pos.IsValid()) { start_pos = pos; @@ -253,19 +265,19 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { seen_value = true; } if (buf[pos] == ']') { + if (lvl == 0) { + return false; + } lvl--; break; } - pos++; - SkipWhitespace(input_state); start_pos = optional_idx(); - continue; } else if (buf[pos] == '\\') { if (!start_pos.IsValid()) { start_pos = pos; } if (!input_state.escaped) { - input_state.escaped = true; + set_escaped = true; } } else if (!StringUtil::CharacterIsSpace(buf[pos])) { if (!start_pos.IsValid()) { @@ -273,7 +285,9 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } end_pos = pos; } + input_state.escaped = set_escaped; pos++; + SkipWhitespace(input_state); } pos++; SkipWhitespace(input_state); @@ -313,7 +327,7 @@ struct SplitStringMapOperation { child_start++; return false; } - child_key_data[child_start] = StringVector::AddString(varchar_key, buf + start_pos, pos - start_pos); + child_key_data[child_start] = HandleString(varchar_key, buf, start_pos, pos); return true; } @@ -324,50 +338,18 @@ struct SplitStringMapOperation { child_start++; return; } - child_val_data[child_start] = StringVector::AddString(varchar_val, buf + start_pos, pos - start_pos); + child_val_data[child_start] = HandleString(varchar_val, buf, start_pos, pos); child_start++; } }; -template -static bool FindKeyOrValueMap(StringCastInputState &input_state, OP 
&state, bool key) { - auto start_pos = input_state.pos; - idx_t lvl = 0; - - auto &buf = input_state.buf; - auto &len = input_state.len; - auto &pos = input_state.pos; - - while (pos < len) { - if (buf[pos] == '"' || buf[pos] == '\'') { - SkipToCloseQuotes(input_state); - } else if (buf[pos] == '{') { - SkipToClose(input_state, lvl, '}'); - } else if (buf[pos] == '[') { - SkipToClose(input_state, lvl, ']'); - } else if (key && buf[pos] == '=') { - // TODO: process the string - // idx_t end_pos = StringTrim(buf, start_pos, pos); - idx_t end_pos = pos; - return state.HandleKey(buf, start_pos, end_pos); // put string in KEY_child_vector - } else if (!key && (buf[pos] == ',' || buf[pos] == '}')) { - // TODO: process the string - // idx_t end_pos = StringTrim(buf, start_pos, pos); - idx_t end_pos = pos; - state.HandleValue(buf, start_pos, end_pos); // put string in VALUE_child_vector - return true; - } - pos++; - } - return false; -} - template static bool SplitStringMapInternal(const string_t &input, OP &state) { const char *buf = input.GetData(); idx_t len = input.GetSize(); idx_t pos = 0; StringCastInputState input_state(buf, pos, len); + idx_t lvl = 0; SkipWhitespace(input_state); if (pos == len || buf[pos] != '{') { @@ -378,24 +360,139 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (pos == len) { return false; } - if (buf[pos] == '}') { - pos++; - SkipWhitespace(input_state); - return (pos == len); - } + while (pos < len) { - if (!FindKeyOrValueMap(input_state, state, true)) { + optional_idx start_pos; + idx_t end_pos; + while (pos < len && (buf[pos] != '=' || input_state.escaped)) { + bool set_escaped = false; + if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if 
(!SkipToClose(input_state, lvl, '}')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + set_escaped = true; + } + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + } + input_state.escaped = set_escaped; + pos++; + } + if (pos == len) { + return false; + } + if (!start_pos.IsValid()) { + //! Key can not be empty return false; } + auto key_substr = std::string(buf + start_pos.GetIndex(), buf + end_pos + 1); + if (!state.HandleKey(buf, start_pos.GetIndex(), end_pos + 1)) { + return false; + } + start_pos = optional_idx(); pos++; SkipWhitespace(input_state); - if (!FindKeyOrValueMap(input_state, state, false)) { + while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { + if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, '}')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + input_state.escaped = true; + } + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + 
} + pos++; + } + if (pos == len) { return false; } + if (!start_pos.IsValid()) { + //! Value is empty + state.HandleValue(buf, 0, 0); + } else { + auto value_substr = std::string(buf + start_pos.GetIndex(), buf + end_pos + 1); + state.HandleValue(buf, start_pos.GetIndex(), end_pos + 1); + } + if (buf[pos] == '}') { + break; + } pos++; SkipWhitespace(input_state); } - return true; + pos++; + SkipWhitespace(input_state); + return (pos == len && lvl == 0); } bool VectorStringToMap::SplitStringMap(const string_t &input, string_t *child_key_data, string_t *child_val_data, diff --git a/test/sql/cast/string_to_map_cast.test_slow b/test/sql/cast/string_to_map_cast.test_slow index 3a5cd142860e..2c896fdc61fa 100644 --- a/test/sql/cast/string_to_map_cast.test_slow +++ b/test/sql/cast/string_to_map_cast.test_slow @@ -76,12 +76,12 @@ SELECT CAST('{''hello''=2, ''world''=50, ''!''=12}' AS MAP(VARCHAR, INT)); {hello=2, world=50, !=12} query I -SELECT CAST('{''''hello''''=hello, ''''world''''=world, ''''!''''=!}' AS MAP(VARCHAR, VARCHAR)); +SELECT CAST($${\'hello\'=hello, \'world\'=world, \'!\'=!}$$ AS MAP(VARCHAR, VARCHAR)); ---- {'hello'=hello, 'world'=world, '!'=!} query I -SELECT CAST('{[[''🦆, 🦆, 🦆'']]=100, [[duck, db, ''🦆'']]=101}' AS MAP(VARCHAR[][], INT)); +SELECT CAST($${[[\'🦆, 🦆, 🦆\']]=100, [[duck, db, \'🦆\']]=101}$$ AS MAP(VARCHAR[][], INT)); ---- {[['🦆, 🦆, 🦆']]=100, [[duck, db, '🦆']]=101} @@ -114,8 +114,8 @@ SELECT CAST('{ [12, 13,14]=val, [ 8, 9 ] =val, [ 4 ]=v {[12, 13, 14]=val, [8, 9]=val, [4]=val} query I -SELECT CAST(' { { a:[2, 3], b: Duckster }= {50.0 =50}, {a : [9,1,4], b:Duck } - ={ 1 = 0} }' AS MAP(STRUCT(a INT[], b VARCHAR), MAP(INT, DOUBLE))); +SELECT CAST($$ { { a:[2, 3], b: Duckster }= {50.0 =50}, {a : [9,1,4], b:Duck } + ={ 1 = 0} }$$ AS MAP(STRUCT(a INT[], b VARCHAR), MAP(INT, DOUBLE))); ---- {{'a': [2, 3], 'b': Duckster}={50=50.0}, {'a': [9, 1, 4], 'b': Duck}={1=0.0}} From 8de6cd55917b430904ffd8a0db8e3cf4dfdd85f3 Mon Sep 17 00:00:00 2001 From: Tishj 
Date: Thu, 6 Feb 2025 12:51:23 +0100 Subject: [PATCH 020/142] map, struct and list should all work correctly now --- src/function/cast/vector_cast_helpers.cpp | 229 +++++++++++++++------- test/sql/cast/string_to_list_cast.test | 17 +- 2 files changed, 173 insertions(+), 73 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index ba4eeaa67427..b6048c12eb53 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -50,14 +50,17 @@ static bool SkipToCloseQuotes(StringCastInputState &input_state) { pos++; while (pos < len) { + bool set_escaped = false; if (buf[pos] == '\\') { - escaped = true; + if (!escaped) { + set_escaped = true; + } } else { if (buf[pos] == quote && !escaped) { return true; } - escaped = false; } + escaped = set_escaped; pos++; } return false; @@ -324,6 +327,7 @@ struct SplitStringMapOperation { StringCastInputState temp_state(buf, start_pos, pos); if (IsNull(temp_state)) { FlatVector::SetNull(varchar_val, child_start, true); + FlatVector::SetNull(varchar_key, child_start, true); child_start++; return false; } @@ -360,6 +364,11 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (pos == len) { return false; } + if (buf[pos] == '}') { + pos++; + SkipWhitespace(input_state); + return pos == len; + } while (pos < len) { optional_idx start_pos; @@ -508,49 +517,6 @@ idx_t VectorStringToMap::CountPartsMap(const string_t &input) { } // ------- STRUCT SPLIT ------- -static bool FindKeyStruct(const char *buf, idx_t len, idx_t &pos) { - while (pos < len) { - if (buf[pos] == ':') { - return true; - } - pos++; - } - return false; -} - -static bool FindValueStruct(StringCastInputState &input_state, Vector &varchar_child, idx_t &row_idx, - ValidityMask &child_mask) { - auto start_pos = input_state.pos; - idx_t lvl = 0; - - auto &len = input_state.len; - auto &pos = input_state.pos; - auto &buf = input_state.buf; - while (pos < 
len) { - if (buf[pos] == '"' || buf[pos] == '\'') { - SkipToCloseQuotes(input_state); - } else if (buf[pos] == '{') { - SkipToClose(input_state, lvl, '}'); - } else if (buf[pos] == '[') { - SkipToClose(input_state, lvl, ']'); - } else if (buf[pos] == ',' || buf[pos] == '}') { - // TODO: start_pos at first non-whitespace character - StringCastInputState temp_state(buf, start_pos, pos); - if (IsNull(temp_state)) { - FlatVector::SetNull(varchar_child, row_idx, true); - return true; - } - // TODO: copy the unescaped portion of the string - FlatVector::GetData(varchar_child)[row_idx] = - StringVector::AddString(varchar_child, buf + start_pos, pos - start_pos); - child_mask.SetValid(row_idx); // any child not set to valid will remain invalid - return true; - } - pos++; - } - return false; -} - bool VectorStringToStruct::SplitStruct(const string_t &input, vector> &varchar_vectors, idx_t &row_idx, string_map_t &child_names, vector> &child_masks) { @@ -558,7 +524,9 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector= key_end) { - // empty key name unsupported - return false; - } - string_t found_key(buf + key_start, UnsafeNumericCast(key_end - key_start)); + SkipWhitespace(input_state); + return (pos == len); + } - auto it = child_names.find(found_key); - if (it == child_names.end()) { - return false; // false key + while (pos < len) { + optional_idx start_pos; + idx_t end_pos; + while (pos < len && (buf[pos] != ':' || input_state.escaped)) { + bool set_escaped = false; + if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, '}')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + 
} + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + set_escaped = true; + } + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; } - child_idx = it->second; + input_state.escaped = set_escaped; pos++; - SkipWhitespace(input_state); - if (!FindValueStruct(input_state, *varchar_vectors[child_idx], row_idx, child_masks[child_idx].get())) { - return false; + } + if (pos == len) { + return false; + } + if (!start_pos.IsValid()) { + //! Key can not be empty + return false; + } + idx_t key_start = start_pos.GetIndex(); + end_pos++; + StringCastInputState key_temp_state(buf, key_start, end_pos); + if (IsNull(key_temp_state)) { + //! Key can not be NULL + return false; + } + auto child_name = HandleString(temp_vec, buf, key_start, end_pos); + auto it = child_names.find(child_name); + if (it == child_names.end()) { + return false; // false key + } + child_idx = it->second; + + start_pos = optional_idx(); + pos++; + SkipWhitespace(input_state); + while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { + if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, '}')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = 
pos; + } + if (!input_state.escaped) { + input_state.escaped = true; + } + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; } pos++; - SkipWhitespace(input_state); } + if (pos == len) { + return false; + } + auto &child_vec = *varchar_vectors[child_idx]; + auto string_data = FlatVector::GetData(child_vec); + auto &child_mask = child_masks[child_idx].get(); + + if (!start_pos.IsValid()) { + start_pos = 0; + end_pos = 0; + } else { + end_pos++; + } + auto value_start = start_pos.GetIndex(); + StringCastInputState value_temp_state(buf, value_start, end_pos); + if (IsNull(value_temp_state)) { + child_mask.SetInvalid(row_idx); + } else { + string_data[row_idx] = HandleString(child_vec, buf, value_start, end_pos); + child_mask.SetValid(row_idx); + } + + if (buf[pos] == '}') { + break; + } + pos++; + SkipWhitespace(input_state); } + pos++; SkipWhitespace(input_state); return (pos == len); } diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index c64a950651cf..37f5b24e4ea8 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -488,7 +488,7 @@ statement ok CREATE TABLE assorted_lists(col1 INT[], col2 VARCHAR[], col3 DATE[]); statement ok -COPY (SELECT [8,7,6], '[hello, Duck''DB]', '[2022-12-2, 1929-01-25]') TO '__TEST_DIR__/assorted_lists.csv' (Header 0); +COPY (SELECT [8,7,6], $$[hello, Duck\\'DB]$$, '[2022-12-2, 1929-01-25]') TO '__TEST_DIR__/assorted_lists.csv' (Header 0); statement ok COPY assorted_lists FROM '__TEST_DIR__/assorted_lists.csv'; @@ -507,19 +507,24 @@ select '[{"bar":"\""}]'::VARCHAR[]; ---- [{"bar":"\""}] -# escaped '\', does not count as an escape for " statement error select '[{"bar":"\\""}]'::VARCHAR[]; ---- +# escapes are only processed once the {} is cast as well +statement error +query I +select '[{"bar":"\\""}]'::STRUCT(bar VARCHAR)[]; +---- + # uneven amount of escapes does escape the " 
query I -select '[{"bar":"\\\""}]'::VARCHAR[]; +select '[{"bar":"\\\""}]'::STRUCT(bar VARCHAR)[]; ---- -[{"bar":"\\\""}] +[{'bar': \"}] # all are escaped except for the last one query I -select '[{"bar":"\"\"\\\"\"\"\\"}]'::VARCHAR[]; +select '[{"bar":"\"\"\\\"\"\"\\"}]'::STRUCT(bar VARCHAR)[]; ---- -[{"bar":"\"\"\\\"\"\"\\"}] +[{'bar': ""\"""\}] From 9b9df847038a72c11a0dbd82af74f03d1e1134ce Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 6 Feb 2025 13:09:28 +0100 Subject: [PATCH 021/142] messed up one piece of escape handling logic --- src/function/cast/vector_cast_helpers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index b6048c12eb53..347d7102bfa1 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -437,6 +437,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { pos++; SkipWhitespace(input_state); while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { + bool set_escaped = false; if (buf[pos] == '"' || buf[pos] == '\'') { if (!start_pos.IsValid()) { start_pos = pos; @@ -473,7 +474,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { start_pos = pos; } if (!input_state.escaped) { - input_state.escaped = true; + set_escaped = true; } } else if (!StringUtil::CharacterIsSpace(buf[pos])) { if (!start_pos.IsValid()) { @@ -481,6 +482,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { } end_pos = pos; } + input_state.escaped = set_escaped; pos++; } if (pos == len) { From fe5a76f12b08a49f68c5a18b35e63c8c1f74d996 Mon Sep 17 00:00:00 2001 From: pdet Date: Mon, 10 Feb 2025 13:42:38 -0300 Subject: [PATCH 022/142] Do duckdb_extract_statements to be able to execute pivot --- data/csv/flights.csv | 4 ++++ src/common/adbc/adbc.cpp | 40 ++++++++++++++++++++++++++++++++++++- test/api/adbc/test_adbc.cpp | 13 
++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 data/csv/flights.csv diff --git a/data/csv/flights.csv b/data/csv/flights.csv new file mode 100644 index 000000000000..7e8e451da4d1 --- /dev/null +++ b/data/csv/flights.csv @@ -0,0 +1,4 @@ +FlightDate|UniqueCarrier|OriginCityName|DestCityName +1988-01-01|AA|New York, NY|Los Angeles, CA +1988-01-02|AA|New York, NY|Los Angeles, CA +1988-01-03|AA|New York, NY|Los Angeles, CA diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index 35ceb2f3406f..09ac3aa5d8dc 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -875,8 +875,46 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char duckdb_destroy_prepare(&wrapper->statement); wrapper->statement = nullptr; } - auto res = duckdb_prepare(wrapper->connection, query, &wrapper->statement); + duckdb_extracted_statements extracted_statements; + auto extract_statements_size = duckdb_extract_statements(wrapper->connection, query, &extracted_statements); + auto error_msg_extract_statements = duckdb_extract_statements_error(extracted_statements); + if (error_msg_extract_statements != nullptr) { + // Things went wrong when executing internal prepared statement + delete extracted_statements; + SetError(error, error_msg_extract_statements); + return ADBC_STATUS_INTERNAL; + } + // Now lets loop over the statements, and execute every one + for (idx_t i = 0; i < extract_statements_size - 1; i++) { + duckdb_prepared_statement statement_internal; + auto res = + duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, i, &statement_internal); + auto error_msg = duckdb_prepare_error(statement_internal); + auto adbc_status = CheckResult(res, error, error_msg); + if (adbc_status != ADBC_STATUS_OK) { + // Things went wrong when executing internal prepared statement + delete extracted_statements; + delete statement_internal; + return adbc_status; + } + // Execute + duckdb_arrow 
out_result; + res = duckdb_execute_prepared_arrow(statement_internal, &out_result); + if (res != DuckDBSuccess) { + SetError(error, duckdb_query_arrow_error(out_result)); + delete out_result; + delete statement_internal; + delete extracted_statements; + return ADBC_STATUS_INVALID_ARGUMENT; + } + delete out_result; + delete statement_internal; + } + // Besides ze last, this one we return + auto res = duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, + extract_statements_size - 1, &wrapper->statement); auto error_msg = duckdb_prepare_error(wrapper->statement); + delete extracted_statements; return CheckResult(res, error, error_msg); } diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index afcbb596d073..213ed7710dd0 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -194,6 +194,19 @@ TEST_CASE("ADBC - Test ingestion - Lineitem", "[adbc]") { REQUIRE(db.QueryAndCheck("SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber")); } +TEST_CASE("ADBC - Pivot", "[adbc]") { + if (!duckdb_lib) { + return; + } + ADBCTestDatabase db; + + auto input_data = db.QueryArrow("SELECT * FROM read_csv_auto(\'data/csv/flights.csv\')"); + + db.CreateTable("flights", input_data); + + REQUIRE(db.QueryAndCheck("PIVOT flights ON UniqueCarrier USING COUNT(1) GROUP BY OriginCityName;")); +} + TEST_CASE("Test Null Error/Database", "[adbc]") { if (!duckdb_lib) { return; From 1dbbb6c1370706afcc12c9f7f53663eca447ddba Mon Sep 17 00:00:00 2001 From: Richard Wesley Date: Mon, 10 Feb 2025 12:02:48 -0800 Subject: [PATCH 023/142] Issue #8265: AsOf Nested Loop * For small probe cardinalities, plan a nested loop join + aggregate * Add asof_loop_join_threshold setting to control its use. 
--- src/common/settings.json | 6 + .../physical_plan/plan_asof_join.cpp | 232 +++++++++++++++++- src/include/duckdb/main/client_config.hpp | 2 + src/include/duckdb/main/settings.hpp | 11 + src/main/config.cpp | 1 + src/main/settings/autogenerated_settings.cpp | 17 ++ test/sql/join/asof/test_asof_join.test | 5 +- .../join/asof/test_asof_join_merge.test_slow | 4 + .../join/asof/test_asof_join_pushdown.test | 3 +- 9 files changed, 277 insertions(+), 4 deletions(-) diff --git a/src/common/settings.json b/src/common/settings.json index 6b6718ea5474..ffa09591a398 100644 --- a/src/common/settings.json +++ b/src/common/settings.json @@ -114,6 +114,12 @@ "internal_setting": "arrow_use_list_view", "scope": "global" }, + { + "name": "asof_loop_join_threshold", + "description": "The maximum number of rows we need on the left side of an ASOF join to use a nested loop join", + "type": "UBIGINT", + "scope": "local" + }, { "name": "autoinstall_extension_repository", "description": "Overrides the custom endpoint for extension installation on autoloading", diff --git a/src/execution/physical_plan/plan_asof_join.cpp b/src/execution/physical_plan/plan_asof_join.cpp index 927defa4ff27..aa2df50d6313 100644 --- a/src/execution/physical_plan/plan_asof_join.cpp +++ b/src/execution/physical_plan/plan_asof_join.cpp @@ -1,8 +1,14 @@ +#include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp" +#include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp" +#include "duckdb/execution/operator/aggregate/physical_streaming_window.hpp" #include "duckdb/execution/operator/aggregate/physical_window.hpp" #include "duckdb/execution/operator/join/physical_asof_join.hpp" #include "duckdb/execution/operator/join/physical_iejoin.hpp" +#include "duckdb/execution/operator/join/physical_nested_loop_join.hpp" #include "duckdb/execution/operator/projection/physical_projection.hpp" +#include "duckdb/function/aggregate/distributive_function_utils.hpp" #include 
"duckdb/execution/physical_plan_generator.hpp" +#include "duckdb/function/function_binder.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" @@ -10,6 +16,223 @@ namespace duckdb { +static unique_ptr PlanAsOfLoopJoin(LogicalComparisonJoin &op, unique_ptr &probe, + unique_ptr &build, ClientContext &context) { + + // Plan a inverse nested loop join, then aggregate the values to choose the optimal match for each probe row. + // Use a row number primary key to handle duplicate probe values. + // aggregate the fields to produce at most one match per probe row, + // then project the columns back into the correct order and drop the primary key. + // + // ∠* \ pk + // | + // Γ pk;first(P),arg_xxx(B,inequality) + // | + // ∠*,inequality + // | + // ⨠swapped + // / \ + // B W pk:row_number + // | + // P + + LogicalComparisonJoin join_op(InverseJoinType(op.join_type)); + + join_op.types = op.children[1]->types; + const auto &probe_types = op.children[0]->types; + join_op.types.insert(join_op.types.end(), probe_types.begin(), probe_types.end()); + + // Fill in the projection maps to simplify the code below + // Since NLJ doesn't support projection, but ASOF does, + // we have to track this carefully... 
+ join_op.left_projection_map = op.right_projection_map; + if (join_op.left_projection_map.empty()) { + for (idx_t i = 0; i < op.children[1]->types.size(); ++i) { + join_op.left_projection_map.emplace_back(i); + } + } + + join_op.right_projection_map = op.left_projection_map; + if (join_op.right_projection_map.empty()) { + for (idx_t i = 0; i < op.children[0]->types.size(); ++i) { + join_op.right_projection_map.emplace_back(i); + } + } + + // Project pk + LogicalType pk_type = LogicalType::BIGINT; + join_op.types.emplace_back(pk_type); + + auto binder = Binder::CreateBinder(context); + FunctionBinder function_binder(*binder); + auto asof_idx = op.conditions.size(); + string arg_min_max; + for (idx_t i = 0; i < op.conditions.size(); ++i) { + const auto &cond = op.conditions[i]; + JoinCondition nested_cond; + nested_cond.left = cond.right->Copy(); + nested_cond.right = cond.left->Copy(); + if (!nested_cond.left || !nested_cond.right) { + return nullptr; + } + nested_cond.comparison = FlipComparisonExpression(cond.comparison); + join_op.conditions.emplace_back(std::move(nested_cond)); + switch (cond.comparison) { + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + case ExpressionType::COMPARE_GREATERTHAN: + D_ASSERT(asof_idx == op.conditions.size()); + asof_idx = i; + arg_min_max = "arg_max"; + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + case ExpressionType::COMPARE_LESSTHAN: + D_ASSERT(asof_idx == op.conditions.size()); + asof_idx = i; + arg_min_max = "arg_min"; + break; + default: + break; + } + } + + // NLJ does not support some join types + switch (join_op.join_type) { + case JoinType::SEMI: + case JoinType::ANTI: + case JoinType::MARK: + case JoinType::INNER: + case JoinType::RIGHT: + // Unfortunately, this does not check all the join types... 
+ if (!PhysicalNestedLoopJoin::IsSupported(op.conditions, op.join_type)) { + return nullptr; + } + break; + case JoinType::OUTER: + case JoinType::LEFT: + // RIGHT ASOF JOINs produce the entire build table and would require grouping on all build rows, + // which defeats the purpose of this optimisation. + default: + return nullptr; + } + + QueryErrorContext error_context; + auto arg_min_max_func = binder->GetCatalogEntry(CatalogType::SCALAR_FUNCTION_ENTRY, SYSTEM_CATALOG, DEFAULT_SCHEMA, + arg_min_max, OnEntryNotFound::RETURN_NULL, error_context); + // Can't find the arg_min/max aggregate we need, so give up before we break anything. + if (!arg_min_max_func || arg_min_max_func->type != CatalogType::AGGREGATE_FUNCTION_ENTRY) { + return nullptr; + } + auto &arg_min_max_entry = arg_min_max_func->Cast(); + + // PhysicalHashAggregate requires that the arguments to aggregate functions be bound references, + // so we Project the (shared) ordering argument on the end of the join results. + vector> comp_list; + for (const auto &col_type : join_op.types) { + const auto col_idx = comp_list.size(); + comp_list.emplace_back(make_uniq(col_type, col_idx)); + } + vector comp_types = join_op.types; + auto comp_expr = op.conditions[asof_idx].right->Copy(); + comp_types.emplace_back(comp_expr->return_type); + comp_list.emplace_back(std::move(comp_expr)); + + // Bind the aggregates first so we can abort safely if we can't find one. 
+ vector aggr_types(1, pk_type); + + // Wrap all the projected non-pk probe fields in `first` aggregates; + vector> aggregates; + for (const auto &i : join_op.right_projection_map) { + const auto col_idx = op.children[1]->types.size() + i; + const auto col_type = join_op.types[col_idx]; + aggr_types.emplace_back(col_type); + + vector> aggr_children; + auto col_ref = make_uniq(col_type, col_idx); + aggr_children.push_back(std::move(col_ref)); + + auto first_aggregate = FirstFunctionGetter::GetFunction(col_type); + auto aggr_expr = make_uniq(std::move(first_aggregate), std::move(aggr_children), + nullptr, nullptr, AggregateType::NON_DISTINCT); + D_ASSERT(col_type == aggr_expr->return_type); + aggregates.emplace_back(std::move(aggr_expr)); + } + + // Wrap all the projected build fields in `arg_max/min` aggregates using the inequality ordering; + // We are doing all this first in case we can't find a matching function. + for (const auto &col_idx : join_op.left_projection_map) { + const auto col_type = join_op.types[col_idx]; + aggr_types.emplace_back(col_type); + + vector> aggr_children; + auto col_ref = make_uniq(col_type, col_idx); + aggr_children.push_back(std::move(col_ref)); + auto comp_expr = make_uniq(comp_types.back(), comp_types.size() - 1); + aggr_children.push_back(std::move(comp_expr)); + vector child_types; + for (const auto &child : aggr_children) { + child_types.emplace_back(child->return_type); + } + + auto &func = arg_min_max_entry; + ErrorData error; + auto best_function = function_binder.BindFunction(func.name, func.functions, child_types, error); + if (!best_function.IsValid()) { + return nullptr; + } + auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex()); + auto aggr_expr = function_binder.BindAggregateFunction(bound_function, std::move(aggr_children), nullptr, + AggregateType::NON_DISTINCT); + D_ASSERT(col_type == aggr_expr->return_type); + aggregates.emplace_back(std::move(aggr_expr)); + } + + // Add a synthetic 
primary integer key to the probe relation using streaming windowing. + vector> window_select; + auto pk = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, pk_type, nullptr, nullptr); + pk->start = WindowBoundary::UNBOUNDED_PRECEDING; + pk->end = WindowBoundary::CURRENT_ROW_ROWS; + pk->alias = "row_number"; + window_select.emplace_back(std::move(pk)); + + auto window_types = probe->types; + window_types.emplace_back(pk_type); + + idx_t probe_cardinality = op.children[0]->EstimateCardinality(context); + auto window = make_uniq(window_types, std::move(window_select), probe_cardinality); + window->children.emplace_back(std::move(probe)); + + auto join = make_uniq(join_op, std::move(build), std::move(window), + std::move(join_op.conditions), join_op.join_type, probe_cardinality); + + // Plan a projection of the compare column + auto comp = make_uniq(std::move(comp_types), std::move(comp_list), probe_cardinality); + comp->children.emplace_back(std::move(join)); + + // Plan an aggregation on the output of the join, grouping by key; + // TODO: Can we make it perfect? 
+ // Note that the NLJ produced all fields, but only the projected ones were aggregated + vector> groups; + auto pk_ref = make_uniq(pk_type, join_op.types.size() - 1); + groups.emplace_back(std::move(pk_ref)); + auto aggr = make_uniq(context, aggr_types, std::move(aggregates), std::move(groups), + probe_cardinality); + aggr->children.emplace_back(std::move(comp)); + + // Project away primary/grouping key + // The aggregates were generated in the output order of the original ASOF, + // so we just have to shift away the pk + vector> project_list; + for (column_t i = 1; i < aggr->types.size(); ++i) { + auto col_ref = make_uniq(aggr->types[i], i); + project_list.emplace_back(std::move(col_ref)); + } + + auto proj = make_uniq(op.types, std::move(project_list), probe_cardinality); + proj->children.emplace_back(std::move(aggr)); + + return proj; +} + unique_ptr PhysicalPlanGenerator::PlanAsOfJoin(LogicalComparisonJoin &op) { // now visit the children D_ASSERT(op.children.size() == 2); @@ -42,7 +265,14 @@ unique_ptr PhysicalPlanGenerator::PlanAsOfJoin(LogicalComparis } D_ASSERT(asof_idx < op.conditions.size()); - if (!ClientConfig::GetConfig(context).force_asof_iejoin) { + auto &config = ClientConfig::GetConfig(context); + if (!config.force_asof_iejoin) { + if (op.children[0]->has_estimated_cardinality && lhs_cardinality <= config.asof_loop_join_threshold) { + auto result = PlanAsOfLoopJoin(op, left, right, context); + if (result) { + return result; + } + } return make_uniq(op, std::move(left), std::move(right)); } diff --git a/src/include/duckdb/main/client_config.hpp b/src/include/duckdb/main/client_config.hpp index 9cedb4074a49..4e73f8f922b8 100644 --- a/src/include/duckdb/main/client_config.hpp +++ b/src/include/duckdb/main/client_config.hpp @@ -101,6 +101,8 @@ struct ClientConfig { idx_t nested_loop_join_threshold = 5; //! The number of rows we need on either table to choose a merge join over an IE join idx_t merge_join_threshold = 1000; + //! 
The maximum number of rows to use the nested loop join implementation + idx_t asof_loop_join_threshold = 2048; //! The maximum amount of memory to keep buffered in a streaming query result. Default: 1mb. idx_t streaming_buffer_size = 1000000; diff --git a/src/include/duckdb/main/settings.hpp b/src/include/duckdb/main/settings.hpp index b9c979dcaa13..02ee9b2ee507 100644 --- a/src/include/duckdb/main/settings.hpp +++ b/src/include/duckdb/main/settings.hpp @@ -218,6 +218,17 @@ struct ArrowOutputListViewSetting { static Value GetSetting(const ClientContext &context); }; +struct AsofLoopJoinThresholdSetting { + using RETURN_TYPE = idx_t; + static constexpr const char *Name = "asof_loop_join_threshold"; + static constexpr const char *Description = + "The maximum number of rows we need on the left side of an ASOF join to use a nested loop join"; + static constexpr const char *InputType = "UBIGINT"; + static void SetLocal(ClientContext &context, const Value ¶meter); + static void ResetLocal(ClientContext &context); + static Value GetSetting(const ClientContext &context); +}; + struct AutoinstallExtensionRepositorySetting { using RETURN_TYPE = string; static constexpr const char *Name = "autoinstall_extension_repository"; diff --git a/src/main/config.cpp b/src/main/config.cpp index a69075c6d65a..2bebd3458093 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -72,6 +72,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_GLOBAL(ArrowLargeBufferSizeSetting), DUCKDB_GLOBAL(ArrowLosslessConversionSetting), DUCKDB_GLOBAL(ArrowOutputListViewSetting), + DUCKDB_LOCAL(AsofLoopJoinThresholdSetting), DUCKDB_GLOBAL(AutoinstallExtensionRepositorySetting), DUCKDB_GLOBAL(AutoinstallKnownExtensionsSetting), DUCKDB_GLOBAL(AutoloadKnownExtensionsSetting), diff --git a/src/main/settings/autogenerated_settings.cpp b/src/main/settings/autogenerated_settings.cpp index d007da71fc52..c7c71fd0c177 100644 --- a/src/main/settings/autogenerated_settings.cpp +++ 
b/src/main/settings/autogenerated_settings.cpp @@ -177,6 +177,23 @@ Value ArrowOutputListViewSetting::GetSetting(const ClientContext &context) { return Value::BOOLEAN(config.options.arrow_use_list_view); } +//===----------------------------------------------------------------------===// +// Asof Loop Join Threshold +//===----------------------------------------------------------------------===// +void AsofLoopJoinThresholdSetting::SetLocal(ClientContext &context, const Value &input) { + auto &config = ClientConfig::GetConfig(context); + config.asof_loop_join_threshold = input.GetValue(); +} + +void AsofLoopJoinThresholdSetting::ResetLocal(ClientContext &context) { + ClientConfig::GetConfig(context).asof_loop_join_threshold = ClientConfig().asof_loop_join_threshold; +} + +Value AsofLoopJoinThresholdSetting::GetSetting(const ClientContext &context) { + auto &config = ClientConfig::GetConfig(context); + return Value::UBIGINT(config.asof_loop_join_threshold); +} + //===----------------------------------------------------------------------===// // Autoinstall Extension Repository //===----------------------------------------------------------------------===// diff --git a/test/sql/join/asof/test_asof_join.test b/test/sql/join/asof/test_asof_join.test index 4f8e0ae0123e..a879c582fc8b 100644 --- a/test/sql/join/asof/test_asof_join.test +++ b/test/sql/join/asof/test_asof_join.test @@ -68,10 +68,11 @@ SELECT s1.starts as s1_starts, s2.starts as s2_starts, FROM samples AS s1 ASOF JOIN samples as s2 ON s2.ends >= (s1.ends - 5) -WHERE s1_starts <> s2_starts; +WHERE s1_starts <> s2_starts +ORDER BY ALL ---- -21 14 10 5 +21 14 # Use an ASOF join inside of a correlated subquery diff --git a/test/sql/join/asof/test_asof_join_merge.test_slow b/test/sql/join/asof/test_asof_join_merge.test_slow index 544deaad4cd5..12266d3747b7 100644 --- a/test/sql/join/asof/test_asof_join_merge.test_slow +++ b/test/sql/join/asof/test_asof_join_merge.test_slow @@ -11,6 +11,10 @@ PRAGMA threads=4 
statement ok SET temp_directory='__TEST_DIR__/temp.tmp' +# Force PhysicalAsOfJoin +statement ok +PRAGMA asof_loop_join_threshold = 0; + query II WITH build AS ( SELECT k, ('2021-01-01'::TIMESTAMP + INTERVAL (i) SECOND) AS t, i % 37 AS v diff --git a/test/sql/join/asof/test_asof_join_pushdown.test b/test/sql/join/asof/test_asof_join_pushdown.test index 1ef308a6eb91..9345d84b58f7 100644 --- a/test/sql/join/asof/test_asof_join_pushdown.test +++ b/test/sql/join/asof/test_asof_join_pushdown.test @@ -24,7 +24,8 @@ FROM right_pushdown d1 ASOF JOIN ( SELECT * FROM right_pushdown WHERE value is not NULL ) d2 - ON d1.time >= d2.time; + ON d1.time >= d2.time +ORDER BY ALL; ---- 0 0 0.0 0.0 1 0 NULL 0.0 From 5d8434ab6214d1a6646a7a1f0e02da12d231ebf0 Mon Sep 17 00:00:00 2001 From: Richard Wesley Date: Mon, 10 Feb 2025 12:22:35 -0800 Subject: [PATCH 024/142] Issue #8265: AsOf Nested Loop * Add asof_loop_join_threshold loops to tests. --- test/optimizer/joins/asof_join_adds_rows.test | 10 ++++- .../cross_join_and_unnest_dont_work.test | 10 ++++- test/sql/join/asof/test_asof_join.test | 9 +++- .../sql/join/asof/test_asof_join_doubles.test | 4 ++ .../asof/test_asof_join_inequalities.test | 8 ++++ .../join/asof/test_asof_join_integers.test | 11 +++-- .../asof/test_asof_join_missing.test_slow | 3 ++ .../join/asof/test_asof_join_pushdown.test | 42 +++++++++++-------- .../join/asof/test_asof_join_subquery.test | 8 ++++ .../join/asof/test_asof_join_timestamps.test | 8 ++++ .../sql/join/asof/test_asof_join_varchar.test | 8 ++++ 11 files changed, 97 insertions(+), 24 deletions(-) diff --git a/test/optimizer/joins/asof_join_adds_rows.test b/test/optimizer/joins/asof_join_adds_rows.test index 2b15fbcb45a3..2ebdfd145f3a 100644 --- a/test/optimizer/joins/asof_join_adds_rows.test +++ b/test/optimizer/joins/asof_join_adds_rows.test @@ -37,6 +37,12 @@ create table large_build as from values (1, '1992-03-22 01:02:19'::TIMESTAMP), (1, '1992-03-22 01:02:20'::TIMESTAMP) t(lb_const, b); +# Compare NLJ 
optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + query I select a from (select * from small_probe, child_join where c=sp_const) asof join large_build on (lb_const = sp_const and a < b) order by a; ---- @@ -109,4 +115,6 @@ ORDER BY timepoint; ---- ID1 fqn1 fqn1 2021-01-01 00:00:00 -ID1 fqn2 fqn2 2021-03-03 00:00:00 \ No newline at end of file +ID1 fqn2 fqn2 2021-03-03 00:00:00 + +endloop diff --git a/test/optimizer/joins/cross_join_and_unnest_dont_work.test b/test/optimizer/joins/cross_join_and_unnest_dont_work.test index ee1eee4acd87..4c4c6bf5356e 100644 --- a/test/optimizer/joins/cross_join_and_unnest_dont_work.test +++ b/test/optimizer/joins/cross_join_and_unnest_dont_work.test @@ -37,6 +37,12 @@ create table large_build as from values (1, '1992-03-22 01:02:19'::TIMESTAMP), (1, '1992-03-22 01:02:20'::TIMESTAMP) t(lb_const, b); +# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + query I select a from (select * from small_probe, child_join where c=sp_const) asof join large_build on (lb_const = sp_const and a < b) order by a; ---- @@ -109,4 +115,6 @@ ORDER BY timepoint; ---- ID1 fqn1 fqn1 2021-01-01 00:00:00 -ID1 fqn2 fqn2 2021-03-03 00:00:00 \ No newline at end of file +ID1 fqn2 fqn2 2021-03-03 00:00:00 + +endloop diff --git a/test/sql/join/asof/test_asof_join.test b/test/sql/join/asof/test_asof_join.test index a879c582fc8b..4f1dd370ce13 100644 --- a/test/sql/join/asof/test_asof_join.test +++ b/test/sql/join/asof/test_asof_join.test @@ -30,6 +30,12 @@ create table trades("when" timestamp, symbol int); statement ok insert into trades values ('2020-01-01 00:00:03', 1); +# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + query III SELECT t.*, p.price FROM trades t ASOF JOIN prices p @@ -74,8 +80,7 @@ ORDER BY ALL 10 5 21 14 -# Use an ASOF join 
inside of a correlated subquery - +endloop # # Errors diff --git a/test/sql/join/asof/test_asof_join_doubles.test b/test/sql/join/asof/test_asof_join_doubles.test index e6c98a7aed3c..9fa3d24df568 100644 --- a/test/sql/join/asof/test_asof_join_doubles.test +++ b/test/sql/join/asof/test_asof_join_doubles.test @@ -9,6 +9,9 @@ PRAGMA enable_verification # Inequality only # +statement ok +PRAGMA asof_loop_join_threshold=0; + # Use doubles for readable infinities statement ok CREATE TABLE events0 (begin DOUBLE, value INTEGER); @@ -324,3 +327,4 @@ ASOF RIGHT JOIN USING (key, begin) ORDER BY 1 ASC NULLS FIRST, 2 ---- + diff --git a/test/sql/join/asof/test_asof_join_inequalities.test b/test/sql/join/asof/test_asof_join_inequalities.test index 8d5a3e1f312b..eef40824789c 100644 --- a/test/sql/join/asof/test_asof_join_inequalities.test +++ b/test/sql/join/asof/test_asof_join_inequalities.test @@ -37,6 +37,12 @@ foreach debug False True statement ok PRAGMA debug_asof_iejoin=${debug} +# Check NLJ results against both +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + # # Strictly Greater Than # @@ -229,3 +235,5 @@ NULL -infinity -1 NULL NULL -10 endloop + +endloop diff --git a/test/sql/join/asof/test_asof_join_integers.test b/test/sql/join/asof/test_asof_join_integers.test index 1bc4b0762a6b..77c414fb4631 100644 --- a/test/sql/join/asof/test_asof_join_integers.test +++ b/test/sql/join/asof/test_asof_join_integers.test @@ -5,7 +5,7 @@ statement ok PRAGMA enable_verification -# Join on a string range +# Join on an integer range statement ok CREATE TABLE events0 (begin INTEGER, value INTEGER); @@ -26,8 +26,11 @@ CREATE TABLE probe0 AS FROM range(0,10) ; -# This is not implemented yet because it requires a dedicated operator -# instead of LEAD(...infinity::INTEGER) +# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; # INNER ON inequality only query II @@ -134,3 +137,5 
@@ ORDER BY ALL 9 3 NULL -1 NULL 9 + +endloop diff --git a/test/sql/join/asof/test_asof_join_missing.test_slow b/test/sql/join/asof/test_asof_join_missing.test_slow index 57a2c64e5231..17355c981f99 100644 --- a/test/sql/join/asof/test_asof_join_missing.test_slow +++ b/test/sql/join/asof/test_asof_join_missing.test_slow @@ -13,6 +13,9 @@ PRAGMA enable_verification # * First payload bin empty # * Multiple scanned payload blocks +statement ok +PRAGMA asof_loop_join_threshold=0; + # Check results against IEJoin foreach debug False True diff --git a/test/sql/join/asof/test_asof_join_pushdown.test b/test/sql/join/asof/test_asof_join_pushdown.test index 9345d84b58f7..465b407ad850 100644 --- a/test/sql/join/asof/test_asof_join_pushdown.test +++ b/test/sql/join/asof/test_asof_join_pushdown.test @@ -14,6 +14,29 @@ INSERT INTO right_pushdown VALUES (1, NULL), ; +statement ok +CREATE TABLE issue13899(seq_no INT, amount DECIMAL(10,2)); + +statement ok +INSERT INTO issue13899 VALUES + (1,1.00), + (2,null), + (3,null), + (4,null), + (5,2.00), + (6,null), + (7,null), + (8,3.00), + (9,null), + (10,null), + (11,5.00); + +# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + query IIII SELECT d1.time, @@ -104,23 +127,6 @@ ORDER BY ALL 5 6 10 11 -statement ok -CREATE TABLE issue13899(seq_no INT, amount DECIMAL(10,2)); - -statement ok -INSERT INTO issue13899 VALUES - (1,1.00), - (2,null), - (3,null), - (4,null), - (5,2.00), - (6,null), - (7,null), - (8,3.00), - (9,null), - (10,null), - (11,5.00); - query III select a.seq_no, @@ -143,3 +149,5 @@ ORDER BY 1 9 NULL 3.00 10 NULL 3.00 11 5.00 5.00 + +endloop diff --git a/test/sql/join/asof/test_asof_join_subquery.test b/test/sql/join/asof/test_asof_join_subquery.test index ec2f72687adb..f61c3f67e602 100644 --- a/test/sql/join/asof/test_asof_join_subquery.test +++ b/test/sql/join/asof/test_asof_join_subquery.test @@ -16,6 +16,12 @@ INSERT INTO events VALUES (8, 3) ; 
+# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + query II SELECT begin, value IN ( SELECT e1.value @@ -34,3 +40,5 @@ ORDER BY ALL; 3.0 true 6.0 true 8.0 true + +endloop diff --git a/test/sql/join/asof/test_asof_join_timestamps.test b/test/sql/join/asof/test_asof_join_timestamps.test index 7e2a6ec0e69e..00aa276d793d 100644 --- a/test/sql/join/asof/test_asof_join_timestamps.test +++ b/test/sql/join/asof/test_asof_join_timestamps.test @@ -32,6 +32,12 @@ INSERT INTO probe0 VALUES ('infinity') ; +# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + # INNER ON inequality only query II nosort SELECT p.begin, e.value @@ -204,3 +210,5 @@ ON p.begin >= e.begin ORDER BY p.begin ASC ---- 2023-03-21 12:00:00 + +endloop diff --git a/test/sql/join/asof/test_asof_join_varchar.test b/test/sql/join/asof/test_asof_join_varchar.test index ef5f31e7bfa4..b008c0e2816f 100644 --- a/test/sql/join/asof/test_asof_join_varchar.test +++ b/test/sql/join/asof/test_asof_join_varchar.test @@ -26,6 +26,12 @@ CREATE TABLE probe0 AS FROM range(0,10) ; +# Compare NLJ optimisation to operator +foreach threshold 0 32 + +statement ok +PRAGMA asof_loop_join_threshold = ${threshold}; + # INNER ON inequality only query II SELECT p.begin, e.value @@ -131,3 +137,5 @@ ORDER BY ALL 9 3 NULL -1 NULL 9 + +endloop From 110808fe6dac91c1ea0f8a699be2dd1ec756a127 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 00:48:04 +0100 Subject: [PATCH 025/142] allow escaping whitespace --- src/function/cast/vector_cast_helpers.cpp | 26 +++++++++++++++++------ test/sql/cast/string_to_list_cast.test | 5 +++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 347d7102bfa1..d9d9d3c2717a 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ 
b/src/function/cast/vector_cast_helpers.cpp @@ -34,9 +34,21 @@ inline static void SkipWhitespace(StringCastInputState &input_state) { auto &buf = input_state.buf; auto &pos = input_state.pos; auto &len = input_state.len; - while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { + while (pos < len) { + bool set_escaped = false; + if (buf[pos] == '\\') { + if (!input_state.escaped) { + set_escaped = true; + } + } else if (StringUtil::CharacterIsSpace(buf[pos])) { + if (input_state.escaped) { + break; + } + } else { + break; + } pos++; - input_state.escaped = false; + input_state.escaped = set_escaped; } } @@ -282,7 +294,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { if (!input_state.escaped) { set_escaped = true; } - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; } @@ -413,7 +425,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!input_state.escaped) { set_escaped = true; } - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; } @@ -476,7 +488,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!input_state.escaped) { set_escaped = true; } - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; } @@ -586,7 +598,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Tue, 11 Feb 2025 01:53:39 +0100 Subject: [PATCH 026/142] better way of dealing with escaped spaces --- src/function/cast/vector_cast_helpers.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp 
index d9d9d3c2717a..29be5cc75e71 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -34,21 +34,12 @@ inline static void SkipWhitespace(StringCastInputState &input_state) { auto &buf = input_state.buf; auto &pos = input_state.pos; auto &len = input_state.len; - while (pos < len) { - bool set_escaped = false; - if (buf[pos] == '\\') { - if (!input_state.escaped) { - set_escaped = true; - } - } else if (StringUtil::CharacterIsSpace(buf[pos])) { - if (input_state.escaped) { - break; - } - } else { - break; - } + if (input_state.escaped) { + return; + } + while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { pos++; - input_state.escaped = set_escaped; + input_state.escaped = false; } } From f65c09760338f9aa031dad63d6f4d50cca3fb8c6 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 14:16:33 +0100 Subject: [PATCH 027/142] yet another case of missing backslash escape logic --- src/function/cast/vector_cast_helpers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 29be5cc75e71..a51ff8ee1732 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -623,6 +623,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Tue, 11 Feb 2025 14:38:24 +0100 Subject: [PATCH 028/142] don't trim escaped backslashes at the end of the input --- src/function/cast/vector_cast_helpers.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index a51ff8ee1732..9154681dcead 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -285,6 +285,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { if (!input_state.escaped) { set_escaped = true; } + end_pos = pos; } else if 
(!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; @@ -416,6 +417,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!input_state.escaped) { set_escaped = true; } + end_pos = pos; } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; @@ -479,6 +481,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!input_state.escaped) { set_escaped = true; } + end_pos = pos; } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; @@ -589,6 +592,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Tue, 11 Feb 2025 10:53:00 -0300 Subject: [PATCH 029/142] whis is this leaking --- src/common/adbc/adbc.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index 09ac3aa5d8dc..0a77b0e67b42 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -880,7 +880,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto error_msg_extract_statements = duckdb_extract_statements_error(extracted_statements); if (error_msg_extract_statements != nullptr) { // Things went wrong when executing internal prepared statement - delete extracted_statements; + delete (reinterpret_cast (&extracted_statements)); SetError(error, error_msg_extract_statements); return ADBC_STATUS_INTERNAL; } @@ -893,7 +893,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto adbc_status = CheckResult(res, error, error_msg); if (adbc_status != ADBC_STATUS_OK) { // Things went wrong when executing internal prepared statement - delete extracted_statements; + delete (reinterpret_cast (&extracted_statements)); delete statement_internal; return adbc_status; } @@ -904,7 +904,7 @@ AdbcStatusCode 
StatementSetSqlQuery(struct AdbcStatement *statement, const char SetError(error, duckdb_query_arrow_error(out_result)); delete out_result; delete statement_internal; - delete extracted_statements; + delete (reinterpret_cast (&extracted_statements)); return ADBC_STATUS_INVALID_ARGUMENT; } delete out_result; @@ -914,7 +914,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto res = duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, extract_statements_size - 1, &wrapper->statement); auto error_msg = duckdb_prepare_error(wrapper->statement); - delete extracted_statements; + delete (reinterpret_cast (&extracted_statements)); return CheckResult(res, error, error_msg); } From fb59f61e0178288ba3adb2c4cb72e49f994f24d5 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 17:20:44 +0100 Subject: [PATCH 030/142] add support for unnamed struct format to VARCHAR->STRUCT cast --- src/function/cast/vector_cast_helpers.cpp | 404 +++++++++++++------- test/sql/cast/string_to_unnamed_struct.test | 61 +++ 2 files changed, 326 insertions(+), 139 deletions(-) create mode 100644 test/sql/cast/string_to_unnamed_struct.test diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 9154681dcead..572b03e563e7 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -86,6 +86,8 @@ static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl, char clos } } else if (buf[idx] == '{') { brackets.push_back('}'); + } else if (buf[idx] == '(') { + brackets.push_back(')'); } else if (buf[idx] == '[') { brackets.push_back(']'); lvl++; @@ -538,171 +540,295 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vectorsecond; + + start_pos = optional_idx(); + pos++; + SkipWhitespace(input_state); + while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { + bool set_escaped = false; + if (buf[pos] 
== '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, '}')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, ')')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + set_escaped = true; + } + end_pos = pos; + } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; } - end_pos = pos; + pos++; + input_state.escaped = set_escaped; + } + if (pos == len) { + return false; + } + auto &child_vec = *varchar_vectors[child_idx]; + auto string_data = FlatVector::GetData(child_vec); + auto &child_mask = child_masks[child_idx].get(); + + if (!start_pos.IsValid()) { + start_pos = 0; + end_pos = 0; + } else { + end_pos++; + } + auto value_start = start_pos.GetIndex(); + StringCastInputState value_temp_state(buf, value_start, end_pos); + if (IsNull(value_temp_state)) { + child_mask.SetInvalid(row_idx); + } else { + string_data[row_idx] = HandleString(child_vec, buf, value_start, end_pos); + child_mask.SetValid(row_idx); + } + + if (buf[pos] == '}') { + break; } - input_state.escaped = set_escaped; pos++; + SkipWhitespace(input_state); } - if (pos == len) { - return false; - } - if (!start_pos.IsValid()) { - //! 
Key can not be empty - return false; - } - idx_t key_start = start_pos.GetIndex(); - end_pos++; - StringCastInputState key_temp_state(buf, key_start, end_pos); - if (IsNull(key_temp_state)) { - //! Key can not be NULL - return false; - } - auto child_name = HandleString(temp_vec, buf, key_start, end_pos); - auto it = child_names.find(child_name); - if (it == child_names.end()) { - return false; // false key - } - child_idx = it->second; + } else { + //! This is an unnamed struct in the form of `(value, value_2, ...)` + D_ASSERT(end_char == ')'); + idx_t child_idx = 0; + while (pos < len) { + if (child_idx == child_names.size()) { + return false; + } - start_pos = optional_idx(); - pos++; - SkipWhitespace(input_state); - while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { - bool set_escaped = false; - if (buf[pos] == '"' || buf[pos] == '\'') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!input_state.escaped) { - if (!SkipToCloseQuotes(input_state)) { - return false; + optional_idx start_pos; + idx_t end_pos; + while (pos < len && ((buf[pos] != ',' && buf[pos] != ')') || input_state.escaped)) { + bool set_escaped = false; + if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; } - } - end_pos = pos; - } else if (buf[pos] == '{') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!input_state.escaped) { - if (!SkipToClose(input_state, lvl, '}')) { - return false; + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } } - } - end_pos = pos; - } else if (buf[pos] == '[') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!input_state.escaped) { - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { - return false; + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, '}')) { + return false; + } + } + end_pos = pos; + } 
else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + if (!SkipToClose(input_state, lvl, ')')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; + } + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!input_state.escaped) { + set_escaped = true; + } + end_pos = pos; + } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; } - end_pos = pos; - } else if (buf[pos] == '\\') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!input_state.escaped) { - set_escaped = true; - } - end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; + pos++; + input_state.escaped = set_escaped; } - pos++; - input_state.escaped = set_escaped; - } - if (pos == len) { - return false; - } - auto &child_vec = *varchar_vectors[child_idx]; - auto string_data = FlatVector::GetData(child_vec); - auto &child_mask = child_masks[child_idx].get(); + if (pos == len) { + return false; + } + auto &child_vec = *varchar_vectors[child_idx]; + auto string_data = FlatVector::GetData(child_vec); + auto &child_mask = child_masks[child_idx].get(); - if (!start_pos.IsValid()) { - start_pos = 0; - end_pos = 0; - } else { - end_pos++; - } - auto value_start = start_pos.GetIndex(); - StringCastInputState value_temp_state(buf, value_start, end_pos); - if (IsNull(value_temp_state)) { - child_mask.SetInvalid(row_idx); - } else { - string_data[row_idx] = HandleString(child_vec, buf, value_start, end_pos); - child_mask.SetValid(row_idx); - } + if (!start_pos.IsValid()) { + start_pos = 0; + end_pos = 0; + } else { + 
end_pos++; + } + auto value_start = start_pos.GetIndex(); + StringCastInputState value_temp_state(buf, value_start, end_pos); + if (IsNull(value_temp_state)) { + child_mask.SetInvalid(row_idx); + } else { + string_data[row_idx] = HandleString(child_vec, buf, value_start, end_pos); + child_mask.SetValid(row_idx); + } - if (buf[pos] == '}') { - break; + if (buf[pos] == ')') { + break; + } + child_idx++; + pos++; + SkipWhitespace(input_state); } - pos++; - SkipWhitespace(input_state); } pos++; SkipWhitespace(input_state); diff --git a/test/sql/cast/string_to_unnamed_struct.test b/test/sql/cast/string_to_unnamed_struct.test new file mode 100644 index 000000000000..879def4a9948 --- /dev/null +++ b/test/sql/cast/string_to_unnamed_struct.test @@ -0,0 +1,61 @@ +# name: test/sql/cast/string_to_unnamed_struct.test +# group: [cast] + +# Basic single value struct +query I +select $$(abc)$$::STRUCT(a VARCHAR) +---- +{'a': abc} + +# Multiple values +query I +select $$(abc, def, ghi)$$::STRUCT(a VARCHAR, b VARCHAR, c VARCHAR) +---- +{'a': abc, 'b': def, 'c': ghi} + +# Empty unnamed struct +query I +select $$()$$::STRUCT(a VARCHAR) +---- +{'a': NULL} + +# Nested regular struct inside unnamed struct +query I +select $$({'amount': 42})$$::STRUCT(a STRUCT(amount INT)) +---- +{'a': {'amount': 42}} + +# Nested unnamed struct inside unnamed struct +query I +select $$((42))$$::STRUCT(a STRUCT(amount INT)) +---- +{'a': {'amount': 42}} + +# Nested unnamed struct AND regular struct inside unnamed struct +query I +select $$((42), {amount: 21})$$::STRUCT(a STRUCT(amount INT), b STRUCT(amount INT)) +---- +{'a': {'amount': 42}, 'b': {'amount': 21}} + +# List inside unnamed struct +query I +select $$([1,2,3], [4,5,6])$$::STRUCT(a INTEGER[], b INTEGER[]) +---- +{'a': [1, 2, 3], 'b': [4, 5, 6]} + +statement error +select $$([1,2,3],)$$::STRUCT(a INTEGER[]) +---- +can't be cast to the destination type STRUCT + +# Empty string in the second child of the unnamed struct +query I +select 
$$([1,2,3],)$$::STRUCT(a INTEGER[], b VARCHAR) +---- +{'a': [1, 2, 3], 'b': } + +# Empty string in the second child of a named struct +query I +select $${'a': [1,2,3],'b':}$$::STRUCT(a INTEGER[], b VARCHAR) +---- +{'a': [1, 2, 3], 'b': } From 1748b0794e2f8477e444f754c2fa032c4e444ef9 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 17:29:46 +0100 Subject: [PATCH 031/142] need to recognize ( as a scope as well --- src/function/cast/vector_cast_helpers.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 572b03e563e7..6a54b9435d42 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -160,9 +160,18 @@ static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t en //! Close scope scopes.pop(); } - if (!quoted && (current_char == '[' || current_char == '{')) { + if (!quoted && (current_char == '[' || current_char == '{' || current_char == '(')) { //! New scope - scopes.push(current_char == '[' ? ']' : '}'); + char end_char; + if (current_char == '[') { + end_char = ']'; + } else if (current_char == '{') { + end_char = '}'; + } else { + D_ASSERT(current_char == '('); + end_char = ')'; + } + scopes.push(end_char); } //! 
Regular character string_data[copied_count++] = current_char; From 1da281bc7b055486a17d6443efe8b6d345236d5b Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 17:33:50 +0100 Subject: [PATCH 032/142] add another test, with escaped leading and trailing spaces, and escaped backslashes --- test/sql/cast/string_to_unnamed_struct.test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/sql/cast/string_to_unnamed_struct.test b/test/sql/cast/string_to_unnamed_struct.test index 879def4a9948..58f548065789 100644 --- a/test/sql/cast/string_to_unnamed_struct.test +++ b/test/sql/cast/string_to_unnamed_struct.test @@ -59,3 +59,8 @@ query I select $${'a': [1,2,3],'b':}$$::STRUCT(a INTEGER[], b VARCHAR) ---- {'a': [1, 2, 3], 'b': } + +query I +select $$[(" test "), {'a': (\\ test \\)}]$$::STRUCT(a VARCHAR)[] +---- +[{'a': test }, {'a': (\\ test \\)}] From 8252d4de62a05bfd5518ce7b937d9e6b1cc50c4b Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 17:35:56 +0100 Subject: [PATCH 033/142] some more nesting --- test/sql/cast/string_to_unnamed_struct.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sql/cast/string_to_unnamed_struct.test b/test/sql/cast/string_to_unnamed_struct.test index 58f548065789..ecf4c39ec468 100644 --- a/test/sql/cast/string_to_unnamed_struct.test +++ b/test/sql/cast/string_to_unnamed_struct.test @@ -61,6 +61,6 @@ select $${'a': [1,2,3],'b':}$$::STRUCT(a INTEGER[], b VARCHAR) {'a': [1, 2, 3], 'b': } query I -select $$[(" test "), {'a': (\\ test \\)}]$$::STRUCT(a VARCHAR)[] +select $$[((" test ")), {'a': (\\ test \\)}]$$::STRUCT(a STRUCT("inner" VARCHAR))[] ---- -[{'a': test }, {'a': (\\ test \\)}] +[{'a': {'inner': test }}, {'a': {'inner': \ test \}}] From d2a2d3dc5649eda69a5400f449cfa27b64204db9 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 17:49:45 +0100 Subject: [PATCH 034/142] adjust tests --- test/sql/cast/string_to_nested_types_cast.test_slow | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/test/sql/cast/string_to_nested_types_cast.test_slow b/test/sql/cast/string_to_nested_types_cast.test_slow index 90c97c24128d..b8f1346f59e4 100644 --- a/test/sql/cast/string_to_nested_types_cast.test_slow +++ b/test/sql/cast/string_to_nested_types_cast.test_slow @@ -78,7 +78,7 @@ SELECT CAST(LIST(timestamp_ns)::VARCHAR AS TIME[]) FROM test_all_types(); query I SELECT CAST(LIST(blob)::VARCHAR AS BLOB[]) FROM test_all_types(); ---- -[thisisalongblob\x00withnullbytes, \x00\x00\x00a, NULL] +[thisisalongblobx00withnullbytes, x00x00x00a, NULL] query I SELECT CAST(LIST(interval)::VARCHAR AS INTERVAL[]) FROM test_all_types(); @@ -191,8 +191,8 @@ SELECT CAST(struct_pack(A=>timestamp_ns)::VARCHAR AS STRUCT(A TIME)) FROM test_a query I SELECT CAST(struct_pack(A=>blob)::VARCHAR AS STRUCT(A BLOB)) FROM test_all_types(); ---- -{'A': thisisalongblob\x00withnullbytes} -{'A': \x00\x00\x00a} +{'A': thisisalongblobx00withnullbytes} +{'A': x00x00x00a} {'A': NULL} query I From a5173d3de01269d2a6d19dc00c73241c189e4762 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 11 Feb 2025 19:46:16 +0100 Subject: [PATCH 035/142] fix tidy issues --- src/function/cast/vector_cast_helpers.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 6a54b9435d42..cb0feb7dce9f 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -1,18 +1,18 @@ #include "duckdb/function/cast/vector_cast_helpers.hpp" -#include "duckdb/common/typedefs.hpp" #include "duckdb/common/stack.hpp" +#include "duckdb/common/typedefs.hpp" namespace { struct StringCastInputState { public: - StringCastInputState(const char *buf, idx_t &pos, idx_t &len) : buf(buf), pos(pos), len(len) { + StringCastInputState(const char *buf, duckdb::idx_t &pos, duckdb::idx_t &len) : buf(buf), pos(pos), len(len) { } public: const char *buf; - idx_t &pos; - idx_t &len; + 
duckdb::idx_t &pos; + duckdb::idx_t &len; bool escaped = false; }; From 50613b65e0d55455a5922f9e55d57bb91d4435a0 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 10:14:45 +0100 Subject: [PATCH 036/142] fix up test --- test/sql/cast/string_to_list_cast.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sql/cast/string_to_list_cast.test b/test/sql/cast/string_to_list_cast.test index fdaa3fd72242..61f367326ac4 100644 --- a/test/sql/cast/string_to_list_cast.test +++ b/test/sql/cast/string_to_list_cast.test @@ -511,11 +511,11 @@ statement error select '[{"bar":"\\""}]'::VARCHAR[]; ---- -# escapes are only processed once the {} is cast as well +# Unescaped doublequote ends the quote early, leaving an uneven amount of `"`, causing an error statement error -query I select '[{"bar":"\\""}]'::STRUCT(bar VARCHAR)[]; ---- +can't be cast to the destination type LIST # uneven amount of escapes does escape the " query I From 384f5f01b5bf4f584975bbd2a66e0733756a499c Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 13:10:06 +0100 Subject: [PATCH 037/142] moved the escaped case to the start of the cases, reduces complexity of the bodies of the other cases --- src/function/cast/vector_cast_helpers.cpp | 37 ++-- test/sql/cast/string_to_list_escapes.test | 198 ++++++++++++++++++++++ 2 files changed, 215 insertions(+), 20 deletions(-) create mode 100644 test/sql/cast/string_to_list_escapes.test diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index cb0feb7dce9f..16cc40192bab 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -235,26 +235,27 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } bool set_escaped = false; - if (buf[pos] == '[') { + if (input_state.escaped) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + } else if (buf[pos] == '[') { if (!start_pos.IsValid()) { start_pos = 
pos; } //! Start of a LIST - if (!input_state.escaped) { - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { - return false; - } + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; } end_pos = pos; } else if ((buf[pos] == '"' || buf[pos] == '\'')) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - if (!SkipToCloseQuotes(input_state)) { - return false; - } + if (!SkipToCloseQuotes(input_state)) { + return false; } end_pos = pos; } else if (buf[pos] == '{') { @@ -262,14 +263,12 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { start_pos = pos; } //! Start of a STRUCT - if (!input_state.escaped) { - idx_t struct_lvl = 0; - if (!SkipToClose(input_state, struct_lvl, '}')) { - return false; - } + idx_t struct_lvl = 0; + if (!SkipToClose(input_state, struct_lvl, '}')) { + return false; } end_pos = pos; - } else if (buf[pos] == ',' || buf[pos] == ']') { + } else if ((buf[pos] == ',' || buf[pos] == ']')) { if (buf[pos] != ']' || start_pos.IsValid() || seen_value) { if (!start_pos.IsValid()) { state.HandleValue(buf, 0, 0); @@ -293,11 +292,9 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - set_escaped = true; - } + set_escaped = true; end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { if (!start_pos.IsValid()) { start_pos = pos; } diff --git a/test/sql/cast/string_to_list_escapes.test b/test/sql/cast/string_to_list_escapes.test new file mode 100644 index 000000000000..aca72ada840f --- /dev/null +++ b/test/sql/cast/string_to_list_escapes.test @@ -0,0 +1,198 @@ +# name: test/sql/cast/string_to_list_escapes.test +# group: [cast] + +query I +SELECT $$[hello, world]$$::VARCHAR[]; +---- +[hello, world] + +query I +SELECT $$[hello\ world, world]$$::VARCHAR[]; +---- +[hello world, world] + +query I +SELECT 
$$[hello\,world, test]$$::VARCHAR[]; +---- +[hello,world, test] + +query I +SELECT $$[hello\,, test]$$::VARCHAR[]; +---- +[hello,, test] + +query I +SELECT $$[hello\"quoted\"text, more]$$::VARCHAR[]; +---- +[hello"quoted"text, more] + +query I +SELECT $$[escaped\\backslash, test]$$::VARCHAR[]; +---- +[escaped\backslash, test] + +query I +SELECT $$[nested[brackets], test]$$::VARCHAR[]; +---- +[nested[brackets], test] + +query I +SELECT $$[quote\'in\'string, test]$$::VARCHAR[]; +---- +[quote'in'string, test] + +query I +SELECT $$[mix\ of\ special\,chars]$$::VARCHAR[]; +---- +[mix of special,chars] + +query I +SELECT $$["ends with space ", "trailing space "]$$::VARCHAR[]; +---- +[ends with space , trailing space ] + +query I +SELECT $$["ends with comma,", "another,"]$$::VARCHAR[]; +---- +[ends with comma,, another,] + +query I +SELECT $$["quote at end\"", "\""]$$::VARCHAR[]; +---- +[quote at end", "] + +query I +SELECT $$["ends with bracket]", "[bracket"]$$::VARCHAR[]; +---- +[ends with bracket], [bracket] + +query I +SELECT $$["backslash at end\\", "\\"]$$::VARCHAR[]; +---- +[backslash at end\, \] + +query I +SELECT $$[" space at start", " leading space"]$$::VARCHAR[]; +---- +[ space at start, leading space] + +query I +SELECT $$[",comma at start", ",leading comma"]$$::VARCHAR[]; +---- +[,comma at start, ,leading comma] + +query I +SELECT $$["\"quote at start", "\"leading quote"]$$::VARCHAR[]; +---- +["quote at start, "leading quote] + +query I +SELECT $$["[bracket at start", "[leading bracket"]$$::VARCHAR[]; +---- +[[bracket at start, [leading bracket] + +query I +SELECT $$["\\backslash at start", "\\leading backslash"]$$::VARCHAR[]; +---- +[\backslash at start, \leading backslash] + +query I +SELECT $$[" space at start and end ", " leading and trailing space "]$$::VARCHAR[]; +---- +[ space at start and end , leading and trailing space ] + +query I +SELECT $$[",comma at start and end,", ",leading and trailing comma,"]$$::VARCHAR[]; +---- +[,comma at start and end,, 
,leading and trailing comma,] + +query I +SELECT $$["\"quote at start and end\"", "\"leading and trailing quote\""]$$::VARCHAR[]; +---- +["quote at start and end", "leading and trailing quote"] + +query I +SELECT $$["[bracket at start and end]", "[leading and trailing bracket]"]$$::VARCHAR[]; +---- +[[bracket at start and end], [leading and trailing bracket]] + +query I +SELECT $$["\\backslash at start and end\\", "\\leading and trailing backslash\\"]$$::VARCHAR[]; +---- +[\backslash at start and end\, \leading and trailing backslash\] + + +query I +SELECT $$[" mix, of special\ characters " , "[various] \"combinations\" "]$$::VARCHAR[]; +---- +[ mix, of special characters , [various] "combinations" ] + +query I +SELECT $$[", starts and ends with ,", "[brackets] and ,commas,"]$$::VARCHAR[]; +---- +[, starts and ends with ,, [brackets] and ,commas,] + +query I +SELECT $$["\"quotes\" and \ spaces ", "\ leading and trailing \ "]$$::VARCHAR[]; +---- +["quotes" and spaces , leading and trailing ] + +query I +SELECT $$["[complex\ combination, of\" special]", "\\all cases covered\\"]$$::VARCHAR[]; +---- +[[complex combination, of" special], \all cases covered\] + +query I +SELECT $$["hello, world"]$$::VARCHAR[]; +---- +[hello, world] + +statement error +SELECT $$["missing quote]]$$::VARCHAR[]; -- Mismatched quotes +---- +can't be cast to the destination type + +statement error +SELECT $$["backslash at end\"]$$::VARCHAR[]; -- Improper escaping +---- +can't be cast to the destination type + +statement error +SELECT $$[unescaped[bracket]$$::VARCHAR[]; -- Unescaped bracket +---- +can't be cast to the destination type + +statement error +SELECT $$[unterminated string]"]$$::VARCHAR[]; +---- +can't be cast to the destination type + +query I +SELECT $$[]$$::VARCHAR[]; -- Empty list +---- +[] + +query I +SELECT $$[""]$$::VARCHAR[]; -- List with empty string +---- +[] + +query I +SELECT $$[" "]$$::VARCHAR[]; -- List with whitespace string +---- +[ ] + +query I +SELECT 
$$["\\"]$$::VARCHAR[]; -- List with only a backslash +---- +[\] + +query I +SELECT $$["\""]$$::VARCHAR[]; -- List with only a quote +---- +["] + +query I +SELECT $$[\,]$$::VARCHAR[]; -- List with only a comma +---- +[,] From 26e8d22a7433508af51c087e40c7683227bfcdf8 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 13:14:36 +0100 Subject: [PATCH 038/142] give varchar->struct the same treatment, escaped case should be on top --- src/function/cast/vector_cast_helpers.cpp | 80 ++++++++++------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 16cc40192bab..b443ac7e858e 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -565,56 +565,52 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Wed, 12 Feb 2025 12:04:26 +0100 Subject: [PATCH 039/142] Ensure MergeCollectionTask has a writer --- src/execution/operator/persistent/physical_batch_insert.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 2e546c477282..f898af2c7014 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -215,7 +215,9 @@ class MergeCollectionTask : public BatchInsertTask { auto &gstate = gstate_p.Cast(); auto &lstate = lstate_p.Cast(); // merge together the collections - D_ASSERT(lstate.writer); + if (!lstate.writer) { + lstate.writer = &gstate.table.GetStorage().CreateOptimisticWriter(context); + } auto final_collection = gstate.MergeCollections(context, std::move(merge_collections), *lstate.writer); // add the merged-together collection to the set of batch indexes lock_guard l(gstate.lock); From e78d96e43c1262bdb8e0ab694d8abc08ec5b721b Mon Sep 17 00:00:00 2001 From: 
Tishj Date: Wed, 12 Feb 2025 13:17:56 +0100 Subject: [PATCH 040/142] and varchar->map as well --- src/function/cast/vector_cast_helpers.cpp | 67 +++++++++++------------ 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index b443ac7e858e..bc2e11085291 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -387,46 +387,43 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { idx_t end_pos; while (pos < len && (buf[pos] != '=' || input_state.escaped)) { bool set_escaped = false; - if (buf[pos] == '"' || buf[pos] == '\'') { + if (input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - if (!SkipToCloseQuotes(input_state)) { - return false; - } + end_pos = pos; + } else if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToCloseQuotes(input_state)) { + return false; } end_pos = pos; } else if (buf[pos] == '{') { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - if (!SkipToClose(input_state, lvl, '}')) { - return false; - } + if (!SkipToClose(input_state, lvl, '}')) { + return false; } end_pos = pos; } else if (buf[pos] == '[') { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { - return false; - } + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; } end_pos = pos; } else if (buf[pos] == '\\') { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - set_escaped = true; - } + set_escaped = true; end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { if (!start_pos.IsValid()) { start_pos = pos; } @@ -451,46 +448,44 @@ static bool SplitStringMapInternal(const 
string_t &input, OP &state) { SkipWhitespace(input_state); while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { bool set_escaped = false; - if (buf[pos] == '"' || buf[pos] == '\'') { + + if (input_state.escaped) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - if (!SkipToCloseQuotes(input_state)) { - return false; - } + end_pos = pos; + } else if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToCloseQuotes(input_state)) { + return false; } end_pos = pos; } else if (buf[pos] == '{') { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - if (!SkipToClose(input_state, lvl, '}')) { - return false; - } + if (!SkipToClose(input_state, lvl, '}')) { + return false; } end_pos = pos; } else if (buf[pos] == '[') { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { - return false; - } + lvl++; + if (!SkipToClose(input_state, lvl, ']')) { + return false; } end_pos = pos; } else if (buf[pos] == '\\') { if (!start_pos.IsValid()) { start_pos = pos; } - if (!input_state.escaped) { - set_escaped = true; - } + set_escaped = true; end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos]) || input_state.escaped) { + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { if (!start_pos.IsValid()) { start_pos = pos; } From 9bdf2d5bc601a0126206b3d5ec7bd9803bce6e76 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 13:21:56 +0100 Subject: [PATCH 041/142] same for unnamed structs --- src/function/cast/vector_cast_helpers.cpp | 41 +++++++++++------------ 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index bc2e11085291..d679c7126816 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -35,6 +35,7 @@ 
inline static void SkipWhitespace(StringCastInputState &input_state) { auto &pos = input_state.pos; auto &len = input_state.len; if (input_state.escaped) { + //! Escaped whitespace should not be skipped return; } while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { @@ -735,56 +736,52 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Wed, 12 Feb 2025 09:56:56 -0300 Subject: [PATCH 042/142] I think I should use duckdb_destroy_extracted instead of delete --- src/common/adbc/adbc.cpp | 8 ++++---- test/api/adbc/test_adbc.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index 0a77b0e67b42..95cc90639d7a 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -880,7 +880,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto error_msg_extract_statements = duckdb_extract_statements_error(extracted_statements); if (error_msg_extract_statements != nullptr) { // Things went wrong when executing internal prepared statement - delete (reinterpret_cast (&extracted_statements)); + duckdb_destroy_extracted(&extracted_statements); SetError(error, error_msg_extract_statements); return ADBC_STATUS_INTERNAL; } @@ -893,7 +893,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto adbc_status = CheckResult(res, error, error_msg); if (adbc_status != ADBC_STATUS_OK) { // Things went wrong when executing internal prepared statement - delete (reinterpret_cast (&extracted_statements)); + duckdb_destroy_extracted(&extracted_statements); delete statement_internal; return adbc_status; } @@ -904,7 +904,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char SetError(error, duckdb_query_arrow_error(out_result)); delete out_result; delete statement_internal; - delete (reinterpret_cast (&extracted_statements)); + duckdb_destroy_extracted(&extracted_statements); return 
ADBC_STATUS_INVALID_ARGUMENT; } delete out_result; @@ -914,7 +914,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto res = duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, extract_statements_size - 1, &wrapper->statement); auto error_msg = duckdb_prepare_error(wrapper->statement); - delete (reinterpret_cast (&extracted_statements)); + duckdb_destroy_extracted(&extracted_statements); return CheckResult(res, error, error_msg); } diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index 213ed7710dd0..124f6472e6a3 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -19,7 +19,7 @@ bool SUCCESS(AdbcStatusCode status) { return status == ADBC_STATUS_OK; } -const char *duckdb_lib = std::getenv("DUCKDB_INSTALL_LIB"); +const char *duckdb_lib = "/Users/holanda/Documents/Projects/duckdb/cmake-build-debug/src/libduckdb.dylib"; class ADBCTestDatabase { public: From 3edeac6b5069ee4ed66dffa60477ea4843d0e57d Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 13:59:44 +0100 Subject: [PATCH 043/142] turn off RESPECT_SCOPES for struct keys, so escapes are interpreted even when they would otherwise be 'inside of a deeper scope' --- src/function/cast/vector_cast_helpers.cpp | 51 +++------- test/sql/cast/string_to_struct_escapes.test | 103 ++++++++++++++++++++ 2 files changed, 118 insertions(+), 36 deletions(-) create mode 100644 test/sql/cast/string_to_struct_escapes.test diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index d679c7126816..092c02d10c8a 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -126,6 +126,7 @@ struct CountPartOperation { } }; +template static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t end) { D_ASSERT(start <= end); auto length = end - start; @@ -162,17 +163,20 @@ static string_t HandleString(Vector &vec, const 
char *buf, idx_t start, idx_t en scopes.pop(); } if (!quoted && (current_char == '[' || current_char == '{' || current_char == '(')) { - //! New scope - char end_char; - if (current_char == '[') { - end_char = ']'; - } else if (current_char == '{') { - end_char = '}'; - } else { - D_ASSERT(current_char == '('); - end_char = ')'; + if (RESPECT_SCOPES) { + //! 'RESPECT_SCOPES' is false in things like STRUCT keys, these are regular strings + //! New scope + char end_char; + if (current_char == '[') { + end_char = ']'; + } else if (current_char == '{') { + end_char = '}'; + } else { + D_ASSERT(current_char == '('); + end_char = ')'; + } + scopes.push(end_char); } - scopes.push(end_char); } //! Regular character string_data[copied_count++] = current_char; @@ -575,31 +579,6 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector(temp_vec, buf, key_start, end_pos); auto it = child_names.find(child_name); if (it == child_names.end()) { return false; // false key diff --git a/test/sql/cast/string_to_struct_escapes.test b/test/sql/cast/string_to_struct_escapes.test new file mode 100644 index 000000000000..9d9f03d4eaac --- /dev/null +++ b/test/sql/cast/string_to_struct_escapes.test @@ -0,0 +1,103 @@ +# name: test/sql/cast/string_to_struct_escapes.test +# group: [cast] + +query I +SELECT $${name: value, age: 30}$$::STRUCT(name VARCHAR, age INT); +---- +{'name': value, 'age': 30} + +query I +SELECT $${name: John, city: "New York"}$$::STRUCT(name VARCHAR, city VARCHAR); +---- +{'name': John, 'city': New York} + +query I +SELECT $${quote_at_start: "\"test\"", age: 30}$$::STRUCT(quote_at_start VARCHAR, age INT); +---- +{'quote_at_start': "test", 'age': 30} + +query I +SELECT $${user_name: Alice, status: active}$$::STRUCT(user_name VARCHAR, status VARCHAR); +---- +{'user_name': Alice, 'status': active} + +query I +SELECT $${special_characters: "comma, semicolon; and backslash\\", age: 30}$$::STRUCT(special_characters VARCHAR, age INT); +---- +{'special_characters': 
comma, semicolon; and backslash\, 'age': 30} + +query I +SELECT $${a: 10, b: "hello world"}$$::STRUCT(a INT, b VARCHAR); +---- +{'a': 10, 'b': hello world} + +query I +SELECT $${first_name: "John", last_name: "Doe", age: 28}$$::STRUCT(first_name VARCHAR, last_name VARCHAR, age INT); +---- +{'first_name': John, 'last_name': Doe, 'age': 28} + +query I +SELECT $${first name: John, age: 30}$$::STRUCT("first name" VARCHAR, age INT); +---- +{'first name': John, 'age': 30} + +# Invalid: Value contains a quote that isn't escaped +statement error +SELECT $${name: "John "Doe"}$$::STRUCT(name VARCHAR); +---- +can't be cast to the destination type + +# Invalid: Value contains a comma that isn't escaped +statement error +SELECT $${name: John, age, 30}$$::STRUCT(name VARCHAR, age INT); +---- +can't be cast to the destination type + +# Name is free to contain `,`, only `:` is problematic +query I +SELECT $${user,name: Alice, age: 30}$$::STRUCT("user,name" VARCHAR, age INT); +---- +{'user,name': Alice, 'age': 30} + +# Invalid: Contains an unescaped closing bracket +statement error +SELECT $${name: Alice, age: 30})$$::STRUCT(name VARCHAR, age INT); +---- +can't be cast to the destination type + +# Invalid: Name contains a backslash +statement error +SELECT $${backslash\name: value}$$::STRUCT("backslash\name" VARCHAR); +---- +can't be cast to the destination type + +# first `:` is not escaped, won't match the "name:" struct key +statement error +SELECT $${name: test, value: 30}$$::STRUCT("name:" VARCHAR, value INT); +---- +can't be cast to the destination type + +# Name can contain escaped `:` +query I +SELECT $${name\:: test, value: 30}$$::STRUCT("name:" VARCHAR, value INT); +---- +{'name:': test, 'value': 30} + +# Name consists of `{}`, not a problem, with this syntax we expect a name, which is a plain string +# Only reserved character there is `:` (and quotes, and backslash of course) +query I +SELECT $${{name}: John, age: 3}$$::STRUCT("{name}" VARCHAR, age INT); +---- 
+{'{name}': John, 'age': 3} + +# Name has `{` which normally starts a bracket that disables interpreting escape characters +query I +SELECT $${{\"name\"}: John, age: 3}$$::STRUCT("{""name""}" VARCHAR, age INT); +---- +{'{"name"}': John, 'age': 3} + +# Invalid: Unterminated string value +statement error +SELECT $${name: "John, age: 30}$$::STRUCT(name VARCHAR, age INT); +---- +can't be cast to the destination type From da9ab9032a69d10d5a261183f9536c1312f2e2c8 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 14:19:15 +0100 Subject: [PATCH 044/142] more tests --- test/sql/cast/string_to_struct_escapes.test | 103 +++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/test/sql/cast/string_to_struct_escapes.test b/test/sql/cast/string_to_struct_escapes.test index 9d9f03d4eaac..869a48bc35c6 100644 --- a/test/sql/cast/string_to_struct_escapes.test +++ b/test/sql/cast/string_to_struct_escapes.test @@ -47,7 +47,7 @@ SELECT $${name: "John "Doe"}$$::STRUCT(name VARCHAR); ---- can't be cast to the destination type -# Invalid: Value contains a comma that isn't escaped +# second key has no ending character (:) statement error SELECT $${name: John, age, 30}$$::STRUCT(name VARCHAR, age INT); ---- @@ -101,3 +101,104 @@ statement error SELECT $${name: "John, age: 30}$$::STRUCT(name VARCHAR, age INT); ---- can't be cast to the destination type + +query I +SELECT $${}$$::STRUCT(name VARCHAR, age INT); +---- +{'name': NULL, 'age': NULL} + +# STRUCT with whitespace around colon (escaped) +query I +SELECT $${name : John, age : 30}$$::STRUCT(name VARCHAR, age INT); +---- +{'name': John, 'age': 30} + +# STRUCT with escaped backslash in value +query I +SELECT $${path: "C:\\Users\\John"}$$::STRUCT(path VARCHAR); +---- +{'path': C:\Users\John} + +# STRUCT with special characters in value, properly escaped +query I +SELECT $${description: "Special characters: \\, \", ;, (, )"}$$::STRUCT(description VARCHAR); +---- +{'description': Special characters: \, ", 
;, (, )} + +# Valid: Name with escaped space +query I +SELECT $${first\ name: "John", age: 30}$$::STRUCT("first name" VARCHAR, age INT); +---- +{'first name': John, 'age': 30} + +# Valid: Name with escaped quote +query I +SELECT $${\"quote at start\": "value", age: 30}$$::STRUCT("""quote at start""" VARCHAR, age INT); +---- +{'"quote at start"': value, 'age': 30} + +# Valid: Name with escaped backslash +query I +SELECT $${backslash\\name: "John Doe", age: 30}$$::STRUCT("backslash\name" VARCHAR, age INT); +---- +{'backslash\name': John Doe, 'age': 30} + +# Valid: Name with escaped comma +query I +SELECT $${user\,name: "Alice", age: 25}$$::STRUCT("user,name" VARCHAR, age INT); +---- +{'user,name': Alice, 'age': 25} + +# Valid: Name with escaped parenthesis +query I +SELECT $${user\(name\): "Alice", status: "active"}$$::STRUCT("user(name)" VARCHAR, status VARCHAR); +---- +{'user(name)': Alice, 'status': active} + +# Valid: Name with unescaped parenthesis +query I +SELECT $${user(name): "Alice", status: "active"}$$::STRUCT("user(name)" VARCHAR, status VARCHAR); +---- +{'user(name)': Alice, 'status': active} + +# Valid: Name with escaped space at end +query I +SELECT $${user\ name\ : "Alice", age\ : 25}$$::STRUCT("user name " VARCHAR, "age " INT); +---- +{'user name ': Alice, 'age ': 25} + +# Invalid: Name contains unescaped quote +statement error +SELECT $${"quote"start": "value", age: 30}$$::STRUCT("quote""start" VARCHAR, age INT); +---- +can't be cast to the destination type + +# Invalid: Name contains unescaped backslash +statement error +SELECT $${backslash\name: "John", age: 30}$$::STRUCT("backslash\name" VARCHAR, age INT); +---- +can't be cast to the destination type + +# Valid: Name contains (unescaped) opening parenthesis +query I +SELECT $${user(name: "Alice", age: 25}$$::STRUCT("user(name" VARCHAR, age INT); +---- +{'user(name': Alice, 'age': 25} + +# Name is single double quote +query I +SELECT $${\": "value", age: 30}$$::STRUCT("""" VARCHAR, age INTEGER) 
+---- +{'"': value, 'age': 30} + +# Name with only a special character (escaped) +query I +SELECT $${\\: "escaped", age: 30}$$::STRUCT("\" VARCHAR, age INT); +---- +{'\': escaped, 'age': 30} + +# Name with only a special character (not escaped) +query I +SELECT $${@: "value", age: 30}$$::STRUCT("@" VARCHAR, age INT); +---- +{'@': value, 'age': 30} From ba1cb2eb64a68a6a24094f4fe8dfb9cf23f9c18f Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 14:49:59 +0100 Subject: [PATCH 045/142] add escape tests for maps, also fix a bug: map keys are allowed to be empty, should accept this in cast --- src/function/cast/vector_cast_helpers.cpp | 11 +- test/sql/cast/string_to_map_escapes.test | 141 ++++++++++++++++++++++ 2 files changed, 146 insertions(+), 6 deletions(-) create mode 100644 test/sql/cast/string_to_map_escapes.test diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 092c02d10c8a..be2a21b195c8 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -280,7 +280,6 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } else { auto start = start_pos.GetIndex(); auto end = (end_pos + 1) - start; - auto substr = std::string(buf + start, end); state.HandleValue(buf, start, end_pos + 1); } seen_value = true; @@ -441,11 +440,12 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { return false; } if (!start_pos.IsValid()) { - //! Key can not be empty - return false; + start_pos = 0; + end_pos = 0; + } else { + end_pos++; } - auto key_substr = std::string(buf + start_pos.GetIndex(), buf + end_pos + 1); - if (!state.HandleKey(buf, start_pos.GetIndex(), end_pos + 1)) { + if (!state.HandleKey(buf, start_pos.GetIndex(), end_pos)) { return false; } start_pos = optional_idx(); @@ -506,7 +506,6 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { //! 
Value is empty state.HandleValue(buf, 0, 0); } else { - auto value_substr = std::string(buf + start_pos.GetIndex(), buf + end_pos + 1); state.HandleValue(buf, start_pos.GetIndex(), end_pos + 1); } if (buf[pos] == '}') { diff --git a/test/sql/cast/string_to_map_escapes.test b/test/sql/cast/string_to_map_escapes.test new file mode 100644 index 000000000000..40cf7834451b --- /dev/null +++ b/test/sql/cast/string_to_map_escapes.test @@ -0,0 +1,141 @@ +# name: test/sql/cast/string_to_map_escapes.test +# group: [cast] + +# Valid: key and value with escaped space +query I +SELECT $${key\ with\ space = value\ with\ space}$$::MAP(VARCHAR, VARCHAR); +---- +{key with space=value with space} + +# Valid: key with escaped quote and value with escaped quote +query I +SELECT $${\"key\" = \"value\"}$$::MAP(VARCHAR, VARCHAR); +---- +{"key"="value"} + +# Valid: key with escaped backslash, value with escaped backslash +query I +SELECT $${key\ with\ backslash = value\ with\ backslash}$$::MAP(VARCHAR, VARCHAR); +---- +{key with backslash=value with backslash} + +# Valid: key with escaped comma, value with escaped comma +query I +SELECT $${key\ with\, comma = value\ with\, comma}$$::MAP(VARCHAR, VARCHAR); +---- +{key with, comma=value with, comma} + +# Valid: key and value with escaped colon +query I +SELECT $${key\ with\ colon\: = value\ with\ colon\:}$$::MAP(VARCHAR, VARCHAR); +---- +{key with colon:=value with colon:} + +## FIXME: not sure what to do here, maybe we shouldn't "respect scopes" if the child type is not nested +## Valid: key and value with parentheses +#query I +#SELECT $${key\ (with\ parens) = value\ (with\ parens)}$$::MAP(VARCHAR, VARCHAR); +#---- +#{key (with parens)=value (with parens)} + +# Valid: key contains unescaped space +query I +SELECT $${key with space = value with space}$$::MAP(VARCHAR, VARCHAR); +---- +{key with space=value with space} + +# Valid: key input contains quotes (the unescaped quotes are consumed by quote parsing) +query I +SELECT $${key"with"quote = value}$$::MAP(VARCHAR, VARCHAR); +---- 
+{keywithquote=value} + +# Valid: value input contains quotes +query I +SELECT $${key = value"with"quote}$$::MAP(VARCHAR, VARCHAR); +---- +{key=valuewithquote} + +# Valid: key contains unescaped comma +query I +SELECT $${key,with,comma = value}$$::MAP(VARCHAR, VARCHAR); +---- +{key,with,comma=value} + +# Invalid: value contains unescaped comma +statement error +SELECT $${key = value,with,comma}$$::MAP(VARCHAR, VARCHAR); +---- +can't be cast to the destination type MAP + +# Valid: key contains unescaped curly bracket +query I +SELECT $${key{with}bracket = value}$$::MAP(VARCHAR, VARCHAR); +---- +{key{with}bracket=value} + +# Valid: value contains unescaped curly bracket +query I +SELECT $${key = value{with}bracket}$$::MAP(VARCHAR, VARCHAR); +---- +{key=value{with}bracket} + +# Valid: key contains useless backslashes +query I +SELECT $${key\with\backslash = value}$$::MAP(VARCHAR, VARCHAR); +---- +{keywithbackslash=value} + +# Valid: value contains useless backslashes +query I +SELECT $${key = value\with\backslash}$$::MAP(VARCHAR, VARCHAR); +---- +{key=valuewithbackslash} + +# Valid: key/value contains unescaped equal sign +query II +SELECT $${key=with=equals = value}$$::MAP(VARCHAR, VARCHAR) a, a['key']; +---- +{key=with=equals = value} with=equals = value + +# Valid: key/value contains unescaped equal sign +query II +SELECT $${key\=with=equals = value}$$::MAP(VARCHAR, VARCHAR) a, a['key=with']; +---- +{key=with=equals = value} equals = value + +# Valid: key/value contains unescaped equal sign +query II +SELECT $${key\=with\=equals = value}$$::MAP(VARCHAR, VARCHAR) a, a['key=with=equals']; +---- +{key=with=equals=value} value + +# Edge Case: Empty MAP with no keys/values +query I +SELECT $${}$$::MAP(VARCHAR, VARCHAR); +---- +{} + +# Valid: MAP with empty key and value +query I +SELECT $${=}$$::MAP(VARCHAR, VARCHAR); +---- +{=} + +# Edge Case: MAP with special characters only (escaped) +query I +SELECT $${\{escaped\brace\} = \}escaped\brace\\}$$::MAP(VARCHAR, 
VARCHAR); +---- +{{escapedbrace}=}escapedbrace\} + +# Edge Case: MAP with only a key and no value +query I +SELECT $${key=}$$::MAP(VARCHAR, VARCHAR); +---- +{key=} + +# Valid: MAP with an empty key +query I +SELECT $${=value}$$::MAP(VARCHAR, VARCHAR); +---- +{=value} From 89eccf147cc017e9d7c7c279575b45da33208144 Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 12 Feb 2025 10:56:14 -0300 Subject: [PATCH 046/142] Woopsie --- test/api/adbc/test_adbc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index 124f6472e6a3..213ed7710dd0 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -19,7 +19,7 @@ bool SUCCESS(AdbcStatusCode status) { return status == ADBC_STATUS_OK; } -const char *duckdb_lib = "/Users/holanda/Documents/Projects/duckdb/cmake-build-debug/src/libduckdb.dylib"; +const char *duckdb_lib = std::getenv("DUCKDB_INSTALL_LIB"); class ADBCTestDatabase { public: From f1179bf6b65ef8941c6fdd1416f9baa7ba9b5836 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 15:31:51 +0100 Subject: [PATCH 047/142] fix unused variable --- src/function/cast/vector_cast_helpers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index be2a21b195c8..f00bb8157249 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -279,7 +279,6 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { state.HandleValue(buf, 0, 0); } else { auto start = start_pos.GetIndex(); - auto end = (end_pos + 1) - start; state.HandleValue(buf, start, end_pos + 1); } seen_value = true; From c640ee19654b461dc3566987682c683ddb4c31b4 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Wed, 12 Feb 2025 16:06:39 +0100 Subject: [PATCH 048/142] improve performance of hashing longer strings --- src/common/types/hash.cpp | 73 
+++++++++++---------------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp index 83a1ef22310e..160d5f3c2924 100644 --- a/src/common/types/hash.cpp +++ b/src/common/types/hash.cpp @@ -4,6 +4,7 @@ #include "duckdb/common/types/string_type.hpp" #include "duckdb/common/types/interval.hpp" #include "duckdb/common/types/uhugeint.hpp" +#include "duckdb/common/fast_mem.hpp" #include #include @@ -80,68 +81,36 @@ hash_t Hash(char *val) { return Hash(val); } -// MIT License -// Copyright (c) 2018-2021 Martin Ankerl -// https://github.com/martinus/robin-hood-hashing/blob/3.11.5/LICENSE -hash_t HashBytes(void *ptr, size_t len) noexcept { - static constexpr uint64_t M = UINT64_C(0xc6a4a7935bd1e995); - static constexpr uint64_t SEED = UINT64_C(0xe17a1465); - static constexpr unsigned int R = 47; +hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { + // This seed slightly improves bit distribution, taken from here: + // https://github.com/martinus/robin-hood-hashing/blob/3.11.5/LICENSE + // MIT License Copyright (c) 2018-2021 Martin Ankerl + hash_t h = 0xe17a1465U ^ (len * 0xc6a4a7935bd1e995U); - auto const *const data64 = static_cast(ptr); - uint64_t h = SEED ^ (len * M); - - size_t const n_blocks = len / 8; - for (size_t i = 0; i < n_blocks; ++i) { - auto k = Load(reinterpret_cast(data64 + i)); + // Hash/combine in blocks of 8 bytes + for (const auto end = ptr + len - (len & 7U); ptr != end; ptr += 8U) { + h ^= Load(ptr); + h *= 0xd6e8feb86659fd93U; + } - k *= M; - k ^= k >> R; - k *= M; + // XOR with remaining (<8) bytes + hash_t hr = 0; + FastMemcpy(&hr, ptr, len & 7U); + h ^= hr; - h ^= k; - h *= M; - } + // Finalize + h *= 0xd6e8feb86659fd93U; + h ^= h >> 32; - auto const *const data8 = reinterpret_cast(data64 + n_blocks); - switch (len & 7U) { - case 7: - h ^= static_cast(data8[6]) << 48U; - DUCKDB_EXPLICIT_FALLTHROUGH; - case 6: - h ^= static_cast(data8[5]) << 40U; - 
DUCKDB_EXPLICIT_FALLTHROUGH; - case 5: - h ^= static_cast(data8[4]) << 32U; - DUCKDB_EXPLICIT_FALLTHROUGH; - case 4: - h ^= static_cast(data8[3]) << 24U; - DUCKDB_EXPLICIT_FALLTHROUGH; - case 3: - h ^= static_cast(data8[2]) << 16U; - DUCKDB_EXPLICIT_FALLTHROUGH; - case 2: - h ^= static_cast(data8[1]) << 8U; - DUCKDB_EXPLICIT_FALLTHROUGH; - case 1: - h ^= static_cast(data8[0]); - h *= M; - DUCKDB_EXPLICIT_FALLTHROUGH; - default: - break; - } - h ^= h >> R; - h *= M; - h ^= h >> R; - return static_cast(h); + return h; } hash_t Hash(const char *val, size_t size) { - return HashBytes((void *)val, size); + return HashBytes(const_data_ptr_cast(val), size); } hash_t Hash(uint8_t *val, size_t size) { - return HashBytes((void *)val, size); + return HashBytes(const_data_ptr_cast(val), size); } } // namespace duckdb From f503fbad1dfb57da6964601c44714e94c84c3d56 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Wed, 12 Feb 2025 16:07:28 +0100 Subject: [PATCH 049/142] implement bit-packing for RleBpEncoder and allow for larger dictionaries --- .../include/parquet_rle_bp_encoder.hpp | 139 ++++++++++-------- .../writer/templated_column_writer.hpp | 9 +- extension/parquet/parquet_extension.cpp | 12 +- .../parquet/writer/enum_column_writer.cpp | 9 +- .../writer/primitive_column_writer.cpp | 21 ++- 5 files changed, 107 insertions(+), 83 deletions(-) diff --git a/extension/parquet/include/parquet_rle_bp_encoder.hpp b/extension/parquet/include/parquet_rle_bp_encoder.hpp index af321c160c17..689d0fee132c 100644 --- a/extension/parquet/include/parquet_rle_bp_encoder.hpp +++ b/extension/parquet/include/parquet_rle_bp_encoder.hpp @@ -16,95 +16,116 @@ namespace duckdb { class RleBpEncoder { public: - explicit RleBpEncoder(uint32_t bit_width) - : byte_width((bit_width + 7) / 8), byte_count(idx_t(-1)), run_count(idx_t(-1)) { + explicit RleBpEncoder(uint32_t bit_width_p) : bit_width(bit_width_p), byte_width((bit_width + 7) / 8) { } public: - //! 
NOTE: Prepare is only required if a byte count is required BEFORE writing - //! This is the case with e.g. writing repetition/definition levels - //! If GetByteCount() is not required, prepare can be safely skipped - void BeginPrepare(uint32_t first_value) { - byte_count = 0; - run_count = 1; - current_run_count = 1; - last_value = first_value; - } - void PrepareValue(uint32_t value) { - if (value != last_value) { - FinishRun(); - last_value = value; - } else { - current_run_count++; - } - } - void FinishPrepare() { - FinishRun(); + void BeginWrite() { + rle_count = 0; + bp_block_count = 0; } - void BeginWrite(WriteStream &writer, uint32_t first_value) { - // start the RLE runs - last_value = first_value; - current_run_count = 1; - } void WriteValue(WriteStream &writer, uint32_t value) { - if (value != last_value) { + if (bp_block_count != 0) { + // We already committed to a BP run + D_ASSERT(rle_count == 0); + bp_block[bp_block_count++] = value; + if (bp_block_count == BP_BLOCK_SIZE) { + WriteRun(writer); + } + return; + } + + if (rle_count == 0) { + // Starting fresh, try for an RLE run first + rle_value = value; + rle_count = 1; + return; + } + + // We're trying for an RLE run + if (rle_value == value) { + // Same as current RLE value + rle_count++; + return; + } + + // Value differs from current RLE value + if (rle_count >= MINIMUM_RLE_COUNT) { + // We have enough values for an RLE run WriteRun(writer); - last_value = value; - } else { - current_run_count++; + rle_value = value; + rle_count = 1; + return; + } + + // Not enough values, convert and commit to a BP run + D_ASSERT(bp_block_count == 0); + for (idx_t i = 0; i < rle_count; i++) { + bp_block[bp_block_count++] = rle_value; } + bp_block[bp_block_count++] = value; + rle_count = 0; } + void FinishWrite(WriteStream &writer) { WriteRun(writer); } - idx_t GetByteCount() { - D_ASSERT(byte_count != idx_t(-1)); - return byte_count; - } - private: - //! meta information + //! 
Meta information + uint32_t bit_width; uint32_t byte_width; - //! RLE run information - idx_t byte_count; - idx_t run_count; - idx_t current_run_count; - uint32_t last_value; + + //! RLE stuff + static constexpr idx_t MINIMUM_RLE_COUNT = 4; + uint32_t rle_value; + idx_t rle_count; + + //! BP stuff + static constexpr idx_t BP_BLOCK_SIZE = BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; + uint32_t bp_block[BP_BLOCK_SIZE] = {0}; + uint32_t bp_block_packed[BP_BLOCK_SIZE] = {0}; + idx_t bp_block_count; private: - void FinishRun() { - // last value, or value has changed - // write out the current run - byte_count += ParquetDecodeUtils::GetVarintSize(current_run_count << 1) + byte_width; - current_run_count = 1; - run_count++; - } void WriteRun(WriteStream &writer) { - // write the header of the run - ParquetDecodeUtils::VarintEncode(current_run_count << 1, writer); - // now write the value - D_ASSERT(last_value >> (byte_width * 8) == 0); + if (rle_count != 0) { + WriteCurrentBlockRLE(writer); + } else { + WriteCurrentBlockBP(writer); + } + } + + void WriteCurrentBlockRLE(WriteStream &writer) { + ParquetDecodeUtils::VarintEncode(rle_count << 1 | 0, writer); // (... | 0) signals RLE run + D_ASSERT(rle_value >> (byte_width * 8) == 0); switch (byte_width) { case 1: - writer.Write(last_value); + writer.Write(rle_value); break; case 2: - writer.Write(last_value); + writer.Write(rle_value); break; case 3: - writer.Write(last_value & 0xFF); - writer.Write((last_value >> 8) & 0xFF); - writer.Write((last_value >> 16) & 0xFF); + writer.Write(rle_value & 0xFF); + writer.Write((rle_value >> 8) & 0xFF); + writer.Write((rle_value >> 16) & 0xFF); break; case 4: - writer.Write(last_value); + writer.Write(rle_value); break; default: throw InternalException("unsupported byte width for RLE encoding"); } - current_run_count = 1; + rle_count = 0; + } + + void WriteCurrentBlockBP(WriteStream &writer) { + ParquetDecodeUtils::VarintEncode(BP_BLOCK_SIZE / 8 << 1 | 1, writer); // (... 
| 1) signals BP run + ParquetDecodeUtils::BitPackAligned(bp_block, data_ptr_cast(bp_block_packed), BP_BLOCK_SIZE, bit_width); + writer.WriteData(data_ptr_cast(bp_block_packed), BP_BLOCK_SIZE * bit_width / 8); + bp_block_count = 0; } }; diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index a4324a70b48a..d9dbbc4bb4c4 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -240,15 +240,12 @@ class StandardColumnWriter : public PrimitiveColumnWriter { auto &src_val = data_ptr[r]; auto value_index = page_state.dictionary.at(src_val); if (!page_state.dict_written_value) { - // first value - // write the bit-width as a one-byte entry + // first value: write the bit-width as a one-byte entry and initialize writer temp_writer.Write(page_state.dict_bit_width); - // now begin writing the actual value - page_state.dict_encoder.BeginWrite(temp_writer, value_index); + page_state.dict_encoder.BeginWrite(); page_state.dict_written_value = true; - } else { - page_state.dict_encoder.WriteValue(temp_writer, value_index); } + page_state.dict_encoder.WriteValue(temp_writer, value_index); } break; } diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index f93b3f04acbe..9545b3cb96c6 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -192,7 +192,12 @@ struct ParquetWriteBindData : public TableFunctionData { bool debug_use_openssl = true; //! After how many distinct values should we abandon dictionary compression and bloom filters? - idx_t dictionary_size_limit = row_group_size / 100; + idx_t dictionary_size_limit = row_group_size / 20; + + void SetToDefaultDictionarySizeLimit() { + // This depends on row group size so we should "reset" if the row group size is changed + dictionary_size_limit = row_group_size / 20; + } //! 
What false positive rate are we willing to accept for bloom filters double bloom_filter_false_positive_ratio = 0.01; @@ -1185,6 +1190,7 @@ unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBi D_ASSERT(names.size() == sql_types.size()); bool row_group_size_bytes_set = false; bool compression_level_set = false; + bool dictionary_size_limit_set = false; auto bind_data = make_uniq(); for (auto &option : input.info.options) { const auto loption = StringUtil::Lower(option.first); @@ -1194,6 +1200,9 @@ unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBi } if (loption == "row_group_size" || loption == "chunk_size") { bind_data->row_group_size = option.second[0].GetValue(); + if (!dictionary_size_limit_set) { + bind_data->SetToDefaultDictionarySizeLimit(); + } } else if (loption == "row_group_size_bytes") { auto roption = option.second[0]; if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) { @@ -1269,6 +1278,7 @@ unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBi throw BinderException("dictionary_size_limit must be greater than 0 or 0 to disable"); } bind_data->dictionary_size_limit = val; + dictionary_size_limit_set = true; } else if (loption == "bloom_filter_false_positive_ratio") { auto val = option.second[0].GetValue(); if (val <= 0) { diff --git a/extension/parquet/writer/enum_column_writer.cpp b/extension/parquet/writer/enum_column_writer.cpp index 51a2959cf36c..8518019efedd 100644 --- a/extension/parquet/writer/enum_column_writer.cpp +++ b/extension/parquet/writer/enum_column_writer.cpp @@ -36,15 +36,12 @@ void EnumColumnWriter::WriteEnumInternal(WriteStream &temp_writer, Vector &input for (idx_t r = chunk_start; r < chunk_end; r++) { if (mask.RowIsValid(r)) { if (!page_state.written_value) { - // first value - // write the bit-width as a one-byte entry + // first value: write the bit-width as a one-byte entry and initialize writer temp_writer.Write(bit_width); - // now begin writing the actual value - 
page_state.encoder.BeginWrite(temp_writer, ptr[r]); + page_state.encoder.BeginWrite(); page_state.written_value = true; - } else { - page_state.encoder.WriteValue(temp_writer, ptr[r]); } + page_state.encoder.WriteValue(temp_writer, ptr[r]); } } } diff --git a/extension/parquet/writer/primitive_column_writer.cpp b/extension/parquet/writer/primitive_column_writer.cpp index 627605fa23fa..d69504717365 100644 --- a/extension/parquet/writer/primitive_column_writer.cpp +++ b/extension/parquet/writer/primitive_column_writer.cpp @@ -123,22 +123,21 @@ void PrimitiveColumnWriter::WriteLevels(WriteStream &temp_writer, const unsafe_v } // write the levels using the RLE-BP encoding - auto bit_width = RleBpDecoder::ComputeBitWidth((max_value)); + const auto bit_width = RleBpDecoder::ComputeBitWidth((max_value)); RleBpEncoder rle_encoder(bit_width); - rle_encoder.BeginPrepare(levels[offset]); - for (idx_t i = offset + 1; i < offset + count; i++) { - rle_encoder.PrepareValue(levels[i]); + // have to write to an intermediate stream first because we need to know the size + MemoryStream intermediate_stream(Allocator::DefaultAllocator()); + rle_encoder.BeginWrite(); + for (idx_t i = offset; i < offset + count; i++) { + rle_encoder.WriteValue(intermediate_stream, levels[i]); } - rle_encoder.FinishPrepare(); + rle_encoder.FinishWrite(intermediate_stream); // start off by writing the byte count as a uint32_t - temp_writer.Write(rle_encoder.GetByteCount()); - rle_encoder.BeginWrite(temp_writer, levels[offset]); - for (idx_t i = offset + 1; i < offset + count; i++) { - rle_encoder.WriteValue(temp_writer, levels[i]); - } - rle_encoder.FinishWrite(temp_writer); + temp_writer.Write(NumericCast(intermediate_stream.GetPosition())); + // copy over the written data + temp_writer.WriteData(intermediate_stream.GetData(), intermediate_stream.GetPosition()); } void PrimitiveColumnWriter::NextPage(PrimitiveColumnWriterState &state) { From c8e5916cb4f41dc5c31559af8fc4b1b1c6198e48 Mon Sep 17 00:00:00 
2001 From: Laurens Kuiper Date: Wed, 12 Feb 2025 16:21:16 +0100 Subject: [PATCH 050/142] init primitive dictionary --- extension/parquet/column_writer.cpp | 6 +- .../writer/primitive_column_writer.hpp | 5 +- .../writer/templated_column_writer.hpp | 15 ++- .../writer/primitive_column_writer.cpp | 2 +- .../duckdb/common/primitive_dictionary.hpp | 106 ++++++++++++++++++ 5 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 src/include/duckdb/common/primitive_dictionary.hpp diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 9693724d2c35..178d2fe91d0b 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -209,8 +209,8 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat // GeoParquet files. class WKBColumnWriterState final : public StandardColumnWriterState { public: - WKBColumnWriterState(ClientContext &context, duckdb_parquet::RowGroup &row_group, idx_t col_idx) - : StandardColumnWriterState(row_group, col_idx), geo_data(), geo_data_writer(context) { + WKBColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx) + : StandardColumnWriterState(writer, row_group, col_idx), geo_data(), geo_data_writer(writer.GetContext()) { } GeoParquetColumnMetadata geo_data; @@ -228,7 +228,7 @@ class WKBColumnWriter final : public StandardColumnWriter InitializeWriteState(duckdb_parquet::RowGroup &row_group) override { - auto result = make_uniq(context, row_group, row_group.columns.size()); + auto result = make_uniq(writer, row_group, row_group.columns.size()); result->encoding = Encoding::RLE_DICTIONARY; RegisterToRowGroup(row_group); return std::move(result); diff --git a/extension/parquet/include/writer/primitive_column_writer.hpp b/extension/parquet/include/writer/primitive_column_writer.hpp index 0a97064e918a..4e9e55436a6d 100644 --- a/extension/parquet/include/writer/primitive_column_writer.hpp +++ 
b/extension/parquet/include/writer/primitive_column_writer.hpp @@ -36,12 +36,13 @@ struct PageWriteInformation { class PrimitiveColumnWriterState : public ColumnWriterState { public: - PrimitiveColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { + PrimitiveColumnWriterState(ParquetWriter &writer_p, duckdb_parquet::RowGroup &row_group, idx_t col_idx) + : writer(writer_p), row_group(row_group), col_idx(col_idx) { page_info.emplace_back(); } ~PrimitiveColumnWriterState() override = default; + ParquetWriter &writer; duckdb_parquet::RowGroup &row_group; idx_t col_idx; vector page_info; diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index d9dbbc4bb4c4..5adfbd832e94 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -13,6 +13,7 @@ #include "parquet_dbp_encoder.hpp" #include "parquet_dlba_encoder.hpp" #include "parquet_rle_bp_encoder.hpp" +#include "duckdb/common/primitive_dictionary.hpp" namespace duckdb { @@ -34,8 +35,10 @@ static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, cons template class StandardColumnWriterState : public PrimitiveColumnWriterState { public: - StandardColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx) - : PrimitiveColumnWriterState(row_group, col_idx) { + StandardColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx) + : PrimitiveColumnWriterState(writer, row_group, col_idx), + dictionary(BufferAllocator::Get(writer.GetContext()), writer.DictionarySizeLimit(), + 2e6) { // TODO: make size configurable } ~StandardColumnWriterState() override = default; @@ -44,7 +47,7 @@ class StandardColumnWriterState : public PrimitiveColumnWriterState { idx_t total_string_size = 0; uint32_t key_bit_width = 0; - unordered_map dictionary; + 
PrimitiveDictionary dictionary; duckdb_parquet::Encoding::type encoding; }; @@ -53,7 +56,7 @@ class StandardWriterPageState : public ColumnWriterPageState { public: explicit StandardWriterPageState(const idx_t total_value_count, const idx_t total_string_size, duckdb_parquet::Encoding::type encoding_p, - const unordered_map &dictionary_p) + const PrimitiveDictionary &dictionary_p) : encoding(encoding_p), dbp_initialized(false), dbp_encoder(total_value_count), dlba_initialized(false), dlba_encoder(total_value_count, total_string_size), bss_encoder(total_value_count, sizeof(TGT)), dictionary(dictionary_p), dict_written_value(false), @@ -69,7 +72,7 @@ class StandardWriterPageState : public ColumnWriterPageState { BssEncoder bss_encoder; - const unordered_map &dictionary; + const PrimitiveDictionary &dictionary; bool dict_written_value; uint32_t dict_bit_width; RleBpEncoder dict_encoder; @@ -86,7 +89,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { public: unique_ptr InitializeWriteState(duckdb_parquet::RowGroup &row_group) override { - auto result = make_uniq>(row_group, row_group.columns.size()); + auto result = make_uniq>(writer, row_group, row_group.columns.size()); result->encoding = duckdb_parquet::Encoding::RLE_DICTIONARY; RegisterToRowGroup(row_group); return std::move(result); diff --git a/extension/parquet/writer/primitive_column_writer.cpp b/extension/parquet/writer/primitive_column_writer.cpp index d69504717365..675379873809 100644 --- a/extension/parquet/writer/primitive_column_writer.cpp +++ b/extension/parquet/writer/primitive_column_writer.cpp @@ -13,7 +13,7 @@ PrimitiveColumnWriter::PrimitiveColumnWriter(ParquetWriter &writer, idx_t schema } unique_ptr PrimitiveColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) { - auto result = make_uniq(row_group, row_group.columns.size()); + auto result = make_uniq(writer, row_group, row_group.columns.size()); RegisterToRowGroup(row_group); return std::move(result); } diff --git 
a/src/include/duckdb/common/primitive_dictionary.hpp b/src/include/duckdb/common/primitive_dictionary.hpp new file mode 100644 index 000000000000..db77cad84410 --- /dev/null +++ b/src/include/duckdb/common/primitive_dictionary.hpp @@ -0,0 +1,106 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/primitive_dictionary.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/types/string_type.hpp" +#include "duckdb/common/allocator.hpp" + +namespace duckdb { + +template +class PrimitiveDictionary { +private: + static constexpr uint32_t INVALID_OFFSET = static_cast(-1); + + struct primitive_dictionary_entry_t { + T value; + uint32_t offset; + }; + +public: + PrimitiveDictionary(Allocator &allocator, idx_t maximum_size_p, idx_t plain_capacity_p) + : maximum_size(maximum_size_p), size(0), capacity(NextPowerOfTwo(maximum_size * 2)), + capacity_mask(capacity - 1), plain_capacity(plain_capacity_p), plain_offset(0), + allocated_dictionary(allocator.Allocate(capacity * sizeof(primitive_dictionary_entry_t))), + allocated_plain(allocator.Allocate(std::is_same::value ? 
plain_capacity : capacity * sizeof(T))), + dictionary(reinterpret_cast(allocated_dictionary.get())), + plain(allocated_plain.get()) { + // Initialize empty + for (idx_t i = 0; i < capacity; i++) { + dictionary[i].offset = INVALID_OFFSET; + } + } + +public: + bool Insert(T value, uint32_t &offset) { + auto &entry = Lookup(value); + bool success = size < capacity; + if (entry.offset == INVALID_OFFSET) { + success &= AddToPlain(value); + entry.value = value; + entry.offset = size++; + } + offset = entry.offset; + return success; + } + + uint32_t GetOffset(const T &value) const { + return Lookup(value).offset; + } + +private: + primitive_dictionary_entry_t &Lookup(const T &value) const { + return dictionary[Hash(value) & capacity_mask]; + } + + bool AddToPlain(const T &value) { + static_cast(plain)[plain_offset++] = value; + return true; + } + + bool AddToPlain(string_t &value) { + if (plain_offset + sizeof(uint32_t) + value.GetSize() > plain_capacity) { + return false; // Out of capacity + } + + // Store string length and increment offset + Store(UnsafeNumericCast(value.GetSize()), plain + plain_offset); + plain_offset += sizeof(uint32_t); + + // Copy over string data to plain, update "value" to point to it, and increment offset + memcpy(plain + plain_offset, value.GetData(), value.GetSize()); + value = string_t(char_ptr_cast(plain + plain_offset), value.GetSize()); + plain_offset += value.GetSize(); + + return true; + } + +private: + //! Maximum size and current size + const idx_t maximum_size; + idx_t size; + + //! Capacity (power of two) and corresponding mask + const idx_t capacity; + const idx_t capacity_mask; + + //! Capacity/offset of plain encoded data + const idx_t plain_capacity; + idx_t plain_offset; + + //! Allocated regions for dictionary/plain + AllocatedData allocated_dictionary; + AllocatedData allocated_plain; + + //! 
Pointers to allocated regions for convenience + primitive_dictionary_entry_t *const dictionary; + data_ptr_t const plain; +}; + +} // namespace duckdb From 1e5d01dee7555f01ce403d576a06d08c893a6802 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 12 Feb 2025 17:34:55 +0100 Subject: [PATCH 051/142] support unnamed structs to appear in the other casts (MAP KEY+VALUE, (named)STRUCT VALUE, LIST VALUE) --- src/function/cast/vector_cast_helpers.cpp | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index f00bb8157249..0600eb1de4ce 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -273,6 +273,16 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { return false; } end_pos = pos; + } else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + //! Start of an (unnamed) STRUCT + idx_t struct_lvl = 0; + if (!SkipToClose(input_state, struct_lvl, ')')) { + return false; + } + end_pos = pos; } else if ((buf[pos] == ',' || buf[pos] == ']')) { if (buf[pos] != ']' || start_pos.IsValid() || seen_value) { if (!start_pos.IsValid()) { @@ -411,6 +421,14 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { return false; } end_pos = pos; + } else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state, lvl, ')')) { + return false; + } + end_pos = pos; } else if (buf[pos] == '[') { if (!start_pos.IsValid()) { start_pos = pos; @@ -474,6 +492,14 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { return false; } end_pos = pos; + } else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state, lvl, ')')) { + return false; + } + end_pos = pos; } else if (buf[pos] == '[') { if (!start_pos.IsValid()) { start_pos = pos; From 
7642be6e9a8b18e0989ae98fff831dcd224c808e Mon Sep 17 00:00:00 2001 From: Richard Wesley Date: Wed, 12 Feb 2025 11:04:59 -0800 Subject: [PATCH 052/142] Issue #8265: AsOf Nested Loop * Set default asof_loop_join_threshold based on tuning tests. --- src/include/duckdb/main/client_config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/duckdb/main/client_config.hpp b/src/include/duckdb/main/client_config.hpp index 4e73f8f922b8..4398a788585c 100644 --- a/src/include/duckdb/main/client_config.hpp +++ b/src/include/duckdb/main/client_config.hpp @@ -102,7 +102,7 @@ struct ClientConfig { //! The number of rows we need on either table to choose a merge join over an IE join idx_t merge_join_threshold = 1000; //! The maximum number of rows to use the nested loop join implementation - idx_t asof_loop_join_threshold = 2048; + idx_t asof_loop_join_threshold = 64; //! The maximum amount of memory to keep buffered in a streaming query result. Default: 1mb. idx_t streaming_buffer_size = 1000000; From 86292911e02543a6b81cd89e059b89b9c4394ddf Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 12 Feb 2025 17:46:06 -0300 Subject: [PATCH 053/142] Also use destroy_statement --- src/common/adbc/adbc.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index 95cc90639d7a..b83bcbc67585 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -875,7 +875,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char duckdb_destroy_prepare(&wrapper->statement); wrapper->statement = nullptr; } - duckdb_extracted_statements extracted_statements; + duckdb_extracted_statements extracted_statements = nullptr; auto extract_statements_size = duckdb_extract_statements(wrapper->connection, query, &extracted_statements); auto error_msg_extract_statements = duckdb_extract_statements_error(extracted_statements); if (error_msg_extract_statements != nullptr) { @@ 
-886,7 +886,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char } // Now lets loop over the statements, and execute every one for (idx_t i = 0; i < extract_statements_size - 1; i++) { - duckdb_prepared_statement statement_internal; + duckdb_prepared_statement statement_internal = nullptr; auto res = duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, i, &statement_internal); auto error_msg = duckdb_prepare_error(statement_internal); @@ -894,7 +894,7 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char if (adbc_status != ADBC_STATUS_OK) { // Things went wrong when executing internal prepared statement duckdb_destroy_extracted(&extracted_statements); - delete statement_internal; + duckdb_destroy_prepare(&statement_internal); return adbc_status; } // Execute @@ -903,12 +903,12 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char if (res != DuckDBSuccess) { SetError(error, duckdb_query_arrow_error(out_result)); delete out_result; - delete statement_internal; + duckdb_destroy_prepare(&statement_internal); duckdb_destroy_extracted(&extracted_statements); return ADBC_STATUS_INVALID_ARGUMENT; } delete out_result; - delete statement_internal; + duckdb_destroy_prepare(&statement_internal); } // Besides ze last, this one we return auto res = duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, From 5649f9141c88a2139300025955fd329a76eeebf4 Mon Sep 17 00:00:00 2001 From: Richard Wesley <13156216+hawkfish@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:34:47 -0800 Subject: [PATCH 054/142] Issue #8265: AsOf Nested Loop * Fix ASCII art final baskslash silliness with an NBS... 
--- src/execution/physical_plan/plan_asof_join.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/execution/physical_plan/plan_asof_join.cpp b/src/execution/physical_plan/plan_asof_join.cpp index aa2df50d6313..10fc0d47d059 100644 --- a/src/execution/physical_plan/plan_asof_join.cpp +++ b/src/execution/physical_plan/plan_asof_join.cpp @@ -31,7 +31,7 @@ static unique_ptr PlanAsOfLoopJoin(LogicalComparisonJoin &op, // ∠*,inequality // | // ⨠swapped - // / \ + // / \  // B W pk:row_number // | // P From de5a838119946e42ab773950218ab79afbe61966 Mon Sep 17 00:00:00 2001 From: Richard Wesley <13156216+hawkfish@users.noreply.github.com> Date: Wed, 12 Feb 2025 18:38:03 -0800 Subject: [PATCH 055/142] Issue #8265: AsOf Nested Loop * Make test deterministic --- test/sql/join/asof/test_asof_join_pushdown.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/sql/join/asof/test_asof_join_pushdown.test b/test/sql/join/asof/test_asof_join_pushdown.test index 465b407ad850..8a55a0ebd400 100644 --- a/test/sql/join/asof/test_asof_join_pushdown.test +++ b/test/sql/join/asof/test_asof_join_pushdown.test @@ -63,7 +63,8 @@ FROM right_pushdown d1 ASOF LEFT JOIN ( SELECT * FROM right_pushdown WHERE value is not NULL ) d2 - ON d1.time >= d2.time; + ON d1.time >= d2.time +ORDER BY ALL; ---- 0 0 0.0 0.0 1 0 NULL 0.0 From e136bc76034e84f0788a39edc8985c2e74f8ab35 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Thu, 13 Feb 2025 11:51:43 +0100 Subject: [PATCH 056/142] integrate PrimitiveDictionary into Parquet writer and improve writing levels --- extension/parquet/column_writer.cpp | 68 +++++------- .../include/parquet_rle_bp_encoder.hpp | 15 ++- .../writer/primitive_column_writer.hpp | 2 +- .../writer/templated_column_writer.hpp | 50 +++------ .../writer/primitive_column_writer.cpp | 15 ++- src/common/serializer/memory_stream.cpp | 4 + src/common/types/hash.cpp | 5 + .../duckdb/common/primitive_dictionary.hpp | 104 ++++++++++++++---- 
.../common/serializer/memory_stream.hpp | 41 +++---- src/include/duckdb/common/types/datetime.hpp | 8 ++ src/include/duckdb/common/types/hash.hpp | 3 + 11 files changed, 195 insertions(+), 120 deletions(-) diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 178d2fe91d0b..4841aca02355 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -191,10 +191,14 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat } } else { // no parent: set definition levels only from this validity mask - for (idx_t i = 0; i < count; i++) { - const auto is_null = !validity.RowIsValid(i); - state.definition_levels.emplace_back(is_null ? null_value : define_value); - state.null_count += is_null; + if (validity.AllValid()) { + state.definition_levels.insert(state.definition_levels.end(), count, define_value); + } else { + for (idx_t i = 0; i < count; i++) { + const auto is_null = !validity.RowIsValid(i); + state.definition_levels.emplace_back(is_null ? 
null_value : define_value); + state.null_count += is_null; + } } if (!can_have_nulls && state.null_count != 0) { throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); @@ -219,10 +223,10 @@ class WKBColumnWriterState final : public StandardColumnWriterState { class WKBColumnWriter final : public StandardColumnWriter { public: - WKBColumnWriter(ClientContext &context_p, ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, - idx_t max_repeat, idx_t max_define, bool can_have_nulls, string name) + WKBColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, + idx_t max_define, bool can_have_nulls, string name) : StandardColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls), - column_name(std::move(name)), context(context_p) { + column_name(std::move(name)) { this->writer.GetGeoParquetData().RegisterGeometryColumn(column_name); } @@ -253,7 +257,6 @@ class WKBColumnWriter final : public StandardColumnWriter ColumnWriter::CreateWriterRecursive(ClientContext &cont schema_path.push_back(name); if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB" && GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context)) { - return make_uniq(context, writer, schema_idx, std::move(schema_path), max_repeat, max_define, + return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls, name); } @@ -584,41 +587,30 @@ struct NumericLimits { } }; -} // namespace duckdb - -namespace std { template <> -struct hash { - size_t operator()(const duckdb::ParquetIntervalTargetType &val) const { - return duckdb::Hash(duckdb::const_char_ptr_cast(val.bytes), - duckdb::ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE); - } -}; +hash_t Hash(ParquetIntervalTargetType val) { + return Hash(const_char_ptr_cast(val.bytes), ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE); +} template <> -struct hash { - size_t operator()(const 
duckdb::ParquetUUIDTargetType &val) const { - return duckdb::Hash(duckdb::const_char_ptr_cast(val.bytes), duckdb::ParquetUUIDTargetType::PARQUET_UUID_SIZE); - } -}; +hash_t Hash(ParquetUUIDTargetType val) { + return Hash(const_char_ptr_cast(val.bytes), ParquetUUIDTargetType::PARQUET_UUID_SIZE); +} template <> -struct hash { - size_t operator()(const duckdb::float_na_equal &val) const { - if (std::isnan(val.val)) { - return duckdb::Hash(std::numeric_limits::quiet_NaN()); - } - return duckdb::Hash(val.val); +hash_t Hash(float_na_equal val) { + if (std::isnan(val.val)) { + return Hash(std::numeric_limits::quiet_NaN()); } -}; + return Hash(val.val); +} template <> -struct hash { - inline size_t operator()(const duckdb::double_na_equal &val) const { - if (std::isnan(val.val)) { - return duckdb::Hash(std::numeric_limits::quiet_NaN()); - } - return duckdb::Hash(val.val); +hash_t Hash(double_na_equal val) { + if (std::isnan(val.val)) { + return Hash(std::numeric_limits::quiet_NaN()); } -}; -} // namespace std + return Hash(val.val); +} + +} // namespace duckdb diff --git a/extension/parquet/include/parquet_rle_bp_encoder.hpp b/extension/parquet/include/parquet_rle_bp_encoder.hpp index 689d0fee132c..b0fd130a33f4 100644 --- a/extension/parquet/include/parquet_rle_bp_encoder.hpp +++ b/extension/parquet/include/parquet_rle_bp_encoder.hpp @@ -8,9 +8,7 @@ #pragma once -#include "parquet_types.h" -#include "thrift_tools.hpp" -#include "resizable_buffer.hpp" +#include "decode_utils.hpp" namespace duckdb { @@ -25,7 +23,7 @@ class RleBpEncoder { bp_block_count = 0; } - void WriteValue(WriteStream &writer, uint32_t value) { + void WriteValue(WriteStream &writer, const uint32_t &value) { if (bp_block_count != 0) { // We already committed to a BP run D_ASSERT(rle_count == 0); @@ -68,6 +66,15 @@ class RleBpEncoder { rle_count = 0; } + void WriteMany(WriteStream &writer, uint32_t value, idx_t count) { + D_ASSERT(bp_block_count == 0); + if (rle_count != 0) { + WriteRun(writer); + } + 
rle_value = value; + rle_count = count; + } + void FinishWrite(WriteStream &writer) { WriteRun(writer); } diff --git a/extension/parquet/include/writer/primitive_column_writer.hpp b/extension/parquet/include/writer/primitive_column_writer.hpp index 4e9e55436a6d..ccaa02f79503 100644 --- a/extension/parquet/include/writer/primitive_column_writer.hpp +++ b/extension/parquet/include/writer/primitive_column_writer.hpp @@ -82,7 +82,7 @@ class PrimitiveColumnWriter : public ColumnWriter { protected: static void WriteLevels(WriteStream &temp_writer, const unsafe_vector &levels, idx_t max_value, - idx_t start_offset, idx_t count); + idx_t start_offset, idx_t count, optional_idx null_count = optional_idx()); virtual duckdb_parquet::Encoding::type GetEncoding(PrimitiveColumnWriterState &state); diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index 5adfbd832e94..ca3b3ca8a804 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -38,7 +38,8 @@ class StandardColumnWriterState : public PrimitiveColumnWriterState { StandardColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx) : PrimitiveColumnWriterState(writer, row_group, col_idx), dictionary(BufferAllocator::Get(writer.GetContext()), writer.DictionarySizeLimit(), - 2e6) { // TODO: make size configurable + 2097152), // TODO: make size configurable + encoding(duckdb_parquet::Encoding::PLAIN) { } ~StandardColumnWriterState() override = default; @@ -60,7 +61,7 @@ class StandardWriterPageState : public ColumnWriterPageState { : encoding(encoding_p), dbp_initialized(false), dbp_encoder(total_value_count), dlba_initialized(false), dlba_encoder(total_value_count, total_string_size), bss_encoder(total_value_count, sizeof(TGT)), dictionary(dictionary_p), dict_written_value(false), - 
dict_bit_width(RleBpDecoder::ComputeBitWidth(dictionary.size())), dict_encoder(dict_bit_width) { + dict_bit_width(RleBpDecoder::ComputeBitWidth(dictionary.GetSize())), dict_encoder(dict_bit_width) { } duckdb_parquet::Encoding::type encoding; @@ -152,7 +153,6 @@ class StandardColumnWriter : public PrimitiveColumnWriter { auto data_ptr = FlatVector::GetData(vector); idx_t vector_index = 0; - uint32_t new_value_index = state.dictionary.size(); const bool check_parent_empty = parent && !parent->is_empty.empty(); const idx_t parent_index = state.definition_levels.size(); @@ -168,12 +168,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } if (validity.RowIsValid(vector_index)) { const auto &src_value = data_ptr[vector_index]; - if (state.dictionary.size() <= writer.DictionarySizeLimit()) { - if (state.dictionary.find(src_value) == state.dictionary.end()) { - state.dictionary[src_value] = new_value_index; - new_value_index++; - } - } + state.dictionary.Insert(src_value); state.total_value_count++; state.total_string_size += dlba_encoder::GetDlbaStringSize(src_value); } @@ -185,7 +180,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { const auto type = writer.GetType(schema_idx); auto &state = state_p.Cast>(); - if (state.dictionary.size() == 0 || state.dictionary.size() > writer.DictionarySizeLimit()) { + if (state.dictionary.GetSize() == 0 || state.dictionary.IsFull()) { if (writer.GetParquetVersion() == ParquetVersion::V1) { // Can't do the cool stuff for V1 state.encoding = duckdb_parquet::Encoding::PLAIN; @@ -207,9 +202,8 @@ class StandardColumnWriter : public PrimitiveColumnWriter { state.encoding = duckdb_parquet::Encoding::PLAIN; } } - state.dictionary.clear(); } else { - state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size()); + state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.GetSize()); } } @@ -224,7 +218,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { idx_t 
DictionarySize(PrimitiveColumnWriterState &state_p) override { auto &state = state_p.Cast>(); - return state.dictionary.size(); + return state.dictionary.GetSize(); } void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state_p, @@ -240,14 +234,14 @@ class StandardColumnWriter : public PrimitiveColumnWriter { if (!mask.RowIsValid(r)) { continue; } - auto &src_val = data_ptr[r]; - auto value_index = page_state.dictionary.at(src_val); if (!page_state.dict_written_value) { // first value: write the bit-width as a one-byte entry and initialize writer temp_writer.Write(page_state.dict_bit_width); page_state.dict_encoder.BeginWrite(); page_state.dict_written_value = true; } + const auto &src_value = data_ptr[r]; + const auto value_index = page_state.dictionary.GetIndex(src_value); page_state.dict_encoder.WriteValue(temp_writer, value_index); } break; @@ -329,34 +323,22 @@ class StandardColumnWriter : public PrimitiveColumnWriter { void FlushDictionary(PrimitiveColumnWriterState &state_p, ColumnWriterStatistics *stats) override { auto &state = state_p.Cast>(); - D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY); - // first we need to sort the values in index order - auto values = vector(state.dictionary.size()); - for (const auto &entry : state.dictionary) { - values[entry.second] = entry.first; - } - state.bloom_filter = - make_uniq(state.dictionary.size(), writer.BloomFilterFalsePositiveRatio()); - - // first write the contents of the dictionary page to a temporary buffer - auto temp_writer = make_uniq( - Allocator::Get(writer.GetContext()), MaxValue(NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)), - MemoryStream::DEFAULT_INITIAL_CAPACITY)); - for (idx_t r = 0; r < values.size(); r++) { - const TGT target_value = OP::template Operation(values[r]); + make_uniq(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio()); + + state.dictionary.IterateValues([&](const SRC &value) { + const TGT 
target_value = OP::template Operation(value); // update the statistics OP::template HandleStats(stats, target_value); // update the bloom filter auto hash = OP::template XXHash64(target_value); state.bloom_filter->FilterInsert(hash); - // actually write the dictionary value - OP::template WriteToStream(target_value, *temp_writer); - } + }); + // flush the dictionary page and add it to the to-be-written pages - WriteDictionary(state, std::move(temp_writer), values.size()); + WriteDictionary(state, state.dictionary.GetPlainMemoryStream(), state.dictionary.GetSize()); // bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up } diff --git a/extension/parquet/writer/primitive_column_writer.cpp b/extension/parquet/writer/primitive_column_writer.cpp index 675379873809..589f94ab316f 100644 --- a/extension/parquet/writer/primitive_column_writer.cpp +++ b/extension/parquet/writer/primitive_column_writer.cpp @@ -117,7 +117,7 @@ void PrimitiveColumnWriter::BeginWrite(ColumnWriterState &state_p) { } void PrimitiveColumnWriter::WriteLevels(WriteStream &temp_writer, const unsafe_vector &levels, - idx_t max_value, idx_t offset, idx_t count) { + idx_t max_value, idx_t offset, idx_t count, optional_idx null_count) { if (levels.empty() || count == 0) { return; } @@ -128,9 +128,15 @@ void PrimitiveColumnWriter::WriteLevels(WriteStream &temp_writer, const unsafe_v // have to write to an intermediate stream first because we need to know the size MemoryStream intermediate_stream(Allocator::DefaultAllocator()); + rle_encoder.BeginWrite(); - for (idx_t i = offset; i < offset + count; i++) { - rle_encoder.WriteValue(intermediate_stream, levels[i]); + if (null_count.IsValid() && null_count.GetIndex() == 0 || null_count.GetIndex() == count) { + // All are NULL or none are NULL + rle_encoder.WriteMany(intermediate_stream, levels[0], count); + } else { + for (idx_t i = offset; i < offset + count; i++) { + rle_encoder.WriteValue(intermediate_stream, levels[i]); 
+ } } rle_encoder.FinishWrite(intermediate_stream); @@ -159,7 +165,8 @@ void PrimitiveColumnWriter::NextPage(PrimitiveColumnWriterState &state) { WriteLevels(temp_writer, state.repetition_levels, max_repeat, page_info.offset, page_info.row_count); // write the definition levels - WriteLevels(temp_writer, state.definition_levels, max_define, page_info.offset, page_info.row_count); + WriteLevels(temp_writer, state.definition_levels, max_define, page_info.offset, page_info.row_count, + state.null_count); } void PrimitiveColumnWriter::FlushPage(PrimitiveColumnWriterState &state) { diff --git a/src/common/serializer/memory_stream.cpp b/src/common/serializer/memory_stream.cpp index 92419b8d04c8..d608392e8cf7 100644 --- a/src/common/serializer/memory_stream.cpp +++ b/src/common/serializer/memory_stream.cpp @@ -102,4 +102,8 @@ idx_t MemoryStream::GetCapacity() const { return capacity; } +void MemoryStream::SetPosition(idx_t position_p) { + position = position_p; +} + } // namespace duckdb diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp index 160d5f3c2924..f54295af4656 100644 --- a/src/common/types/hash.cpp +++ b/src/common/types/hash.cpp @@ -66,6 +66,11 @@ hash_t Hash(interval_t val) { return Hash(days) ^ Hash(months) ^ Hash(micros); } +template <> +hash_t Hash(dtime_tz_t val) { + return Hash(val.bits); +} + template <> hash_t Hash(const char *str) { return Hash(str, strlen(str)); diff --git a/src/include/duckdb/common/primitive_dictionary.hpp b/src/include/duckdb/common/primitive_dictionary.hpp index db77cad84410..0976882a2c8d 100644 --- a/src/include/duckdb/common/primitive_dictionary.hpp +++ b/src/include/duckdb/common/primitive_dictionary.hpp @@ -10,72 +10,132 @@ #include "duckdb/common/types/string_type.hpp" #include "duckdb/common/allocator.hpp" +#include "duckdb/common/serializer/memory_stream.hpp" namespace duckdb { template class PrimitiveDictionary { private: - static constexpr uint32_t INVALID_OFFSET = static_cast(-1); + static constexpr idx_t 
LOAD_FACTOR = 2; + static constexpr uint32_t INVALID_INDEX = static_cast(-1); struct primitive_dictionary_entry_t { T value; - uint32_t offset; + uint32_t index; + bool IsEmpty() const { + return index == INVALID_INDEX; + } }; public: + //! PrimitiveDictionary is a fixed-size linear probing hash table for primitive types + //! It is used to dictionary-encode data in, e.g., Parquet files PrimitiveDictionary(Allocator &allocator, idx_t maximum_size_p, idx_t plain_capacity_p) - : maximum_size(maximum_size_p), size(0), capacity(NextPowerOfTwo(maximum_size * 2)), + : maximum_size(maximum_size_p), size(0), capacity(NextPowerOfTwo(maximum_size * LOAD_FACTOR)), capacity_mask(capacity - 1), plain_capacity(plain_capacity_p), plain_offset(0), allocated_dictionary(allocator.Allocate(capacity * sizeof(primitive_dictionary_entry_t))), allocated_plain(allocator.Allocate(std::is_same::value ? plain_capacity : capacity * sizeof(T))), dictionary(reinterpret_cast(allocated_dictionary.get())), - plain(allocated_plain.get()) { + plain(reinterpret_cast(allocated_plain.get())), plain_raw(allocated_plain.get()), full(false) { // Initialize empty for (idx_t i = 0; i < capacity; i++) { - dictionary[i].offset = INVALID_OFFSET; + dictionary[i].index = INVALID_INDEX; } } public: - bool Insert(T value, uint32_t &offset) { + //! Insert value into dictionary (if not full) + void Insert(T value) { + if (full | (size == capacity)) { + full = true; + return; + } auto &entry = Lookup(value); - bool success = size < capacity; - if (entry.offset == INVALID_OFFSET) { - success &= AddToPlain(value); + if (entry.IsEmpty()) { + if (!AddToPlain(value)) { + full = true; + return; + } entry.value = value; - entry.offset = size++; + entry.index = size++; } - offset = entry.offset; - return success; } - uint32_t GetOffset(const T &value) const { - return Lookup(value).offset; + //! 
Get dictionary index of an already inserted value + uint32_t GetIndex(const T &value) const { + const auto &entry = Lookup(value); + D_ASSERT(!entry.IsEmpty()); + return entry.index; + } + + //! Iterates over inserted values + template ::value, int>::type = 0> + void IterateValues(const std::function &fun) const { + for (idx_t i = 0; i < size; i++) { + fun(plain[i]); + } + } + + //! Specialized template to iterate over string_t values + template ::value, int>::type = 0> + void IterateValues(const std::function &fun) const { + for (idx_t i = 0; i < capacity; i++) { + auto &entry = dictionary[i]; + if (entry.IsEmpty()) { + continue; + } + fun(entry.value); + } + } + + //! Get the number of unique values in the dictionary + idx_t GetSize() const { + return size; + } + + //! If any of the inserts caused the dictionary to be full, this returns true + bool IsFull() const { + return full; + } + + //! Get the plain written values as a memory stream (zero-copy) + unique_ptr GetPlainMemoryStream() const { + auto result = make_uniq(plain_raw, plain_capacity); + result->SetPosition(plain_offset); + return result; } private: + //! Looks up a value in the dictionary using linear probing primitive_dictionary_entry_t &Lookup(const T &value) const { - return dictionary[Hash(value) & capacity_mask]; + auto offset = Hash(value) & capacity_mask; + while (!dictionary[offset].IsEmpty() && dictionary[offset].value != value) { + ++offset &= capacity_mask; + } + return dictionary[offset]; } + //! Writes a value to the plain data bool AddToPlain(const T &value) { - static_cast(plain)[plain_offset++] = value; + plain[size] = value; + plain_offset += sizeof(T); return true; } + //! 
Specialized template to add a string_t value to the plain data bool AddToPlain(string_t &value) { if (plain_offset + sizeof(uint32_t) + value.GetSize() > plain_capacity) { return false; // Out of capacity } // Store string length and increment offset - Store(UnsafeNumericCast(value.GetSize()), plain + plain_offset); + Store(UnsafeNumericCast(value.GetSize()), plain_raw + plain_offset); plain_offset += sizeof(uint32_t); // Copy over string data to plain, update "value" to point to it, and increment offset - memcpy(plain + plain_offset, value.GetData(), value.GetSize()); - value = string_t(char_ptr_cast(plain + plain_offset), value.GetSize()); + memcpy(plain_raw + plain_offset, value.GetData(), value.GetSize()); + value = string_t(char_ptr_cast(plain_raw + plain_offset), value.GetSize()); plain_offset += value.GetSize(); return true; @@ -100,7 +160,11 @@ class PrimitiveDictionary { //! Pointers to allocated regions for convenience primitive_dictionary_entry_t *const dictionary; - data_ptr_t const plain; + T *const plain; + data_ptr_t const plain_raw; + + //! More values inserted than possible + bool full; }; } // namespace duckdb diff --git a/src/include/duckdb/common/serializer/memory_stream.hpp b/src/include/duckdb/common/serializer/memory_stream.hpp index a735ad1aa90e..f5cd1c153049 100644 --- a/src/include/duckdb/common/serializer/memory_stream.hpp +++ b/src/include/duckdb/common/serializer/memory_stream.hpp @@ -25,20 +25,20 @@ class MemoryStream : public WriteStream, public ReadStream { public: static constexpr idx_t DEFAULT_INITIAL_CAPACITY = 512; - // Create a new owning MemoryStream with an internal backing buffer with the specified capacity. The stream will - // own the backing buffer, resize it when needed and free its memory when the stream is destroyed + //! Create a new owning MemoryStream with an internal backing buffer with the specified capacity. The stream will + //! 
own the backing buffer, resize it when needed and free its memory when the stream is destroyed explicit MemoryStream(Allocator &allocator, idx_t capacity = DEFAULT_INITIAL_CAPACITY); - // Create a new owning MemoryStream with an internal backing buffer with the specified capacity. The stream will - // own the backing buffer, resize it when needed and free its memory when the stream is destroyed + //! Create a new owning MemoryStream with an internal backing buffer with the specified capacity. The stream will + //! own the backing buffer, resize it when needed and free its memory when the stream is destroyed explicit MemoryStream(idx_t capacity = DEFAULT_INITIAL_CAPACITY); - // Create a new non-owning MemoryStream over the specified external buffer and capacity. The stream will not take - // ownership of the backing buffer, will not attempt to resize it and will not free the memory when the stream - // is destroyed + //! Create a new non-owning MemoryStream over the specified external buffer and capacity. The stream will not take + //! ownership of the backing buffer, will not attempt to resize it and will not free the memory when the stream + //! is destroyed explicit MemoryStream(data_ptr_t buffer, idx_t capacity); - // Cant copy! + //! Cant copy! MemoryStream(const MemoryStream &) = delete; MemoryStream &operator=(const MemoryStream &) = delete; @@ -47,30 +47,33 @@ class MemoryStream : public WriteStream, public ReadStream { ~MemoryStream() override; - // Write data to the stream. - // Throws if the write would exceed the capacity of the stream and the backing buffer is not owned by the stream + //! Write data to the stream. + //! Throws if the write would exceed the capacity of the stream and the backing buffer is not owned by the stream void WriteData(const_data_ptr_t buffer, idx_t write_size) override; - // Read data from the stream. - // Throws if the read would exceed the capacity of the stream + //! Read data from the stream. + //! 
Throws if the read would exceed the capacity of the stream void ReadData(data_ptr_t buffer, idx_t read_size) override; - // Rewind the stream to the start, keeping the capacity and the backing buffer intact + //! Rewind the stream to the start, keeping the capacity and the backing buffer intact void Rewind(); - // Release ownership of the backing buffer and turn a owning stream into a non-owning one. - // The stream will no longer be responsible for freeing the data. - // The stream will also no longer attempt to automatically resize the buffer when the capacity is reached. + //! Release ownership of the backing buffer and turn a owning stream into a non-owning one. + //! The stream will no longer be responsible for freeing the data. + //! The stream will also no longer attempt to automatically resize the buffer when the capacity is reached. void Release(); - // Get a pointer to the underlying backing buffer + //! Get a pointer to the underlying backing buffer data_ptr_t GetData() const; - // Get the current position in the stream + //! Get the current position in the stream idx_t GetPosition() const; - // Get the capacity of the stream + //! Get the capacity of the stream idx_t GetCapacity() const; + + //! 
Set the position in the stream + void SetPosition(idx_t position); }; } // namespace duckdb diff --git a/src/include/duckdb/common/types/datetime.hpp b/src/include/duckdb/common/types/datetime.hpp index ccbb95524244..d52edd57379c 100644 --- a/src/include/duckdb/common/types/datetime.hpp +++ b/src/include/duckdb/common/types/datetime.hpp @@ -1,3 +1,11 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/datetime.hpp +// +// +//===----------------------------------------------------------------------===// + #pragma once #include "duckdb/common/common.hpp" diff --git a/src/include/duckdb/common/types/hash.hpp b/src/include/duckdb/common/types/hash.hpp index b213f249e402..128ee634dcd7 100644 --- a/src/include/duckdb/common/types/hash.hpp +++ b/src/include/duckdb/common/types/hash.hpp @@ -10,6 +10,7 @@ #include "duckdb/common/common.hpp" #include "duckdb/common/types.hpp" +#include "duckdb/common/types/datetime.hpp" namespace duckdb { @@ -63,6 +64,8 @@ template <> DUCKDB_API hash_t Hash(string_t val); template <> DUCKDB_API hash_t Hash(interval_t val); +template <> +DUCKDB_API hash_t Hash(dtime_tz_t val); DUCKDB_API hash_t Hash(const char *val, size_t size); DUCKDB_API hash_t Hash(uint8_t *val, size_t size); From 29172987ada96ea87651bd8b493b720b813f1ac6 Mon Sep 17 00:00:00 2001 From: pdet Date: Thu, 13 Feb 2025 08:06:41 -0300 Subject: [PATCH 057/142] more destroy --- src/common/adbc/adbc.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index b83bcbc67585..17618c66ec55 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -892,9 +892,8 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char auto error_msg = duckdb_prepare_error(statement_internal); auto adbc_status = CheckResult(res, error, error_msg); if (adbc_status != ADBC_STATUS_OK) { - // Things went wrong when 
executing internal prepared statement - duckdb_destroy_extracted(&extracted_statements); duckdb_destroy_prepare(&statement_internal); + duckdb_destroy_extracted(&extracted_statements); return adbc_status; } // Execute @@ -902,15 +901,16 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char res = duckdb_execute_prepared_arrow(statement_internal, &out_result); if (res != DuckDBSuccess) { SetError(error, duckdb_query_arrow_error(out_result)); - delete out_result; + duckdb_destroy_arrow(&out_result); duckdb_destroy_prepare(&statement_internal); duckdb_destroy_extracted(&extracted_statements); return ADBC_STATUS_INVALID_ARGUMENT; } - delete out_result; + duckdb_destroy_arrow(&out_result); duckdb_destroy_prepare(&statement_internal); } - // Besides ze last, this one we return + + // Final statement (returned to caller) auto res = duckdb_prepare_extracted_statement(wrapper->connection, extracted_statements, extract_statements_size - 1, &wrapper->statement); auto error_msg = duckdb_prepare_error(wrapper->statement); From 61be508a733a7d31ee1c58174dee0317e328fb3f Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 13 Feb 2025 12:18:51 +0100 Subject: [PATCH 058/142] simplify SkipToClose --- src/function/cast/vector_cast_helpers.cpp | 41 ++++++++++------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 0600eb1de4ce..64d9780312f5 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -70,15 +70,15 @@ static bool SkipToCloseQuotes(StringCastInputState &input_state) { return false; } -static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl, char close_bracket) { +static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl) { auto &idx = input_state.pos; auto &buf = input_state.buf; auto &len = input_state.len; auto &escaped = input_state.escaped; - idx++; + + 
D_ASSERT(buf[idx] == '{' || buf[idx] == '[' || buf[idx] == '('); vector brackets; - brackets.push_back(close_bracket); while (idx < len) { if (!escaped) { if (buf[idx] == '"' || buf[idx] == '\'') { @@ -250,8 +250,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { start_pos = pos; } //! Start of a LIST - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { + if (!SkipToClose(input_state, lvl)) { return false; } end_pos = pos; @@ -269,7 +268,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } //! Start of a STRUCT idx_t struct_lvl = 0; - if (!SkipToClose(input_state, struct_lvl, '}')) { + if (!SkipToClose(input_state, struct_lvl)) { return false; } end_pos = pos; @@ -279,7 +278,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } //! Start of an (unnamed) STRUCT idx_t struct_lvl = 0; - if (!SkipToClose(input_state, struct_lvl, ')')) { + if (!SkipToClose(input_state, struct_lvl)) { return false; } end_pos = pos; @@ -417,7 +416,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl, '}')) { + if (!SkipToClose(input_state, lvl)) { return false; } end_pos = pos; @@ -425,7 +424,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl, ')')) { + if (!SkipToClose(input_state, lvl)) { return false; } end_pos = pos; @@ -433,8 +432,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { + if (!SkipToClose(input_state, lvl)) { return false; } end_pos = pos; @@ -488,7 +486,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl, '}')) { + if (!SkipToClose(input_state, lvl)) { 
return false; } end_pos = pos; @@ -496,7 +494,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl, ')')) { + if (!SkipToClose(input_state, lvl)) { return false; } end_pos = pos; @@ -504,8 +502,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - lvl++; - if (!SkipToClose(input_state, lvl, ']')) { + if (!SkipToClose(input_state, lvl)) { return false; } end_pos = pos; @@ -662,7 +659,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Thu, 13 Feb 2025 12:52:56 +0100 Subject: [PATCH 059/142] trigger a cast to unnamed struct --- src/function/cast/string_cast.cpp | 3 -- src/function/cast/vector_cast_helpers.cpp | 4 +- test/sql/cast/string_to_unnamed_struct.test | 54 +++++++++++++-------- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/function/cast/string_cast.cpp b/src/function/cast/string_cast.cpp index f3a19c4273bf..b39f6623e7da 100644 --- a/src/function/cast/string_cast.cpp +++ b/src/function/cast/string_cast.cpp @@ -233,9 +233,6 @@ bool VectorStringToStruct::StringToNestedTypeCastLoop(const string_t *source_dat result_mask.SetInvalid(i); continue; } - if (is_unnamed) { - throw ConversionException("Casting strings to unnamed structs is unsupported"); - } if (!VectorStringToStruct::SplitStruct(source_data[idx], child_vectors, i, child_names, child_masks)) { string text = "Type VARCHAR with value '" + source_data[idx].GetString() + "' can't be cast to the destination type STRUCT"; diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 64d9780312f5..e5dac2b37201 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -580,6 +580,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Thu, 13 Feb 2025 13:07:56 +0100 Subject: 
[PATCH 060/142] no need for a 'lvl' list nesting tracker --- src/function/cast/vector_cast_helpers.cpp | 52 ++++++++--------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index e5dac2b37201..53364abbfc1a 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -70,7 +70,7 @@ static bool SkipToCloseQuotes(StringCastInputState &input_state) { return false; } -static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl) { +static bool SkipToClose(StringCastInputState &input_state) { auto &idx = input_state.pos; auto &buf = input_state.buf; auto &len = input_state.len; @@ -91,14 +91,7 @@ static bool SkipToClose(StringCastInputState &input_state, idx_t &lvl) { brackets.push_back(')'); } else if (buf[idx] == '[') { brackets.push_back(']'); - lvl++; } else if (buf[idx] == brackets.back()) { - if (buf[idx] == ']') { - if (lvl == 0) { - return false; - } - lvl--; - } brackets.pop_back(); if (brackets.empty()) { return true; @@ -217,7 +210,6 @@ template static bool SplitStringListInternal(const string_t &input, OP &state) { const char *buf = input.GetData(); idx_t len = input.GetSize(); - idx_t lvl = 1; idx_t pos = 0; StringCastInputState input_state(buf, pos, len); @@ -250,7 +242,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { start_pos = pos; } //! Start of a LIST - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -267,8 +259,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { start_pos = pos; } //! Start of a STRUCT - idx_t struct_lvl = 0; - if (!SkipToClose(input_state, struct_lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -277,8 +268,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { start_pos = pos; } //! 
Start of an (unnamed) STRUCT - idx_t struct_lvl = 0; - if (!SkipToClose(input_state, struct_lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -293,10 +283,6 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { seen_value = true; } if (buf[pos] == ']') { - if (lvl == 0) { - return false; - } - lvl--; break; } start_pos = optional_idx(); @@ -318,7 +304,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { } pos++; SkipWhitespace(input_state); - return (pos == len && lvl == 0); + return (pos == len); } bool VectorStringToList::SplitStringList(const string_t &input, string_t *child_data, idx_t &child_start, @@ -377,7 +363,6 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { idx_t len = input.GetSize(); idx_t pos = 0; StringCastInputState input_state(buf, pos, len); - idx_t lvl = 0; SkipWhitespace(input_state); if (pos == len || buf[pos] != '{') { @@ -416,7 +401,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -424,7 +409,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -432,7 +417,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -486,7 +471,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -494,7 +479,7 @@ static bool SplitStringMapInternal(const string_t 
&input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -502,7 +487,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { if (!start_pos.IsValid()) { start_pos = pos; } - if (!SkipToClose(input_state, lvl)) { + if (!SkipToClose(input_state)) { return false; } end_pos = pos; @@ -538,7 +523,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { } pos++; SkipWhitespace(input_state); - return (pos == len && lvl == 0); + return (pos == len); } bool VectorStringToMap::SplitStringMap(const string_t &input, string_t *child_key_data, string_t *child_val_data, @@ -561,7 +546,6 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Thu, 13 Feb 2025 13:14:09 +0100 Subject: [PATCH 061/142] remove temp_state, IsNull just needs a buf, start and end --- src/function/cast/vector_cast_helpers.cpp | 24 ++++++++--------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 53364abbfc1a..1e62089fbf79 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -21,10 +21,8 @@ struct StringCastInputState { namespace duckdb { // ------- Helper functions for splitting string nested types ------- -static bool IsNull(StringCastInputState &input_state) { - auto &buf = input_state.buf; - auto &pos = input_state.pos; - if (input_state.pos + 4 != input_state.len) { +static bool IsNull(const char *buf, idx_t pos, idx_t end_pos) { + if (pos + 4 != end_pos) { return false; } return StringUtil::CIEquals(string(buf + pos, buf + pos + 4), "null"); @@ -190,8 +188,7 @@ struct SplitStringListOperation { public: void HandleValue(const char *buf, idx_t start, idx_t end) { - StringCastInputState temp_state(buf, start, end); - if (IsNull(temp_state)) { + if (IsNull(buf, 
start, end)) { FlatVector::SetNull(child, entry_count, true); entry_count++; return; @@ -334,8 +331,7 @@ struct SplitStringMapOperation { Vector &varchar_val; bool HandleKey(const char *buf, idx_t start_pos, idx_t pos) { - StringCastInputState temp_state(buf, start_pos, pos); - if (IsNull(temp_state)) { + if (IsNull(buf, start_pos, pos)) { FlatVector::SetNull(varchar_val, child_start, true); FlatVector::SetNull(varchar_key, child_start, true); child_start++; @@ -346,8 +342,7 @@ struct SplitStringMapOperation { } void HandleValue(const char *buf, idx_t start_pos, idx_t pos) { - StringCastInputState temp_state(buf, start_pos, pos); - if (IsNull(temp_state)) { + if (IsNull(buf, start_pos, pos)) { FlatVector::SetNull(varchar_val, child_start, true); child_start++; return; @@ -609,8 +604,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Thu, 13 Feb 2025 13:18:24 +0100 Subject: [PATCH 062/142] use random seeds for bernoulli sample when parallel is enabled --- .../helper/physical_streaming_sample.cpp | 23 +++++--- src/execution/physical_plan/plan_sample.cpp | 3 +- .../helper/physical_streaming_sample.hpp | 9 +-- .../prepared_statement_in_pushdown.test | 0 test/sql/sample/bernoulli_sampling.test | 57 +++++++++++++++++++ 5 files changed, 77 insertions(+), 15 deletions(-) create mode 100644 test/optimizer/filter_pushdown/prepared_statement_in_pushdown.test create mode 100644 test/sql/sample/bernoulli_sampling.test diff --git a/src/execution/operator/helper/physical_streaming_sample.cpp b/src/execution/operator/helper/physical_streaming_sample.cpp index 309256244927..1062deb27ed7 100644 --- a/src/execution/operator/helper/physical_streaming_sample.cpp +++ b/src/execution/operator/helper/physical_streaming_sample.cpp @@ -5,10 +5,11 @@ namespace duckdb { -PhysicalStreamingSample::PhysicalStreamingSample(vector types, SampleMethod method, double percentage, - int64_t seed, idx_t estimated_cardinality) - : 
PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality), method(method), - percentage(percentage / 100), seed(seed) { +PhysicalStreamingSample::PhysicalStreamingSample(vector types, unique_ptr options, + idx_t estimated_cardinality) + : PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality), + sample_options(std::move(options)) { + percentage = sample_options->sample_size.GetValue() / 100; } //===--------------------------------------------------------------------===// @@ -49,13 +50,21 @@ void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &resul } } +bool PhysicalStreamingSample::ParallelOperator() const { + return !sample_options->repeatable; +} + unique_ptr PhysicalStreamingSample::GetOperatorState(ExecutionContext &context) const { - return make_uniq(seed); + if (!ParallelOperator()) { + return make_uniq(static_cast(sample_options->seed.GetIndex())); + } + RandomEngine random; + return make_uniq(static_cast(random.NextRandomInteger64())); } OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state) const { - switch (method) { + switch (sample_options->method) { case SampleMethod::BERNOULLI_SAMPLE: BernoulliSample(input, chunk, state); break; @@ -70,7 +79,7 @@ OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, D InsertionOrderPreservingMap PhysicalStreamingSample::ParamsToString() const { InsertionOrderPreservingMap result; - result["Sample Method"] = EnumUtil::ToString(method) + ": " + to_string(100 * percentage) + "%"; + result["Sample Method"] = EnumUtil::ToString(sample_options->method) + ": " + to_string(100 * percentage) + "%"; return result; } diff --git a/src/execution/physical_plan/plan_sample.cpp b/src/execution/physical_plan/plan_sample.cpp index be55784779fb..2ccfacb8ac8c 100644 --- 
a/src/execution/physical_plan/plan_sample.cpp +++ b/src/execution/physical_plan/plan_sample.cpp @@ -29,8 +29,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSample &op EnumUtil::ToString(op.sample_options->method)); } sample = make_uniq( - op.types, op.sample_options->method, op.sample_options->sample_size.GetValue(), - static_cast(op.sample_options->seed.GetIndex()), op.estimated_cardinality); + op.types, std::move(op.sample_options), op.estimated_cardinality); break; default: throw InternalException("Unimplemented sample method"); diff --git a/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp b/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp index dafaf849f556..68df848fec9f 100644 --- a/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +++ b/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp @@ -19,12 +19,11 @@ class PhysicalStreamingSample : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::STREAMING_SAMPLE; public: - PhysicalStreamingSample(vector types, SampleMethod method, double percentage, int64_t seed, + PhysicalStreamingSample(vector types, unique_ptr options, idx_t estimated_cardinality); - SampleMethod method; + unique_ptr sample_options; double percentage; - int64_t seed; public: // Operator interface @@ -32,9 +31,7 @@ class PhysicalStreamingSample : public PhysicalOperator { OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state) const override; - bool ParallelOperator() const override { - return true; - } + bool ParallelOperator() const override; InsertionOrderPreservingMap ParamsToString() const override; diff --git a/test/optimizer/filter_pushdown/prepared_statement_in_pushdown.test b/test/optimizer/filter_pushdown/prepared_statement_in_pushdown.test new file mode 100644 index 000000000000..e69de29bb2d1 diff 
--git a/test/sql/sample/bernoulli_sampling.test b/test/sql/sample/bernoulli_sampling.test new file mode 100644 index 000000000000..a00ff7311325 --- /dev/null +++ b/test/sql/sample/bernoulli_sampling.test @@ -0,0 +1,57 @@ +# name: test/sql/sample/bernoulli_sampling.test +# description: Test reservoir sample crash on large data sets +# group: [sample] + + +statement ok +create table output (num_rows INT); + +statement ok +select setseed(0.3); + +loop i 0 500 + +statement ok +WITH some_tab AS ( + SELECT UNNEST(range(1000)) AS id +), +some_tab_unq AS ( + SELECT distinct(id) AS id FROM some_tab +), +sampled AS ( + select id from some_tab_unq + USING SAMPLE 1% (bernoulli) +) +INSERT INTO output select count(*) as n_rows FROM sampled; + +endloop + + +query III +select min(num_rows) > 0, max(num_rows) < 25, count(*) FILTER (num_rows = 0) = 0 from output; +---- +true true true + +query III +select avg(rowid), min(rowid), max(rowid) from output where num_rows = 0; +---- +NULL NULL NULL + + + +statement ok +create table t1 as select range id from range(1000); + +statement ok +select setseed(0.6); + +query I nosort result_1 +select id from t1 USING SAMPLE 1% (bernoulli, 5); +---- + +query I nosort result_1 +select id from t1 USING SAMPLE 1% (bernoulli, 5); +---- + + + From 981b4c2f65671b0ebac8449af6c97fda925dc1d2 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 13 Feb 2025 13:29:51 +0100 Subject: [PATCH 063/142] deduplicate some logic --- src/function/cast/vector_cast_helpers.cpp | 171 ++++++++-------------- 1 file changed, 65 insertions(+), 106 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 1e62089fbf79..d016ebeaa0d5 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -224,9 +224,6 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { idx_t end_pos; bool seen_value = false; while (pos < len) { - if (pos == len) { - return false; - } 
bool set_escaped = false; if (input_state.escaped) { @@ -352,6 +349,67 @@ struct SplitStringMapOperation { } }; +static inline bool MapKeyOrValueStateTransition(StringCastInputState &input_state, optional_idx &start_pos, + idx_t &end_pos) { + auto &buf = input_state.buf; + auto &pos = input_state.pos; + + bool set_escaped = false; + if (input_state.escaped) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + } else if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToCloseQuotes(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + set_escaped = true; + end_pos = pos; + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + } + input_state.escaped = set_escaped; + pos++; + + return true; +} + template static bool SplitStringMapInternal(const string_t &input, OP &state) { const char *buf = input.GetData(); @@ -378,58 +436,9 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { optional_idx start_pos; idx_t end_pos; while (pos < len && (buf[pos] != '=' || input_state.escaped)) { - bool set_escaped = false; - if (input_state.escaped) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; - } else if (buf[pos] == '"' || buf[pos] == '\'') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToCloseQuotes(input_state)) { - 
return false; - } - end_pos = pos; - } else if (buf[pos] == '{') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '(') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '[') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '\\') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - set_escaped = true; - end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; + if (!MapKeyOrValueStateTransition(input_state, start_pos, end_pos)) { + return false; } - input_state.escaped = set_escaped; - pos++; } if (pos == len) { return false; @@ -447,59 +456,9 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { pos++; SkipWhitespace(input_state); while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { - bool set_escaped = false; - - if (input_state.escaped) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; - } else if (buf[pos] == '"' || buf[pos] == '\'') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToCloseQuotes(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '{') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '(') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '[') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '\\') { - if (!start_pos.IsValid()) { - start_pos = 
pos; - } - set_escaped = true; - end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; + if (!MapKeyOrValueStateTransition(input_state, start_pos, end_pos)) { + return false; } - input_state.escaped = set_escaped; - pos++; } if (pos == len) { return false; From 90ff46c233f1ab8f5ca2a5033c87f32ade14c2d2 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 13 Feb 2025 13:38:46 +0100 Subject: [PATCH 064/142] use the same function in the list value --- src/function/cast/vector_cast_helpers.cpp | 208 +++++++++------------- 1 file changed, 80 insertions(+), 128 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index d016ebeaa0d5..750db4b0e2c7 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -203,6 +203,66 @@ struct SplitStringListOperation { Vector &child; }; +static inline bool ValueStateTransition(StringCastInputState &input_state, optional_idx &start_pos, idx_t &end_pos) { + auto &buf = input_state.buf; + auto &pos = input_state.pos; + + bool set_escaped = false; + if (input_state.escaped) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + } else if (buf[pos] == '"' || buf[pos] == '\'') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToCloseQuotes(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '{') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '(') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '[') { + if (!start_pos.IsValid()) { + start_pos = pos; + } + if (!SkipToClose(input_state)) { + return false; + } + end_pos = pos; + } else if (buf[pos] == '\\') { + if (!start_pos.IsValid()) { + 
start_pos = pos; + } + set_escaped = true; + end_pos = pos; + } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + if (!start_pos.IsValid()) { + start_pos = pos; + } + end_pos = pos; + } + input_state.escaped = set_escaped; + pos++; + + return true; +} + template static bool SplitStringListInternal(const string_t &input, OP &state) { const char *buf = input.GetData(); @@ -220,79 +280,32 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { //! Skip the '[' pos++; SkipWhitespace(input_state); - optional_idx start_pos; - idx_t end_pos; bool seen_value = false; while (pos < len) { - bool set_escaped = false; + optional_idx start_pos; + idx_t end_pos; - if (input_state.escaped) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; - } else if (buf[pos] == '[') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - //! Start of a LIST - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if ((buf[pos] == '"' || buf[pos] == '\'')) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToCloseQuotes(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '{') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - //! Start of a STRUCT - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '(') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - //! 
Start of an (unnamed) STRUCT - if (!SkipToClose(input_state)) { + while (pos < len && ((buf[pos] != ',' && buf[pos] != ']') || input_state.escaped)) { + if (!ValueStateTransition(input_state, start_pos, end_pos)) { return false; } - end_pos = pos; - } else if ((buf[pos] == ',' || buf[pos] == ']')) { - if (buf[pos] != ']' || start_pos.IsValid() || seen_value) { - if (!start_pos.IsValid()) { - state.HandleValue(buf, 0, 0); - } else { - auto start = start_pos.GetIndex(); - state.HandleValue(buf, start, end_pos + 1); - } - seen_value = true; - } - if (buf[pos] == ']') { - break; - } - start_pos = optional_idx(); - } else if (buf[pos] == '\\') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - set_escaped = true; - end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { + } + if (pos == len) { + return false; + } + if (buf[pos] != ']' || start_pos.IsValid() || seen_value) { if (!start_pos.IsValid()) { - start_pos = pos; + state.HandleValue(buf, 0, 0); + } else { + auto start = start_pos.GetIndex(); + state.HandleValue(buf, start, end_pos + 1); } - end_pos = pos; + seen_value = true; } - input_state.escaped = set_escaped; + if (buf[pos] == ']') { + break; + } + pos++; SkipWhitespace(input_state); } @@ -349,67 +362,6 @@ struct SplitStringMapOperation { } }; -static inline bool MapKeyOrValueStateTransition(StringCastInputState &input_state, optional_idx &start_pos, - idx_t &end_pos) { - auto &buf = input_state.buf; - auto &pos = input_state.pos; - - bool set_escaped = false; - if (input_state.escaped) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; - } else if (buf[pos] == '"' || buf[pos] == '\'') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToCloseQuotes(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '{') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '(') { - if 
(!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '[') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToClose(input_state)) { - return false; - } - end_pos = pos; - } else if (buf[pos] == '\\') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - set_escaped = true; - end_pos = pos; - } else if (!StringUtil::CharacterIsSpace(buf[pos])) { - if (!start_pos.IsValid()) { - start_pos = pos; - } - end_pos = pos; - } - input_state.escaped = set_escaped; - pos++; - - return true; -} - template static bool SplitStringMapInternal(const string_t &input, OP &state) { const char *buf = input.GetData(); @@ -436,7 +388,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { optional_idx start_pos; idx_t end_pos; while (pos < len && (buf[pos] != '=' || input_state.escaped)) { - if (!MapKeyOrValueStateTransition(input_state, start_pos, end_pos)) { + if (!ValueStateTransition(input_state, start_pos, end_pos)) { return false; } } @@ -456,7 +408,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { pos++; SkipWhitespace(input_state); while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { - if (!MapKeyOrValueStateTransition(input_state, start_pos, end_pos)) { + if (!ValueStateTransition(input_state, start_pos, end_pos)) { return false; } } From 201ba3fc4acf679f6cb7573f9283b0280b2353ce Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 13 Feb 2025 13:41:35 +0100 Subject: [PATCH 065/142] also use the same function in struct value --- src/function/cast/vector_cast_helpers.cpp | 54 +---------------------- 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 750db4b0e2c7..b5aa34682d0c 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -530,59 +530,9 @@ 
bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Thu, 13 Feb 2025 13:43:09 +0100 Subject: [PATCH 066/142] also use the same function in unnamed struct cast --- src/function/cast/vector_cast_helpers.cpp | 54 +---------------------- 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index b5aa34682d0c..e7c87164c955 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -573,59 +573,9 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector Date: Thu, 13 Feb 2025 14:01:46 +0100 Subject: [PATCH 067/142] improve cast error message for VARCHAR -> nested type --- src/function/cast/string_cast.cpp | 31 +++++++++---------- src/function/cast/vector_cast_helpers.cpp | 1 - test/sql/cast/string_to_list_cast.test | 2 +- test/sql/cast/string_to_struct_cast.test | 14 ++++++++- test/sql/cast/string_to_unnamed_struct.test | 2 +- test/sql/types/nested/array/array_cast.test | 6 +++- .../types/nested/array/array_try_cast.test | 4 +-- .../types/struct/unnamed_struct_casts.test | 5 +-- 8 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/function/cast/string_cast.cpp b/src/function/cast/string_cast.cpp index b39f6623e7da..07375bf7bd37 100644 --- a/src/function/cast/string_cast.cpp +++ b/src/function/cast/string_cast.cpp @@ -160,9 +160,9 @@ bool VectorStringToList::StringToNestedTypeCastLoop(const string_t *source_data, list_data[i].offset = total; if (!VectorStringToList::SplitStringList(source_data[idx], child_data, total, varchar_vector)) { - string text = "Type VARCHAR with value '" + source_data[idx].GetString() + - "' can't be cast to the destination type LIST"; - HandleVectorCastError::Operation(text, result_mask, i, vector_cast_data); + auto error = StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type %s", + source_data[idx].GetString(), 
result.GetType().ToString()); + HandleVectorCastError::Operation(error, result_mask, i, vector_cast_data); } list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string } @@ -234,12 +234,12 @@ bool VectorStringToStruct::StringToNestedTypeCastLoop(const string_t *source_dat continue; } if (!VectorStringToStruct::SplitStruct(source_data[idx], child_vectors, i, child_names, child_masks)) { - string text = "Type VARCHAR with value '" + source_data[idx].GetString() + - "' can't be cast to the destination type STRUCT"; + auto error = StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type %s", + source_data[idx].GetString(), result.GetType().ToString()); for (auto &child_mask : child_masks) { child_mask.get().SetInvalid(i); // some values may have already been found and set valid } - HandleVectorCastError::Operation(text, result_mask, i, vector_cast_data); + HandleVectorCastError::Operation(error, result_mask, i, vector_cast_data); } } @@ -316,10 +316,10 @@ bool VectorStringToMap::StringToNestedTypeCastLoop(const string_t *source_data, list_data[i].offset = total; if (!VectorStringToMap::SplitStringMap(source_data[idx], child_key_data, child_val_data, total, varchar_key_vector, varchar_val_vector)) { - string text = "Type VARCHAR with value '" + source_data[idx].GetString() + - "' can't be cast to the destination type MAP"; + auto error = StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type %s", + source_data[idx].GetString(), result.GetType().ToString()); FlatVector::SetNull(result, i, true); - HandleVectorCastError::Operation(text, result_mask, i, vector_cast_data); + HandleVectorCastError::Operation(error, result_mask, i, vector_cast_data); } list_data[i].length = total - list_data[i].offset; } @@ -379,10 +379,9 @@ bool VectorStringToArray::StringToNestedTypeCastLoop(const string_t *source_data if (array_size != str_array_size) { if (all_lengths_match) { 
all_lengths_match = false; - auto msg = - StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type ARRAY[%u]" - ", the size of the array must match the destination type", - source_data[idx].GetString(), array_size); + auto msg = StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type %s" + ", the size of the array must match the destination type", + source_data[idx].GetString(), result.GetType().ToString()); if (parameters.strict) { throw ConversionException(msg); } @@ -418,9 +417,9 @@ bool VectorStringToArray::StringToNestedTypeCastLoop(const string_t *source_data } if (!VectorStringToList::SplitStringList(source_data[idx], child_data, total, varchar_vector)) { - auto text = StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type ARRAY", - source_data[idx].GetString()); - HandleVectorCastError::Operation(text, result_mask, i, vector_cast_data); + auto error = StringUtil::Format("Type VARCHAR with value '%s' can't be cast to the destination type %s", + source_data[idx].GetString(), result.GetType().ToString()); + HandleVectorCastError::Operation(error, result_mask, i, vector_cast_data); } } D_ASSERT(total == child_count); diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index e7c87164c955..1d654da9b735 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -470,7 +470,6 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vector BLOB) # Should be able to cast from NULL query I @@ -138,11 +140,13 @@ NULL statement error SELECT (['2', 'abc', '3']::VARCHAR[3])::INT[] ---- +Could not convert string 'abc' to INT32 # Should not be able to cast to unrelated type statement error SELECT ([1,2,3]::INT[3])::INT; ---- +Unimplemented type for cast (INTEGER[3] -> INTEGER) # Should not be able to cast to list if child types fail query I @@ -166,4 +170,4 @@ SELECT '[1, 2, 
3]'::INTEGER[3] query I SELECT TRY_CAST(l AS INTEGER[][3]) FROM VALUES (['foo']) as v(l); ---- -NULL \ No newline at end of file +NULL diff --git a/test/sql/types/nested/array/array_try_cast.test b/test/sql/types/nested/array/array_try_cast.test index d3398b54fa3d..4e90f308d6ff 100644 --- a/test/sql/types/nested/array/array_try_cast.test +++ b/test/sql/types/nested/array/array_try_cast.test @@ -53,7 +53,7 @@ NULL statement error SELECT CAST('[1,2]' as INTEGER[3]); ---- -Conversion Error: Type VARCHAR with value '[1,2]' can't be cast to the destination type ARRAY[3], the size of the array must match the destination type +Conversion Error: Type VARCHAR with value '[1,2]' can't be cast to the destination type INTEGER[3], the size of the array must match the destination type query I SELECT CAST('[NULL, [1], [NULL]]' as INTEGER[1][3]); @@ -78,7 +78,7 @@ SELECT CAST('[NULL, [1,NULL,3], [1,2,3]]' as INTEGER[3][3]); statement error SELECT CAST('[NULL, [1,NULL,3], [1,2]]' as INTEGER[3][3]); ---- -Conversion Error: Type VARCHAR with value '[1,2]' can't be cast to the destination type ARRAY[3], the size of the array must match the destination type +Conversion Error: Type VARCHAR with value '[1,2]' can't be cast to the destination type INTEGER[3], the size of the array must match the destination type query I SELECT TRY_CAST('[NULL, [1,NULL,3], [1,2]]' as INTEGER[3][3]); diff --git a/test/sql/types/struct/unnamed_struct_casts.test b/test/sql/types/struct/unnamed_struct_casts.test index 9ab777d37ffb..5b8d3d6b4a25 100644 --- a/test/sql/types/struct/unnamed_struct_casts.test +++ b/test/sql/types/struct/unnamed_struct_casts.test @@ -10,7 +10,8 @@ select row(42, 'hello') union all select '{'': 42,'': hello}'; ---- Conversion Error -statement error +query I select row(42, 'hello') union all select '(84, world)'; ---- -unsupported +(42, hello) +(84, world) From 591f090ebcd624fc6251711134e0e06ecc41da2e Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Thu, 13 Feb 2025 14:04:54 +0100 
Subject: [PATCH 068/142] make string_dictionary_page_size_limit for Parquet writer configurable --- .../include/parquet_rle_bp_encoder.hpp | 17 ++++++++-- extension/parquet/include/parquet_writer.hpp | 8 +++-- .../writer/templated_column_writer.hpp | 3 +- extension/parquet/parquet_extension.cpp | 20 ++++++++++-- extension/parquet/parquet_writer.cpp | 5 +-- .../writer/primitive_column_writer.cpp | 2 +- src/common/types/hash.cpp | 2 +- .../duckdb/common/primitive_dictionary.hpp | 7 +++-- test/sql/copy/parquet/bloom_filters.test | 31 ++++++++++++++++++- 9 files changed, 78 insertions(+), 17 deletions(-) diff --git a/extension/parquet/include/parquet_rle_bp_encoder.hpp b/extension/parquet/include/parquet_rle_bp_encoder.hpp index b0fd130a33f4..f29c85edcf79 100644 --- a/extension/parquet/include/parquet_rle_bp_encoder.hpp +++ b/extension/parquet/include/parquet_rle_bp_encoder.hpp @@ -67,12 +67,23 @@ class RleBpEncoder { } void WriteMany(WriteStream &writer, uint32_t value, idx_t count) { - D_ASSERT(bp_block_count == 0); if (rle_count != 0) { - WriteRun(writer); + // If an RLE run is going on, write a single value to either finish it or convert to BP + WriteValue(writer, value); + count--; } + + if (bp_block_count != 0) { + // If a BP run is going on, finish it + while (bp_block_count != 0 && count > 0) { + WriteValue(writer, value); + count--; + } + } + + // Set remaining as current RLE run rle_value = value; - rle_count = count; + rle_count += count; } void FinishWrite(WriteStream &writer) { diff --git a/extension/parquet/include/parquet_writer.hpp b/extension/parquet/include/parquet_writer.hpp index 1ad586489067..8af50765e50f 100644 --- a/extension/parquet/include/parquet_writer.hpp +++ b/extension/parquet/include/parquet_writer.hpp @@ -79,8 +79,8 @@ class ParquetWriter { vector names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids, const vector> &kv_metadata, shared_ptr encryption_config, idx_t dictionary_size_limit, - double 
bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl, - ParquetVersion parquet_version); + idx_t string_dictionary_page_size_limit, double bloom_filter_false_positive_ratio, + int64_t compression_level, bool debug_use_openssl, ParquetVersion parquet_version); public: void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result); @@ -116,6 +116,9 @@ class ParquetWriter { idx_t DictionarySizeLimit() const { return dictionary_size_limit; } + idx_t StringDictionaryPageSizeLimit() const { + return string_dictionary_page_size_limit; + } double BloomFilterFalsePositiveRatio() const { return bloom_filter_false_positive_ratio; } @@ -149,6 +152,7 @@ class ParquetWriter { ChildFieldIDs field_ids; shared_ptr encryption_config; idx_t dictionary_size_limit; + idx_t string_dictionary_page_size_limit; double bloom_filter_false_positive_ratio; int64_t compression_level; bool debug_use_openssl; diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index ca3b3ca8a804..544ced9c04df 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -20,7 +20,6 @@ namespace duckdb { template static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, const idx_t chunk_start, const idx_t chunk_end, const ValidityMask &mask, WriteStream &ser) { - const auto *ptr = FlatVector::GetData(col); for (idx_t r = chunk_start; r < chunk_end; r++) { if (!mask.RowIsValid(r)) { @@ -38,7 +37,7 @@ class StandardColumnWriterState : public PrimitiveColumnWriterState { StandardColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx) : PrimitiveColumnWriterState(writer, row_group, col_idx), dictionary(BufferAllocator::Get(writer.GetContext()), writer.DictionarySizeLimit(), - 2097152), // TODO: make size configurable + 
writer.StringDictionaryPageSizeLimit()), encoding(duckdb_parquet::Encoding::PLAIN) { } ~StandardColumnWriterState() override = default; diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index 9545b3cb96c6..9142c0db681e 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -49,6 +49,7 @@ #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/storage/statistics/base_statistics.hpp" #include "duckdb/storage/table/row_group.hpp" +#include "duckdb/common/primitive_dictionary.hpp" #endif namespace duckdb { @@ -199,6 +200,8 @@ struct ParquetWriteBindData : public TableFunctionData { dictionary_size_limit = row_group_size / 20; } + idx_t string_dictionary_page_size_limit = 2097152; + //! What false positive rate are we willing to accept for bloom filters double bloom_filter_false_positive_ratio = 0.01; @@ -1279,6 +1282,13 @@ unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBi } bind_data->dictionary_size_limit = val; dictionary_size_limit_set = true; + } else if (loption == "string_dictionary_page_size_limit") { + auto val = option.second[0].GetValue(); + if (val > PrimitiveDictionary::MAXIMUM_POSSIBLE_SIZE) { + throw BinderException("string_dictionary_page_size_limit must be less than or equal to %llu", + PrimitiveDictionary::MAXIMUM_POSSIBLE_SIZE); + } + bind_data->string_dictionary_page_size_limit = val; } else if (loption == "bloom_filter_false_positive_ratio") { auto val = option.second[0].GetValue(); if (val <= 0) { @@ -1341,8 +1351,9 @@ unique_ptr ParquetWriteInitializeGlobal(ClientContext &conte global_state->writer = make_uniq( context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec, parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config, - parquet_bind.dictionary_size_limit, parquet_bind.bloom_filter_false_positive_ratio, - parquet_bind.compression_level, 
parquet_bind.debug_use_openssl, parquet_bind.parquet_version); + parquet_bind.dictionary_size_limit, parquet_bind.string_dictionary_page_size_limit, + parquet_bind.bloom_filter_false_positive_ratio, parquet_bind.compression_level, parquet_bind.debug_use_openssl, + parquet_bind.parquet_version); return std::move(global_state); } @@ -1520,6 +1531,9 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin default_value.bloom_filter_false_positive_ratio); serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version, default_value.parquet_version); + serializer.WritePropertyWithDefault(115, "string_dictionary_page_size_limit", + bind_data.string_dictionary_page_size_limit, + default_value.string_dictionary_page_size_limit); } static unique_ptr ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) { @@ -1550,6 +1564,8 @@ static unique_ptr ParquetCopyDeserialize(Deserializer &deserialize 113, "bloom_filter_false_positive_ratio", default_value.bloom_filter_false_positive_ratio); data->parquet_version = deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version); + data->string_dictionary_page_size_limit = deserializer.ReadPropertyWithExplicitDefault( + 115, "string_dictionary_page_size_limit", default_value.string_dictionary_page_size_limit); return std::move(data); } diff --git a/extension/parquet/parquet_writer.cpp b/extension/parquet/parquet_writer.cpp index 9977f67ecd27..b3af8efe3e81 100644 --- a/extension/parquet/parquet_writer.cpp +++ b/extension/parquet/parquet_writer.cpp @@ -320,11 +320,12 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file vector names_p, CompressionCodec::type codec, ChildFieldIDs field_ids_p, const vector> &kv_metadata, shared_ptr encryption_config_p, idx_t dictionary_size_limit_p, - double bloom_filter_false_positive_ratio_p, int64_t compression_level_p, - bool debug_use_openssl_p, ParquetVersion 
parquet_version) + idx_t string_dictionary_page_size_limit_p, double bloom_filter_false_positive_ratio_p, + int64_t compression_level_p, bool debug_use_openssl_p, ParquetVersion parquet_version) : context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)), encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p), + string_dictionary_page_size_limit(string_dictionary_page_size_limit_p), bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p), debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version) { diff --git a/extension/parquet/writer/primitive_column_writer.cpp b/extension/parquet/writer/primitive_column_writer.cpp index 589f94ab316f..357c2138e14d 100644 --- a/extension/parquet/writer/primitive_column_writer.cpp +++ b/extension/parquet/writer/primitive_column_writer.cpp @@ -130,7 +130,7 @@ void PrimitiveColumnWriter::WriteLevels(WriteStream &temp_writer, const unsafe_v MemoryStream intermediate_stream(Allocator::DefaultAllocator()); rle_encoder.BeginWrite(); - if (null_count.IsValid() && null_count.GetIndex() == 0 || null_count.GetIndex() == count) { + if (null_count.IsValid() && (null_count.GetIndex() == 0 || null_count.GetIndex() == count)) { // All are NULL or none are NULL rle_encoder.WriteMany(intermediate_stream, levels[0], count); } else { diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp index f54295af4656..f9fe42ffcbd5 100644 --- a/src/common/types/hash.cpp +++ b/src/common/types/hash.cpp @@ -100,7 +100,7 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { // XOR with remaining (<8) bytes hash_t hr = 0; - FastMemcpy(&hr, ptr, len & 7U); + memcpy(&hr, ptr, len & 7U); h ^= hr; // Finalize diff --git a/src/include/duckdb/common/primitive_dictionary.hpp b/src/include/duckdb/common/primitive_dictionary.hpp 
index 0976882a2c8d..a4369776d37d 100644 --- a/src/include/duckdb/common/primitive_dictionary.hpp +++ b/src/include/duckdb/common/primitive_dictionary.hpp @@ -29,6 +29,8 @@ class PrimitiveDictionary { }; public: + static constexpr uint32_t MAXIMUM_POSSIBLE_SIZE = INVALID_INDEX - 1; + //! PrimitiveDictionary is a fixed-size linear probing hash table for primitive types //! It is used to dictionary-encode data in, e.g., Parquet files PrimitiveDictionary(Allocator &allocator, idx_t maximum_size_p, idx_t plain_capacity_p) @@ -47,13 +49,12 @@ class PrimitiveDictionary { public: //! Insert value into dictionary (if not full) void Insert(T value) { - if (full | (size == capacity)) { - full = true; + if (full) { return; } auto &entry = Lookup(value); if (entry.IsEmpty()) { - if (!AddToPlain(value)) { + if (size + 1 > maximum_size || !AddToPlain(value)) { full = true; return; } diff --git a/test/sql/copy/parquet/bloom_filters.test b/test/sql/copy/parquet/bloom_filters.test index 8ca061e9dbb7..05f166c23598 100644 --- a/test/sql/copy/parquet/bloom_filters.test +++ b/test/sql/copy/parquet/bloom_filters.test @@ -226,4 +226,33 @@ statement error copy (select (r1.range*10)::BIGINT r, from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom8.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000, bloom_filter_false_positive_ratio 0); ---- -bloom_filter_false_positive_ratio must be greater than 0 \ No newline at end of file +bloom_filter_false_positive_ratio must be greater than 0 + +# some tests for string_dictionary_page_size_limit + +# no bloom filter, limit too low +statement ok +copy (select (r1.range*10)::VARCHAR r, +from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 10); + +query III +select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom9.parquet') order by row_group_id; +---- 
+0 false false + +# big enough +statement ok +copy (select (r1.range*10)::VARCHAR r, +from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 100000); + +query III +select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom9.parquet') order by row_group_id; +---- +0 true true + +# too big +statement error +copy (select (r1.range*10)::VARCHAR r, +from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 4294967295); +---- +Binder Error From 5d79f5df60c49e40f62569ebc069923d4504d375 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Thu, 13 Feb 2025 15:01:40 +0100 Subject: [PATCH 069/142] some more fast paths/optimizations for parquet writer --- .../writer/templated_column_writer.hpp | 21 +++++++--- .../writer/primitive_column_writer.cpp | 42 ++++++++++++------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index 544ced9c04df..81aeb659652c 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -229,15 +229,24 @@ class StandardColumnWriter : public PrimitiveColumnWriter { switch (page_state.encoding) { case duckdb_parquet::Encoding::RLE_DICTIONARY: { - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { - continue; - } - if (!page_state.dict_written_value) { - // first value: write the bit-width as a one-byte entry and initialize writer + idx_t r = chunk_start; + if (!page_state.dict_written_value) { + // find first non-null value + for (; r < chunk_end; r++) { + if (!mask.RowIsValid(r)) { + continue; + } + // write the bit-width as a one-byte entry and initialize 
writer temp_writer.Write(page_state.dict_bit_width); page_state.dict_encoder.BeginWrite(); page_state.dict_written_value = true; + break; + } + } + + for (; r < chunk_end; r++) { + if (!mask.RowIsValid(r)) { + continue; } const auto &src_value = data_ptr[r]; const auto value_index = page_state.dictionary.GetIndex(src_value); diff --git a/extension/parquet/writer/primitive_column_writer.cpp b/extension/parquet/writer/primitive_column_writer.cpp index 357c2138e14d..0bd85d0894a9 100644 --- a/extension/parquet/writer/primitive_column_writer.cpp +++ b/extension/parquet/writer/primitive_column_writer.cpp @@ -40,7 +40,6 @@ void PrimitiveColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterStat auto &state = state_p.Cast(); auto &col_chunk = state.row_group.columns[state.col_idx]; - idx_t start = 0; idx_t vcount = parent ? parent->definition_levels.size() - state.definition_levels.size() : count; idx_t parent_index = state.definition_levels.size(); auto &validity = FlatVector::Validity(vector); @@ -49,24 +48,35 @@ void PrimitiveColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterStat idx_t vector_index = 0; reference page_info_ref = state.page_info.back(); - for (idx_t i = start; i < vcount; i++) { + col_chunk.meta_data.num_values += vcount; + + const bool check_parent_empty = parent && !parent->is_empty.empty(); + if (!check_parent_empty && validity.AllValid() && TypeIsConstantSize(vector.GetType().InternalType()) && + page_info_ref.get().estimated_page_size + GetRowSize(vector, vector_index, state) * vcount < + MAX_UNCOMPRESSED_PAGE_SIZE) { + // Fast path auto &page_info = page_info_ref.get(); - page_info.row_count++; - col_chunk.meta_data.num_values++; - if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index + i]) { - page_info.empty_count++; - continue; - } - if (validity.RowIsValid(vector_index)) { - page_info.estimated_page_size += GetRowSize(vector, vector_index, state); - if (page_info.estimated_page_size >= 
MAX_UNCOMPRESSED_PAGE_SIZE) { - PageInformation new_info; - new_info.offset = page_info.offset + page_info.row_count; - state.page_info.push_back(new_info); - page_info_ref = state.page_info.back(); + page_info.row_count += vcount; + page_info.estimated_page_size += GetRowSize(vector, vector_index, state) * vcount; + } else { + for (idx_t i = 0; i < vcount; i++) { + auto &page_info = page_info_ref.get(); + page_info.row_count++; + if (check_parent_empty && parent->is_empty[parent_index + i]) { + page_info.empty_count++; + continue; + } + if (validity.RowIsValid(vector_index)) { + page_info.estimated_page_size += GetRowSize(vector, vector_index, state); + if (page_info.estimated_page_size >= MAX_UNCOMPRESSED_PAGE_SIZE) { + PageInformation new_info; + new_info.offset = page_info.offset + page_info.row_count; + state.page_info.push_back(new_info); + page_info_ref = state.page_info.back(); + } } + vector_index++; } - vector_index++; } } From 503d0c0f6fdb479bffb58847363f6ddeef7d8d54 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Thu, 13 Feb 2025 15:39:06 +0100 Subject: [PATCH 070/142] add another fast path --- .../parquet/include/parquet_dlba_encoder.hpp | 2 +- .../writer/templated_column_writer.hpp | 22 ++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/extension/parquet/include/parquet_dlba_encoder.hpp b/extension/parquet/include/parquet_dlba_encoder.hpp index 5e39c5e1fea2..ef7d19f0cfcb 100644 --- a/extension/parquet/include/parquet_dlba_encoder.hpp +++ b/extension/parquet/include/parquet_dlba_encoder.hpp @@ -69,7 +69,7 @@ void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value // helpers to get size from strings template -static idx_t GetDlbaStringSize(const SRC &src_value) { +static idx_t GetDlbaStringSize(const SRC &) { return 0; } diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index 81aeb659652c..6e20c6dd6e23 
100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -161,17 +161,27 @@ class StandardColumnWriter : public PrimitiveColumnWriter { const auto &validity = FlatVector::Validity(vector); - for (idx_t i = 0; i < vcount; i++) { - if (check_parent_empty && parent->is_empty[parent_index + i]) { - continue; - } - if (validity.RowIsValid(vector_index)) { + if (!check_parent_empty && validity.AllValid()) { + // Fast path + for (; vector_index < vcount; vector_index++) { const auto &src_value = data_ptr[vector_index]; state.dictionary.Insert(src_value); state.total_value_count++; state.total_string_size += dlba_encoder::GetDlbaStringSize(src_value); } - vector_index++; + } else { + for (idx_t i = 0; i < vcount; i++) { + if (check_parent_empty && parent->is_empty[parent_index + i]) { + continue; + } + if (validity.RowIsValid(vector_index)) { + const auto &src_value = data_ptr[vector_index]; + state.dictionary.Insert(src_value); + state.total_value_count++; + state.total_string_size += dlba_encoder::GetDlbaStringSize(src_value); + } + vector_index++; + } } } From 301cc5526d3628296bb448800b25270c689a5f20 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 13 Feb 2025 17:12:06 +0100 Subject: [PATCH 071/142] WIP generalize rowid column to "virtual columns", and make "filename" one of these virtual columns --- extension/parquet/parquet_extension.cpp | 16 ++-- .../catalog_entry/table_catalog_entry.cpp | 6 ++ src/common/constants.cpp | 5 ++ src/common/multi_file_reader.cpp | 10 ++- src/execution/physical_plan/plan_get.cpp | 6 +- .../catalog_entry/table_catalog_entry.hpp | 7 +- src/include/duckdb/common/column_index.hpp | 3 + src/include/duckdb/common/constants.hpp | 2 + .../duckdb/common/multi_file_reader.hpp | 12 ++- src/include/duckdb/common/table_column.hpp | 26 ++++++ .../duckdb/function/table_function.hpp | 12 ++- .../duckdb/optimizer/late_materialization.hpp | 2 + 
src/include/duckdb/planner/bind_context.hpp | 2 +- .../duckdb/planner/operator/logical_get.hpp | 14 ++-- src/include/duckdb/planner/table_binding.hpp | 10 +-- src/optimizer/late_materialization.cpp | 25 +++--- src/planner/bind_context.cpp | 16 +++- src/planner/binder/statement/bind_delete.cpp | 9 ++- src/planner/binder/statement/bind_update.cpp | 9 ++- .../binder/tableref/bind_basetableref.cpp | 2 +- .../binder/tableref/bind_table_function.cpp | 9 ++- src/planner/operator/logical_get.cpp | 79 +++++++++++++------ src/planner/table_binding.cpp | 30 ++++--- .../copy/parquet/parquet_virtual_columns.test | 21 +++++ 24 files changed, 235 insertions(+), 98 deletions(-) create mode 100644 src/include/duckdb/common/table_column.hpp create mode 100644 test/sql/copy/parquet/parquet_virtual_columns.test diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index f93b3f04acbe..b7100cf7b363 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -450,11 +450,11 @@ class ParquetScanFunction { return nullptr; } - static unique_ptr ParquetScanBindInternal(ClientContext &context, - unique_ptr multi_file_reader, - shared_ptr file_list, - vector &return_types, vector &names, - ParquetOptions parquet_options) { + static unique_ptr + ParquetScanBindInternal(ClientContext &context, unique_ptr multi_file_reader, + shared_ptr file_list, vector &return_types, + vector &names, ParquetOptions parquet_options, + optional_ptr virtual_columns = nullptr) { auto result = make_uniq(); result->multi_file_reader = std::move(multi_file_reader); result->file_list = std::move(file_list); @@ -463,7 +463,7 @@ class ParquetScanFunction { if (result->multi_file_reader->Bind(parquet_options.file_options, *result->file_list, result->types, result->names, result->reader_bind)) { result->multi_file_reader->BindOptions(parquet_options.file_options, *result->file_list, result->types, - result->names, result->reader_bind); + 
result->names, result->reader_bind, virtual_columns); // Enable the parquet file_row_number on the parquet options if the file_row_number_idx was set if (result->reader_bind.file_row_number_idx != DConstants::INVALID_INDEX) { parquet_options.file_row_number = true; @@ -476,7 +476,7 @@ class ParquetScanFunction { parquet_options.file_options.AutoDetectHivePartitioning(*result->file_list, context); // Default bind result->reader_bind = result->multi_file_reader->BindReader( - context, result->types, result->names, *result->file_list, *result, parquet_options); + context, result->types, result->names, *result->file_list, *result, parquet_options, virtual_columns); } // Set the explicit cardinality if requested @@ -617,7 +617,7 @@ class ParquetScanFunction { auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0]); return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), return_types, names, - parquet_options); + parquet_options, &input.virtual_columns); } static double ParquetProgress(ClientContext &context, const FunctionData *bind_data_p, diff --git a/src/catalog/catalog_entry/table_catalog_entry.cpp b/src/catalog/catalog_entry/table_catalog_entry.cpp index 3070b2e30d48..a7ca2fab69a3 100644 --- a/src/catalog/catalog_entry/table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/table_catalog_entry.cpp @@ -332,4 +332,10 @@ bool TableCatalogEntry::HasPrimaryKey() const { return GetPrimaryKey() != nullptr; } +virtual_column_map_t TableCatalogEntry::GetVirtualColumns() const { + virtual_column_map_t virtual_columns; + virtual_columns.insert(make_pair(COLUMN_IDENTIFIER_ROW_ID, TableColumn("rowid", LogicalType::ROW_TYPE))); + return virtual_columns; +} + } // namespace duckdb diff --git a/src/common/constants.cpp b/src/common/constants.cpp index edafe6b67650..4db7245e235f 100644 --- a/src/common/constants.cpp +++ b/src/common/constants.cpp @@ -10,6 +10,7 @@ constexpr const idx_t DConstants::INVALID_INDEX; const row_t 
MAX_ROW_ID = 36028797018960000ULL; // 2^55 const row_t MAX_ROW_ID_LOCAL = 72057594037920000ULL; // 2^56 const column_t COLUMN_IDENTIFIER_ROW_ID = (column_t)-1; +const column_t VIRTUAL_COLUMN_START = UINT64_C(9223372036854775808); // 2^63 const double PI = 3.141592653589793; const transaction_t TRANSACTION_ID_START = 4611686018427388000ULL; // 2^62 @@ -56,4 +57,8 @@ bool IsRowIdColumnId(column_t column_id) { return column_id == COLUMN_IDENTIFIER_ROW_ID; } +bool IsVirtualColumn(column_t column_id) { + return column_id >= VIRTUAL_COLUMN_START; +} + } // namespace duckdb diff --git a/src/common/multi_file_reader.cpp b/src/common/multi_file_reader.cpp index be17aaf9516b..9016780e422b 100644 --- a/src/common/multi_file_reader.cpp +++ b/src/common/multi_file_reader.cpp @@ -15,6 +15,8 @@ namespace duckdb { +constexpr column_t MultiFileReader::COLUMN_IDENTIFIER_FILENAME; + MultiFileReaderGlobalState::~MultiFileReaderGlobalState() { } @@ -169,7 +171,8 @@ bool MultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &files void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector &return_types, vector &names, - MultiFileReaderBindData &bind_data) { + MultiFileReaderBindData &bind_data, + optional_ptr virtual_columns) { // Add generated constant column for filename if (options.filename) { if (std::find(names.begin(), names.end(), options.filename_column) != names.end()) { @@ -180,6 +183,10 @@ void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList bind_data.filename_idx = names.size(); return_types.emplace_back(LogicalType::VARCHAR); names.emplace_back(options.filename_column); + } else if (virtual_columns) { + // filename is not specified - add it to the virtual columns list + virtual_columns->insert(make_pair(COLUMN_IDENTIFIER_FILENAME, TableColumn("filename", LogicalType::VARCHAR))); + bind_data.filename_idx = COLUMN_IDENTIFIER_FILENAME; } // Add generated constant columns from hive partitioning 
scheme @@ -253,6 +260,7 @@ void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, c auto &col_idx = global_column_ids[i]; if (col_idx.IsRowIdColumn()) { // row-id + // FIXME: this should probably be removed reader_data.constant_map.emplace_back(i, Value::BIGINT(42)); continue; } diff --git a/src/execution/physical_plan/plan_get.cpp b/src/execution/physical_plan/plan_get.cpp index 3b5d940eb924..843ad1537172 100644 --- a/src/execution/physical_plan/plan_get.cpp +++ b/src/execution/physical_plan/plan_get.cpp @@ -159,10 +159,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { vector types; vector> expressions; for (auto &column_id : column_ids) { - if (column_id.IsRowIdColumn()) { - types.emplace_back(op.GetRowIdType()); - // Now how to make that a constant expression. - expressions.push_back(make_uniq(Value(op.GetRowIdType()))); + if (column_id.IsVirtualColumn()) { + throw NotImplementedException("Virtual columns require projection pushdown"); } else { auto col_id = column_id.GetPrimaryIndex(); auto type = op.returned_types[col_id]; diff --git a/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp b/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp index 398e49974b88..0adb7857b24d 100644 --- a/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +++ b/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp @@ -18,6 +18,7 @@ #include "duckdb/common/case_insensitive_map.hpp" #include "duckdb/catalog/catalog_entry/table_column_type.hpp" #include "duckdb/catalog/catalog_entry/column_dependency_manager.hpp" +#include "duckdb/common/table_column.hpp" namespace duckdb { @@ -117,10 +118,8 @@ class TableCatalogEntry : public StandardEntry { //! Returns true, if the table has a primary key, else false. bool HasPrimaryKey() const; - //! Returns the rowid type of this table - virtual LogicalType GetRowIdType() const { - return LogicalType::ROW_TYPE; - } + //! 
Returns the virtual columns for this table + virtual virtual_column_map_t GetVirtualColumns() const; protected: //! A list of columns that are part of this table diff --git a/src/include/duckdb/common/column_index.hpp b/src/include/duckdb/common/column_index.hpp index 32d2e1828462..a563005f43f3 100644 --- a/src/include/duckdb/common/column_index.hpp +++ b/src/include/duckdb/common/column_index.hpp @@ -61,6 +61,9 @@ struct ColumnIndex { bool IsRowIdColumn() const { return index == DConstants::INVALID_INDEX; } + bool IsVirtualColumn() const { + return index >= VIRTUAL_COLUMN_START; + } void Serialize(Serializer &serializer) const; static ColumnIndex Deserialize(Deserializer &deserializer); diff --git a/src/include/duckdb/common/constants.hpp b/src/include/duckdb/common/constants.hpp index d4a0d7cda1c3..387dd4127579 100644 --- a/src/include/duckdb/common/constants.hpp +++ b/src/include/duckdb/common/constants.hpp @@ -40,7 +40,9 @@ DUCKDB_API bool IsInvalidCatalog(const string &str); //! Special value used to signify the ROW ID of a table DUCKDB_API extern const column_t COLUMN_IDENTIFIER_ROW_ID; +DUCKDB_API extern const column_t VIRTUAL_COLUMN_START; DUCKDB_API bool IsRowIdColumnId(column_t column_id); +DUCKDB_API bool IsVirtualColumn(column_t column_id); //! The maximum row identifier used in tables extern const row_t MAX_ROW_ID; diff --git a/src/include/duckdb/common/multi_file_reader.hpp b/src/include/duckdb/common/multi_file_reader.hpp index 942f72c1e277..c1a262fa875f 100644 --- a/src/include/duckdb/common/multi_file_reader.hpp +++ b/src/include/duckdb/common/multi_file_reader.hpp @@ -200,6 +200,10 @@ struct MultiFileReaderData { //! The MultiFileReader class provides a set of helper methods to handle scanning from multiple files struct MultiFileReader { +public: + static constexpr column_t COLUMN_IDENTIFIER_FILENAME = UINT64_C(9223372036854775808); + +public: virtual ~MultiFileReader(); //! 
Create a MultiFileReader for a specific TableFunction, using its function name for errors @@ -246,7 +250,8 @@ struct MultiFileReader { //! Bind the options of the multi-file reader, potentially emitting any extra columns that are required DUCKDB_API virtual void BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector &return_types, vector &names, - MultiFileReaderBindData &bind_data); + MultiFileReaderBindData &bind_data, + optional_ptr virtual_columns = nullptr); //! Initialize global state used by the MultiFileReader DUCKDB_API virtual unique_ptr @@ -317,7 +322,8 @@ struct MultiFileReader { template MultiFileReaderBindData BindReader(ClientContext &context, vector &return_types, vector &names, - MultiFileList &files, RESULT_CLASS &result, OPTIONS_CLASS &options) { + MultiFileList &files, RESULT_CLASS &result, OPTIONS_CLASS &options, + optional_ptr virtual_columns = nullptr) { if (options.file_options.union_by_name) { return BindUnionReader(context, return_types, names, files, result, options); } else { @@ -330,7 +336,7 @@ struct MultiFileReader { } result.Initialize(std::move(reader)); MultiFileReaderBindData bind_data; - BindOptions(options.file_options, files, return_types, names, bind_data); + BindOptions(options.file_options, files, return_types, names, bind_data, virtual_columns); return bind_data; } } diff --git a/src/include/duckdb/common/table_column.hpp b/src/include/duckdb/common/table_column.hpp new file mode 100644 index 000000000000..6cc4b8fe7e60 --- /dev/null +++ b/src/include/duckdb/common/table_column.hpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/table_column.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/types.hpp" +#include "duckdb/common/unordered_map.hpp" + +namespace duckdb { + +struct TableColumn { + TableColumn(string name_p, LogicalType type_p) 
: name(std::move(name_p)), type(std::move(type_p)) { + } + + string name; + LogicalType type; +}; + +using virtual_column_map_t = unordered_map; + +} // namespace duckdb diff --git a/src/include/duckdb/function/table_function.hpp b/src/include/duckdb/function/table_function.hpp index 6293100ef1d1..15625ff61e3a 100644 --- a/src/include/duckdb/function/table_function.hpp +++ b/src/include/duckdb/function/table_function.hpp @@ -12,11 +12,12 @@ #include "duckdb/common/optional_ptr.hpp" #include "duckdb/execution/execution_context.hpp" #include "duckdb/function/function.hpp" -#include "duckdb/planner/bind_context.hpp" #include "duckdb/planner/logical_operator.hpp" #include "duckdb/storage/statistics/node_statistics.hpp" #include "duckdb/common/column_index.hpp" +#include "duckdb/common/table_column.hpp" #include "duckdb/function/partition_stats.hpp" +#include "duckdb/common/exception/binder_exception.hpp" #include @@ -27,7 +28,9 @@ class LogicalDependencyList; class LogicalGet; class TableFunction; class TableFilterSet; +class TableFunctionRef; class TableCatalogEntry; +class SampleOptions; struct MultiFileReader; struct OperatorPartitionData; struct OperatorPartitionInfo; @@ -90,9 +93,11 @@ struct TableFunctionBindInput { TableFunctionBindInput(vector &inputs, named_parameter_map_t &named_parameters, vector &input_table_types, vector &input_table_names, optional_ptr info, optional_ptr binder, - TableFunction &table_function, const TableFunctionRef &ref) + TableFunction &table_function, const TableFunctionRef &ref, + virtual_column_map_t &virtual_columns) : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types), - input_table_names(input_table_names), info(info), binder(binder), table_function(table_function), ref(ref) { + input_table_names(input_table_names), info(info), binder(binder), table_function(table_function), ref(ref), + virtual_columns(virtual_columns) { } vector &inputs; @@ -103,6 +108,7 @@ struct TableFunctionBindInput { 
optional_ptr binder; TableFunction &table_function; const TableFunctionRef &ref; + virtual_column_map_t &virtual_columns; }; struct TableFunctionInitInput { diff --git a/src/include/duckdb/optimizer/late_materialization.hpp b/src/include/duckdb/optimizer/late_materialization.hpp index 76f4f05e86cc..16350601a807 100644 --- a/src/include/duckdb/optimizer/late_materialization.hpp +++ b/src/include/duckdb/optimizer/late_materialization.hpp @@ -40,6 +40,8 @@ class LateMaterialization : public BaseColumnPruner { Optimizer &optimizer; //! The max row count for which we will consider late materialization idx_t max_row_count; + //! The type of the row id column + LogicalType row_id_type; }; } // namespace duckdb diff --git a/src/include/duckdb/planner/bind_context.hpp b/src/include/duckdb/planner/bind_context.hpp index 8234805e7ba9..ff070ceb49d6 100644 --- a/src/include/duckdb/planner/bind_context.hpp +++ b/src/include/duckdb/planner/bind_context.hpp @@ -100,7 +100,7 @@ class BindContext { //! Adds a call to a table function with the given alias to the BindContext. void AddTableFunction(idx_t index, const string &alias, const vector &names, const vector &types, vector &bound_column_ids, - optional_ptr entry); + optional_ptr entry, virtual_column_map_t virtual_columns); //! Adds a table view with a given alias to the BindContext. void AddView(idx_t index, const string &alias, SubqueryRef &ref, BoundQueryNode &subquery, ViewCatalogEntry &view); //! Adds a subquery with a given alias to the BindContext. 
diff --git a/src/include/duckdb/planner/operator/logical_get.hpp b/src/include/duckdb/planner/operator/logical_get.hpp index 81b93accb48b..65f90fe85ae2 100644 --- a/src/include/duckdb/planner/operator/logical_get.hpp +++ b/src/include/duckdb/planner/operator/logical_get.hpp @@ -24,7 +24,7 @@ class LogicalGet : public LogicalOperator { public: LogicalGet(idx_t table_index, TableFunction function, unique_ptr bind_data, vector returned_types, vector returned_names, - LogicalType rowid_type = LogicalType(LogicalType::ROW_TYPE)); + virtual_column_map_t virtual_columns = virtual_column_map_t()); //! The table index in the current bind context idx_t table_index; @@ -36,6 +36,8 @@ class LogicalGet : public LogicalOperator { vector returned_types; //! The names of ALL columns that can be returned by the table function vector names; + //! A mapping of column index -> type/name for all virtual columns + virtual_column_map_t virtual_columns; //! Columns that are used outside the scan vector projection_ids; //! Filters pushed down for table scan @@ -62,6 +64,9 @@ class LogicalGet : public LogicalOperator { //! Returns the underlying table that is being scanned, or nullptr if there is none optional_ptr GetTable() const; + const LogicalType &GetColumnType(const ColumnIndex &column_index) const; + const string &GetColumnName(const ColumnIndex &column_index) const; + public: void SetColumnIds(vector &&column_ids); void AddColumnId(column_t column_id); @@ -80,10 +85,6 @@ class LogicalGet : public LogicalOperator { void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); - const LogicalType &GetRowIdType() const { - return rowid_type; - } - protected: void ResolveTypes() override; @@ -93,8 +94,5 @@ class LogicalGet : public LogicalOperator { private: //! Bound column IDs vector column_ids; - - //! 
The type of the rowid column - LogicalType rowid_type = LogicalType(LogicalType::ROW_TYPE); }; } // namespace duckdb diff --git a/src/include/duckdb/planner/table_binding.hpp b/src/include/duckdb/planner/table_binding.hpp index 50631f57ac45..9aedc7e70058 100644 --- a/src/include/duckdb/planner/table_binding.hpp +++ b/src/include/duckdb/planner/table_binding.hpp @@ -16,6 +16,7 @@ #include "duckdb/catalog/catalog_entry/table_column_type.hpp" #include "duckdb/planner/binding_alias.hpp" #include "duckdb/common/column_index.hpp" +#include "duckdb/common/table_column.hpp" namespace duckdb { class BindContext; @@ -33,8 +34,7 @@ enum class BindingType { BASE, TABLE, DUMMY, CATALOG_ENTRY }; //! A Binding represents a binding to a table, table-producing function or subquery with a specified table index. struct Binding { - Binding(BindingType binding_type, BindingAlias alias, vector types, vector names, idx_t index, - LogicalType rowid_type = LogicalType(LogicalType::ROW_TYPE)); + Binding(BindingType binding_type, BindingAlias alias, vector types, vector names, idx_t index); virtual ~Binding() = default; //! The type of Binding @@ -50,8 +50,6 @@ struct Binding { //! Name -> index for the names case_insensitive_map_t name_map; - LogicalType rowid_type; - public: bool TryGetBindingIndex(const string &column_name, column_t &column_index); column_t GetBindingIndex(const string &column_name); @@ -104,12 +102,14 @@ struct TableBinding : public Binding { public: TableBinding(const string &alias, vector types, vector names, vector &bound_column_ids, optional_ptr entry, idx_t index, - bool add_row_id = false); + virtual_column_map_t virtual_columns); //! A reference to the set of bound column ids vector &bound_column_ids; //! The underlying catalog entry (if any) optional_ptr entry; + //! 
Virtual columns + virtual_column_map_t virtual_columns; public: unique_ptr ExpandGeneratedColumn(const string &column_name); diff --git a/src/optimizer/late_materialization.cpp b/src/optimizer/late_materialization.cpp index aa4e5c4c1fe4..01f8118f5257 100644 --- a/src/optimizer/late_materialization.cpp +++ b/src/optimizer/late_materialization.cpp @@ -35,7 +35,7 @@ idx_t LateMaterialization::GetOrInsertRowId(LogicalGet &get) { get.projection_ids.push_back(column_ids.size() - 1); } if (!get.types.empty()) { - get.types.push_back(get.GetRowIdType()); + get.types.push_back(row_id_type); } return column_ids.size() - 1; } @@ -44,7 +44,7 @@ unique_ptr LateMaterialization::ConstructLHS(LogicalGet &get) { // we need to construct a new scan of the same table auto table_index = optimizer.binder.GenerateTableIndex(); auto new_get = make_uniq(table_index, get.function, get.bind_data->Copy(), get.returned_types, - get.names, get.GetRowIdType()); + get.names, get.virtual_columns); new_get->GetMutableColumnIds() = get.GetColumnIds(); new_get->projection_ids = get.projection_ids; return new_get; @@ -73,8 +73,7 @@ ColumnBinding LateMaterialization::ConstructRHS(unique_ptr &op) case LogicalOperatorType::LOGICAL_PROJECTION: { auto &proj = op.Cast(); // push a projection of the row-id column - proj.expressions.push_back( - make_uniq("rowid", get.GetRowIdType(), row_id_binding)); + proj.expressions.push_back(make_uniq("rowid", row_id_type, row_id_binding)); // modify the row-id-binding to push to the new projection row_id_binding = ColumnBinding(proj.table_index, proj.expressions.size() - 1); column_count = proj.expressions.size(); @@ -153,9 +152,8 @@ unique_ptr LateMaterialization::GetExpression(LogicalOperator &op, i case LogicalOperatorType::LOGICAL_GET: { auto &get = op.Cast(); auto &column_id = get.GetColumnIds()[column_index]; - auto is_row_id = column_id.IsRowIdColumn(); - auto column_name = is_row_id ? 
"rowid" : get.names[column_id.GetPrimaryIndex()]; - auto &column_type = is_row_id ? get.GetRowIdType() : get.returned_types[column_id.GetPrimaryIndex()]; + auto column_name = get.GetColumnName(column_id); + auto &column_type = get.GetColumnType(column_id); auto expr = make_uniq(column_name, column_type, ColumnBinding(get.table_index, column_index)); return std::move(expr); @@ -235,16 +233,17 @@ bool LateMaterialization::TryLateMaterialization(unique_ptr &op } } auto &get = child.get().Cast(); - auto table = get.GetTable(); - if (!table || !table->IsDuckTable()) { - // we can only do the late-materialization optimization for DuckDB tables currently - return false; - } if (column_references.size() >= get.GetColumnIds().size()) { // we do not benefit from late materialization // we need all of the columns to compute the root node anyway (Top-N/Limit/etc) return false; } + auto entry = get.virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); + if (entry == get.virtual_columns.end()) { + // we can only do the late-materialization optimization for tables that support the rowid column + return false; + } + row_id_type = entry->second.type; // we benefit from late materialization // we need to transform this plan into a semi-join with the row-id // we need to ensure the operator returns exactly the same column bindings as before @@ -258,8 +257,6 @@ bool LateMaterialization::TryLateMaterialization(unique_ptr &op auto lhs_row_idx = GetOrInsertRowId(lhs_get); ColumnBinding lhs_binding(lhs_index, lhs_row_idx); - auto &row_id_type = get.GetRowIdType(); - // after constructing the LHS but before constructing the RHS we construct the final projections/orders // - we do this before constructing the RHS because that alter the original plan vector> final_proj_list; diff --git a/src/planner/bind_context.cpp b/src/planner/bind_context.cpp index d135ed2ae604..d22fe4799b7b 100644 --- a/src/planner/bind_context.cpp +++ b/src/planner/bind_context.cpp @@ -607,20 +607,28 @@ void 
BindContext::AddBinding(unique_ptr binding) { void BindContext::AddBaseTable(idx_t index, const string &alias, const vector &names, const vector &types, vector &bound_column_ids, StandardEntry &entry, bool add_row_id) { - AddBinding(make_uniq(alias, types, names, bound_column_ids, &entry, index, add_row_id)); + virtual_column_map_t virtual_columns; + if (add_row_id) { + virtual_columns.insert(make_pair(COLUMN_IDENTIFIER_ROW_ID, TableColumn("rowid", LogicalType::ROW_TYPE))); + } + AddBinding( + make_uniq(alias, types, names, bound_column_ids, &entry, index, std::move(virtual_columns))); } void BindContext::AddBaseTable(idx_t index, const string &alias, const vector &names, const vector &types, vector &bound_column_ids, const string &table_name) { + virtual_column_map_t virtual_columns; + virtual_columns.insert(make_pair(COLUMN_IDENTIFIER_ROW_ID, TableColumn("rowid", LogicalType::ROW_TYPE))); AddBinding(make_uniq(alias.empty() ? table_name : alias, types, names, bound_column_ids, nullptr, - index, true)); + index, std::move(virtual_columns))); } void BindContext::AddTableFunction(idx_t index, const string &alias, const vector &names, const vector &types, vector &bound_column_ids, - optional_ptr entry) { - AddBinding(make_uniq(alias, types, names, bound_column_ids, entry, index)); + optional_ptr entry, virtual_column_map_t virtual_columns) { + AddBinding( + make_uniq(alias, types, names, bound_column_ids, entry, index, std::move(virtual_columns))); } static string AddColumnNameToBinding(const string &base_name, case_insensitive_set_t ¤t_names) { diff --git a/src/planner/binder/statement/bind_delete.cpp b/src/planner/binder/statement/bind_delete.cpp index 822f487edcd6..a27d5b9d5209 100644 --- a/src/planner/binder/statement/bind_delete.cpp +++ b/src/planner/binder/statement/bind_delete.cpp @@ -73,10 +73,15 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) { del->bound_constraints = BindConstraints(table); del->AddChild(std::move(root)); + auto virtual_columns = 
table.GetVirtualColumns(); + auto row_id_entry = virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); + if (row_id_entry == virtual_columns.end()) { + throw InternalException("BindDelete could not find the row id column in the virtual columns list of the table"); + } // set up the delete expression auto &column_ids = get.GetColumnIds(); - del->expressions.push_back( - make_uniq(table.GetRowIdType(), ColumnBinding(get.table_index, column_ids.size()))); + del->expressions.push_back(make_uniq(row_id_entry->second.type, + ColumnBinding(get.table_index, column_ids.size()))); get.AddColumnId(COLUMN_IDENTIFIER_ROW_ID); if (!stmt.returning_list.empty()) { diff --git a/src/planner/binder/statement/bind_update.cpp b/src/planner/binder/statement/bind_update.cpp index 75dd39074e0a..eb0f0f7c5339 100644 --- a/src/planner/binder/statement/bind_update.cpp +++ b/src/planner/binder/statement/bind_update.cpp @@ -133,9 +133,14 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) { table.BindUpdateConstraints(*this, *get, *proj, *update, context); // finally add the row id column to the projection list + auto virtual_columns = table.GetVirtualColumns(); + auto row_id_entry = virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); + if (row_id_entry == virtual_columns.end()) { + throw InternalException("BindDelete could not find the row id column in the virtual columns list of the table"); + } auto &column_ids = get->GetColumnIds(); - proj->expressions.push_back( - make_uniq(table.GetRowIdType(), ColumnBinding(get->table_index, column_ids.size()))); + proj->expressions.push_back(make_uniq( + row_id_entry->second.type, ColumnBinding(get->table_index, column_ids.size()))); get->AddColumnId(COLUMN_IDENTIFIER_ROW_ID); // set the projection as child of the update node and finalize the result diff --git a/src/planner/binder/tableref/bind_basetableref.cpp b/src/planner/binder/tableref/bind_basetableref.cpp index d0302a1086e5..8209b7ef85af 100644 --- a/src/planner/binder/tableref/bind_basetableref.cpp +++ 
b/src/planner/binder/tableref/bind_basetableref.cpp @@ -262,7 +262,7 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { auto logical_get = make_uniq(table_index, scan_function, std::move(bind_data), std::move(return_types), - std::move(return_names), table.GetRowIdType()); + std::move(return_names), table.GetVirtualColumns()); auto table_entry = logical_get->GetTable(); auto &col_ids = logical_get->GetMutableColumnIds(); if (!table_entry) { diff --git a/src/planner/binder/tableref/bind_table_function.cpp b/src/planner/binder/tableref/bind_table_function.cpp index ace96207bf98..b17884c76532 100644 --- a/src/planner/binder/tableref/bind_table_function.cpp +++ b/src/planner/binder/tableref/bind_table_function.cpp @@ -197,9 +197,11 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab unique_ptr bind_data; vector return_types; vector return_names; + unordered_map virtual_columns; if (table_function.bind || table_function.bind_replace) { TableFunctionBindInput bind_input(parameters, named_parameters, input_table_types, input_table_names, - table_function.function_info.get(), this, table_function, ref); + table_function.function_info.get(), this, table_function, ref, + virtual_columns); if (table_function.bind_replace) { auto new_plan = table_function.bind_replace(context, bind_input); if (new_plan) { @@ -237,7 +239,8 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab } } - auto get = make_uniq(bind_index, table_function, std::move(bind_data), return_types, return_names); + auto get = make_uniq(bind_index, table_function, std::move(bind_data), return_types, return_names, + virtual_columns); get->parameters = parameters; get->named_parameters = named_parameters; get->input_table_types = input_table_types; @@ -249,7 +252,7 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab } // now add the table function to the bind context so its columns can be bound bind_context.AddTableFunction(bind_index, function_name, return_names, 
return_types, get->GetMutableColumnIds(), - get->GetTable().get()); + get->GetTable().get(), std::move(virtual_columns)); return std::move(get); } diff --git a/src/planner/operator/logical_get.cpp b/src/planner/operator/logical_get.cpp index be7b5aa5d796..37e475e66a89 100644 --- a/src/planner/operator/logical_get.cpp +++ b/src/planner/operator/logical_get.cpp @@ -17,10 +17,11 @@ LogicalGet::LogicalGet() : LogicalOperator(LogicalOperatorType::LOGICAL_GET) { } LogicalGet::LogicalGet(idx_t table_index, TableFunction function, unique_ptr bind_data, - vector returned_types, vector returned_names, LogicalType rowid_type) + vector returned_types, vector returned_names, + virtual_column_map_t virtual_columns_p) : LogicalOperator(LogicalOperatorType::LOGICAL_GET), table_index(table_index), function(std::move(function)), bind_data(std::move(bind_data)), returned_types(std::move(returned_types)), names(std::move(returned_names)), - extra_info(), rowid_type(std::move(rowid_type)) { + virtual_columns(std::move(virtual_columns_p)), extra_info() { } optional_ptr LogicalGet::GetTable() const { @@ -118,6 +119,28 @@ vector LogicalGet::GetColumnBindings() { return result; } +const LogicalType &LogicalGet::GetColumnType(const ColumnIndex &index) const { + if (index.IsVirtualColumn()) { + auto entry = virtual_columns.find(index.GetPrimaryIndex()); + if (entry == virtual_columns.end()) { + throw InternalException("Failed to find referenced virtual column %d", index.GetPrimaryIndex()); + } + return entry->second.type; + } + return returned_types[index.GetPrimaryIndex()]; +} + +const string &LogicalGet::GetColumnName(const ColumnIndex &index) const { + if (index.IsVirtualColumn()) { + auto entry = virtual_columns.find(index.GetPrimaryIndex()); + if (entry == virtual_columns.end()) { + throw InternalException("Failed to find referenced virtual column %d", index.GetPrimaryIndex()); + } + return entry->second.name; + } + return names[index.GetPrimaryIndex()]; +} + void 
LogicalGet::ResolveTypes() { if (column_ids.empty()) { column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); @@ -125,20 +148,12 @@ void LogicalGet::ResolveTypes() { types.clear(); if (projection_ids.empty()) { for (auto &index : column_ids) { - if (index.IsRowIdColumn()) { - types.emplace_back(LogicalType(rowid_type)); - } else { - types.push_back(returned_types[index.GetPrimaryIndex()]); - } + types.push_back(GetColumnType(index)); } } else { for (auto &proj_index : projection_ids) { auto &index = column_ids[proj_index]; - if (index.IsRowIdColumn()) { - types.emplace_back(LogicalType(rowid_type)); - } else { - types.push_back(returned_types[index.GetPrimaryIndex()]); - } + types.push_back(GetColumnType(index)); } } if (!projected_input.empty()) { @@ -227,9 +242,10 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) } if (!has_serialize) { TableFunctionRef empty_ref; + unordered_map virtual_columns; TableFunctionBindInput input(result->parameters, result->named_parameters, result->input_table_types, result->input_table_names, function.function_info.get(), nullptr, result->function, - empty_ref); + empty_ref, virtual_columns); vector bind_return_types; vector bind_names; @@ -239,20 +255,35 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) bind_data = function.bind(deserializer.Get(), input, bind_return_types, bind_names); for (auto &col_id : result->column_ids) { - if (col_id.IsRowIdColumn()) { - // rowid - continue; - } - auto idx = col_id.GetPrimaryIndex(); - auto &ret_type = result->returned_types[idx]; - auto &col_name = result->names[idx]; - if (bind_return_types[idx] != ret_type) { - throw SerializationException("Table function deserialization failure in function \"%s\" - column with " - "name %s was serialized with type %s, but now has type %s", - function.name, col_name, ret_type, bind_return_types[idx]); + if (col_id.IsVirtualColumn()) { + auto idx = col_id.GetPrimaryIndex(); + auto ventry = virtual_columns.find(idx); + if (ventry 
== virtual_columns.end()) { + throw SerializationException( + "Table function deserialization failure - could not find virtual column with id %d", idx); + } + auto &ret_type = ventry->second.type; + auto &col_name = ventry->second.name; + if (bind_return_types[idx] != ret_type) { + throw SerializationException( + "Table function deserialization failure in function \"%s\" - virtual column with " + "name %s was serialized with type %s, but now has type %s", + function.name, col_name, ret_type, bind_return_types[idx]); + } + } else { + auto idx = col_id.GetPrimaryIndex(); + auto &ret_type = result->returned_types[idx]; + auto &col_name = result->names[idx]; + if (bind_return_types[idx] != ret_type) { + throw SerializationException( + "Table function deserialization failure in function \"%s\" - column with " + "name %s was serialized with type %s, but now has type %s", + function.name, col_name, ret_type, bind_return_types[idx]); + } } } result->returned_types = std::move(bind_return_types); + result->virtual_columns = std::move(virtual_columns); } result->bind_data = std::move(bind_data); return std::move(result); diff --git a/src/planner/table_binding.cpp b/src/planner/table_binding.cpp index 455834a4bb0d..934814ec7516 100644 --- a/src/planner/table_binding.cpp +++ b/src/planner/table_binding.cpp @@ -16,9 +16,9 @@ namespace duckdb { Binding::Binding(BindingType binding_type, BindingAlias alias_p, vector coltypes, vector colnames, - idx_t index, LogicalType rowid_type) + idx_t index) : binding_type(binding_type), alias(std::move(alias_p)), index(index), types(std::move(coltypes)), - names(std::move(colnames)), rowid_type(std::move(rowid_type)) { + names(std::move(colnames)) { D_ASSERT(types.size() == names.size()); for (idx_t i = 0; i < names.size(); i++) { auto &name = names[i]; @@ -114,13 +114,19 @@ optional_ptr EntryBinding::GetStandardEntry() { TableBinding::TableBinding(const string &alias, vector types_p, vector names_p, vector &bound_column_ids, optional_ptr 
entry, idx_t index, - bool add_row_id) - : Binding(BindingType::TABLE, GetAlias(alias, entry), std::move(types_p), std::move(names_p), index, - (add_row_id && entry) ? entry->Cast().GetRowIdType() : LogicalType::ROW_TYPE), - bound_column_ids(bound_column_ids), entry(entry) { - if (add_row_id) { - if (name_map.find("rowid") == name_map.end()) { - name_map["rowid"] = COLUMN_IDENTIFIER_ROW_ID; + virtual_column_map_t virtual_columns_p) + : Binding(BindingType::TABLE, GetAlias(alias, entry), std::move(types_p), std::move(names_p), index), + bound_column_ids(bound_column_ids), entry(entry), virtual_columns(std::move(virtual_columns_p)) { + for (auto &ventry : virtual_columns) { + auto idx = ventry.first; + auto &name = ventry.second.name; + if (idx < VIRTUAL_COLUMN_START) { + throw BinderException( + "Virtual column index must be larger than VIRTUAL_COLUMN_START - found %d for column \"%s\"", idx, + name); + } + if (name_map.find(name) == name_map.end()) { + name_map[name] = idx; } } } @@ -238,8 +244,10 @@ BindResult TableBinding::Bind(ColumnRefExpression &colref, idx_t depth) { } // fetch the type of the column LogicalType col_type; - if (column_index == COLUMN_IDENTIFIER_ROW_ID) { - col_type = LogicalType(rowid_type); + auto ventry = virtual_columns.find(column_index); + if (ventry != virtual_columns.end()) { + // virtual column - fetch type from there + col_type = ventry->second.type; } else { // normal column: fetch type from base column col_type = types[column_index]; diff --git a/test/sql/copy/parquet/parquet_virtual_columns.test b/test/sql/copy/parquet/parquet_virtual_columns.test new file mode 100644 index 000000000000..1abddbe36928 --- /dev/null +++ b/test/sql/copy/parquet/parquet_virtual_columns.test @@ -0,0 +1,21 @@ +# name: test/sql/copy/parquet/parquet_virtual_columns.test +# description: Test virtual columns +# group: [parquet] + +require parquet + +# Filename without the filename option +query III +select i, j, replace(filename, '\', '/') from 
'data/parquet-testing/glob*/t?.parquet' order by i; +---- +1 a data/parquet-testing/glob/t1.parquet +2 b data/parquet-testing/glob/t2.parquet +3 c data/parquet-testing/glob2/t1.parquet + +# not projected in * +query II +select * from 'data/parquet-testing/glob*/t?.parquet' order by i; +---- +1 a +2 b +3 c From b3a67cd3aeb7b8b5dce45eb5f922963097314c5d Mon Sep 17 00:00:00 2001 From: Mark Date: Tue, 11 Feb 2025 10:33:21 +0100 Subject: [PATCH 072/142] Backport #16115 --- .../duckdb/optimizer/optimizer_extension.hpp | 16 +++++++++++----- src/optimizer/optimizer.cpp | 13 ++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/include/duckdb/optimizer/optimizer_extension.hpp b/src/include/duckdb/optimizer/optimizer_extension.hpp index 6ccc6277c1a8..d2202fcb11a1 100644 --- a/src/include/duckdb/optimizer/optimizer_extension.hpp +++ b/src/include/duckdb/optimizer/optimizer_extension.hpp @@ -29,14 +29,20 @@ struct OptimizerExtensionInput { }; typedef void (*optimize_function_t)(OptimizerExtensionInput &input, unique_ptr &plan); +typedef void (*pre_optimize_function_t)(OptimizerExtensionInput &input, unique_ptr &plan); class OptimizerExtension { public: - //! The parse function of the parser extension. - //! Takes a query string as input and returns ParserExtensionParseData (on success) or an error - optimize_function_t optimize_function; - - //! Additional parser info passed to the parse function + //! The optimize function of the optimizer extension. + //! Takes a logical query plan as an input, which it can modify in place + //! This runs, after the DuckDB optimizers have run + optimize_function_t optimize_function = nullptr; + //! The pre-optimize function of the optimizer extension. + //! Takes a logical query plan as an input, which it can modify in place + //! This runs, before the DuckDB optimizers have run + pre_optimize_function_t pre_optimize_function = nullptr; + + //! 
Additional optimizer info passed to the optimize functions shared_ptr optimizer_info; }; diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 8ac4cdd87da8..dc1ddfa59224 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -272,12 +272,23 @@ unique_ptr Optimizer::Optimize(unique_ptr plan this->plan = std::move(plan_p); + for (auto &pre_optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { + RunOptimizer(OptimizerType::EXTENSION, [&]() { + OptimizerExtensionInput input {GetContext(), *this, pre_optimizer_extension.optimizer_info.get()}; + if (pre_optimizer_extension.pre_optimize_function) { + pre_optimizer_extension.pre_optimize_function(input, plan); + } + }); + } + RunBuiltInOptimizers(); for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { RunOptimizer(OptimizerType::EXTENSION, [&]() { OptimizerExtensionInput input {GetContext(), *this, optimizer_extension.optimizer_info.get()}; - optimizer_extension.optimize_function(input, plan); + if (optimizer_extension.optimize_function) { + optimizer_extension.optimize_function(input, plan); + } }); } From 32a0f490f146b367dca05cd9d0db4c59e6a36d77 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 13 Feb 2025 19:36:05 +0100 Subject: [PATCH 073/142] Move get_virtual_columns to a separate table function instead of trying to move it into the bind --- extension/parquet/parquet_extension.cpp | 24 ++++++++++++------- src/common/multi_file_reader.cpp | 21 +++++++--------- src/function/table_function.cpp | 11 +++++---- .../duckdb/common/multi_file_reader.hpp | 11 +++++---- .../duckdb/function/table_function.hpp | 12 ++++++---- .../binder/tableref/bind_table_function.cpp | 8 ++++--- src/planner/operator/logical_get.cpp | 24 +++++++++++++++---- 7 files changed, 68 insertions(+), 43 deletions(-) diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index b7100cf7b363..294e35c021a2 100644 --- 
a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -359,6 +359,13 @@ TablePartitionInfo ParquetGetPartitionInfo(ClientContext &context, TableFunction return parquet_bind.multi_file_reader->GetPartitionInfo(context, parquet_bind.reader_bind, input); } +virtual_column_map_t ParquetGetVirtualColumns(ClientContext &context, optional_ptr bind_data) { + auto &parquet_bind = bind_data->Cast(); + virtual_column_map_t result; + parquet_bind.multi_file_reader->GetVirtualColumns(context, parquet_bind.reader_bind, result); + return result; +} + class ParquetScanFunction { public: static TableFunctionSet GetFunctionSet() { @@ -384,6 +391,7 @@ class ParquetScanFunction { table_function.filter_prune = true; table_function.pushdown_complex_filter = ParquetComplexFilterPushdown; table_function.get_partition_info = ParquetGetPartitionInfo; + table_function.get_virtual_columns = ParquetGetVirtualColumns; MultiFileReader::AddParameters(table_function); @@ -450,11 +458,11 @@ class ParquetScanFunction { return nullptr; } - static unique_ptr - ParquetScanBindInternal(ClientContext &context, unique_ptr multi_file_reader, - shared_ptr file_list, vector &return_types, - vector &names, ParquetOptions parquet_options, - optional_ptr virtual_columns = nullptr) { + static unique_ptr ParquetScanBindInternal(ClientContext &context, + unique_ptr multi_file_reader, + shared_ptr file_list, + vector &return_types, vector &names, + ParquetOptions parquet_options) { auto result = make_uniq(); result->multi_file_reader = std::move(multi_file_reader); result->file_list = std::move(file_list); @@ -463,7 +471,7 @@ class ParquetScanFunction { if (result->multi_file_reader->Bind(parquet_options.file_options, *result->file_list, result->types, result->names, result->reader_bind)) { result->multi_file_reader->BindOptions(parquet_options.file_options, *result->file_list, result->types, - result->names, result->reader_bind, virtual_columns); + result->names, 
result->reader_bind); // Enable the parquet file_row_number on the parquet options if the file_row_number_idx was set if (result->reader_bind.file_row_number_idx != DConstants::INVALID_INDEX) { parquet_options.file_row_number = true; @@ -476,7 +484,7 @@ class ParquetScanFunction { parquet_options.file_options.AutoDetectHivePartitioning(*result->file_list, context); // Default bind result->reader_bind = result->multi_file_reader->BindReader( - context, result->types, result->names, *result->file_list, *result, parquet_options, virtual_columns); + context, result->types, result->names, *result->file_list, *result, parquet_options); } // Set the explicit cardinality if requested @@ -617,7 +625,7 @@ class ParquetScanFunction { auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0]); return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), return_types, names, - parquet_options, &input.virtual_columns); + parquet_options); } static double ParquetProgress(ClientContext &context, const FunctionData *bind_data_p, diff --git a/src/common/multi_file_reader.cpp b/src/common/multi_file_reader.cpp index 9016780e422b..8a819505d86e 100644 --- a/src/common/multi_file_reader.cpp +++ b/src/common/multi_file_reader.cpp @@ -171,8 +171,7 @@ bool MultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &files void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector &return_types, vector &names, - MultiFileReaderBindData &bind_data, - optional_ptr virtual_columns) { + MultiFileReaderBindData &bind_data) { // Add generated constant column for filename if (options.filename) { if (std::find(names.begin(), names.end(), options.filename_column) != names.end()) { @@ -183,10 +182,6 @@ void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList bind_data.filename_idx = names.size(); return_types.emplace_back(LogicalType::VARCHAR); 
names.emplace_back(options.filename_column); - } else if (virtual_columns) { - // filename is not specified - add it to the virtual columns list - virtual_columns->insert(make_pair(COLUMN_IDENTIFIER_FILENAME, TableColumn("filename", LogicalType::VARCHAR))); - bind_data.filename_idx = COLUMN_IDENTIFIER_FILENAME; } // Add generated constant columns from hive partitioning scheme @@ -242,6 +237,14 @@ void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList } } +void MultiFileReader::GetVirtualColumns(ClientContext &context, MultiFileReaderBindData &bind_data, + virtual_column_map_t &result) { + if (bind_data.filename_idx == DConstants::INVALID_INDEX) { + bind_data.filename_idx = COLUMN_IDENTIFIER_FILENAME; + result.insert(make_pair(COLUMN_IDENTIFIER_FILENAME, TableColumn("filename", LogicalType::VARCHAR))); + } +} + void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_columns, const vector &global_columns, @@ -258,12 +261,6 @@ void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, c } for (idx_t i = 0; i < global_column_ids.size(); i++) { auto &col_idx = global_column_ids[i]; - if (col_idx.IsRowIdColumn()) { - // row-id - // FIXME: this should probably be removed - reader_data.constant_map.emplace_back(i, Value::BIGINT(42)); - continue; - } auto column_id = col_idx.GetPrimaryIndex(); if (column_id == options.filename_idx) { // filename diff --git a/src/function/table_function.cpp b/src/function/table_function.cpp index 6e9df81943ae..cb8af192017c 100644 --- a/src/function/table_function.cpp +++ b/src/function/table_function.cpp @@ -22,8 +22,9 @@ TableFunction::TableFunction(string name, vector arguments, table_f in_out_function_final(nullptr), statistics(nullptr), dependency(nullptr), cardinality(nullptr), pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr), 
get_partition_data(nullptr), get_bind_info(nullptr), type_pushdown(nullptr), get_multi_file_reader(nullptr), supports_pushdown_type(nullptr), - get_partition_info(nullptr), get_partition_stats(nullptr), serialize(nullptr), deserialize(nullptr), - projection_pushdown(false), filter_pushdown(false), filter_prune(false), sampling_pushdown(false) { + get_partition_info(nullptr), get_partition_stats(nullptr), get_virtual_columns(nullptr), serialize(nullptr), + deserialize(nullptr), projection_pushdown(false), filter_pushdown(false), filter_prune(false), + sampling_pushdown(false) { } TableFunction::TableFunction(const vector &arguments, table_function_t function, @@ -36,9 +37,9 @@ TableFunction::TableFunction() init_local(nullptr), function(nullptr), in_out_function(nullptr), statistics(nullptr), dependency(nullptr), cardinality(nullptr), pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr), get_partition_data(nullptr), get_bind_info(nullptr), type_pushdown(nullptr), get_multi_file_reader(nullptr), - supports_pushdown_type(nullptr), get_partition_info(nullptr), get_partition_stats(nullptr), serialize(nullptr), - deserialize(nullptr), projection_pushdown(false), filter_pushdown(false), filter_prune(false), - sampling_pushdown(false) { + supports_pushdown_type(nullptr), get_partition_info(nullptr), get_partition_stats(nullptr), + get_virtual_columns(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false), + filter_pushdown(false), filter_prune(false), sampling_pushdown(false) { } bool TableFunction::Equal(const TableFunction &rhs) const { diff --git a/src/include/duckdb/common/multi_file_reader.hpp b/src/include/duckdb/common/multi_file_reader.hpp index c1a262fa875f..b6716856dda0 100644 --- a/src/include/duckdb/common/multi_file_reader.hpp +++ b/src/include/duckdb/common/multi_file_reader.hpp @@ -250,8 +250,7 @@ struct MultiFileReader { //! 
Bind the options of the multi-file reader, potentially emitting any extra columns that are required DUCKDB_API virtual void BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector &return_types, vector &names, - MultiFileReaderBindData &bind_data, - optional_ptr virtual_columns = nullptr); + MultiFileReaderBindData &bind_data); //! Initialize global state used by the MultiFileReader DUCKDB_API virtual unique_ptr @@ -294,6 +293,9 @@ struct MultiFileReader { const OperatorPartitionInfo &partition_info, OperatorPartitionData &partition_data); + DUCKDB_API virtual void GetVirtualColumns(ClientContext &context, MultiFileReaderBindData &bind_data, + virtual_column_map_t &result); + template MultiFileReaderBindData BindUnionReader(ClientContext &context, vector &return_types, vector &names, MultiFileList &files, RESULT_CLASS &result, @@ -322,8 +324,7 @@ struct MultiFileReader { template MultiFileReaderBindData BindReader(ClientContext &context, vector &return_types, vector &names, - MultiFileList &files, RESULT_CLASS &result, OPTIONS_CLASS &options, - optional_ptr virtual_columns = nullptr) { + MultiFileList &files, RESULT_CLASS &result, OPTIONS_CLASS &options) { if (options.file_options.union_by_name) { return BindUnionReader(context, return_types, names, files, result, options); } else { @@ -336,7 +337,7 @@ struct MultiFileReader { } result.Initialize(std::move(reader)); MultiFileReaderBindData bind_data; - BindOptions(options.file_options, files, return_types, names, bind_data, virtual_columns); + BindOptions(options.file_options, files, return_types, names, bind_data); return bind_data; } } diff --git a/src/include/duckdb/function/table_function.hpp b/src/include/duckdb/function/table_function.hpp index 15625ff61e3a..0ac43fe73812 100644 --- a/src/include/duckdb/function/table_function.hpp +++ b/src/include/duckdb/function/table_function.hpp @@ -93,11 +93,9 @@ struct TableFunctionBindInput { TableFunctionBindInput(vector &inputs, named_parameter_map_t 
&named_parameters, vector &input_table_types, vector &input_table_names, optional_ptr info, optional_ptr binder, - TableFunction &table_function, const TableFunctionRef &ref, - virtual_column_map_t &virtual_columns) + TableFunction &table_function, const TableFunctionRef &ref) : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types), - input_table_names(input_table_names), info(info), binder(binder), table_function(table_function), ref(ref), - virtual_columns(virtual_columns) { + input_table_names(input_table_names), info(info), binder(binder), table_function(table_function), ref(ref) { } vector &inputs; @@ -108,7 +106,6 @@ struct TableFunctionBindInput { optional_ptr binder; TableFunction &table_function; const TableFunctionRef &ref; - virtual_column_map_t &virtual_columns; }; struct TableFunctionInitInput { @@ -298,6 +295,9 @@ typedef TablePartitionInfo (*table_function_get_partition_info_t)(ClientContext typedef vector (*table_function_get_partition_stats_t)(ClientContext &context, GetPartitionStatsInput &input); +typedef virtual_column_map_t (*table_function_get_virtual_columns_t)(ClientContext &context, + optional_ptr bind_data); + //! When to call init_global to initialize the table function enum class TableFunctionInitialization { INITIALIZE_ON_EXECUTE, INITIALIZE_ON_SCHEDULE }; @@ -366,6 +366,8 @@ class TableFunction : public SimpleNamedParameterFunction { // NOLINT: work-arou table_function_get_partition_info_t get_partition_info; //! (Optional) get a list of all the partition stats of the table table_function_get_partition_stats_t get_partition_stats; + //! 
(Optional) returns a list of virtual columns emitted by the table function + table_function_get_virtual_columns_t get_virtual_columns; table_function_serialize_t serialize; table_function_deserialize_t deserialize; diff --git a/src/planner/binder/tableref/bind_table_function.cpp b/src/planner/binder/tableref/bind_table_function.cpp index b17884c76532..dbb162495088 100644 --- a/src/planner/binder/tableref/bind_table_function.cpp +++ b/src/planner/binder/tableref/bind_table_function.cpp @@ -197,11 +197,9 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab unique_ptr bind_data; vector return_types; vector return_names; - unordered_map virtual_columns; if (table_function.bind || table_function.bind_replace) { TableFunctionBindInput bind_input(parameters, named_parameters, input_table_types, input_table_names, - table_function.function_info.get(), this, table_function, ref, - virtual_columns); + table_function.function_info.get(), this, table_function, ref); if (table_function.bind_replace) { auto new_plan = table_function.bind_replace(context, bind_input); if (new_plan) { @@ -238,6 +236,10 @@ unique_ptr Binder::BindTableFunctionInternal(TableFunction &tab return_names[i] = "C" + to_string(i); } } + virtual_column_map_t virtual_columns; + if (table_function.get_virtual_columns) { + virtual_columns = table_function.get_virtual_columns(context, bind_data.get()); + } auto get = make_uniq(bind_index, table_function, std::move(bind_data), return_types, return_names, virtual_columns); diff --git a/src/planner/operator/logical_get.cpp b/src/planner/operator/logical_get.cpp index 37e475e66a89..c15fde6b923a 100644 --- a/src/planner/operator/logical_get.cpp +++ b/src/planner/operator/logical_get.cpp @@ -143,7 +143,15 @@ const string &LogicalGet::GetColumnName(const ColumnIndex &index) const { void LogicalGet::ResolveTypes() { if (column_ids.empty()) { - column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); + // no projection - we need to push a column + auto entry = 
virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); + if (entry != virtual_columns.end()) { + // push the rowid column if the projection supports it + column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); + } else { + // otherwise push the first column + column_ids.emplace_back(0); + } } types.clear(); if (projection_ids.empty()) { @@ -240,19 +248,23 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) result->column_ids.emplace_back(col_id); } } + auto &context = deserializer.Get(); + virtual_column_map_t virtual_columns; if (!has_serialize) { TableFunctionRef empty_ref; - unordered_map virtual_columns; TableFunctionBindInput input(result->parameters, result->named_parameters, result->input_table_types, result->input_table_names, function.function_info.get(), nullptr, result->function, - empty_ref, virtual_columns); + empty_ref); vector bind_return_types; vector bind_names; if (!function.bind) { throw InternalException("Table function \"%s\" has neither bind nor (de)serialize", function.name); } - bind_data = function.bind(deserializer.Get(), input, bind_return_types, bind_names); + bind_data = function.bind(context, input, bind_return_types, bind_names); + if (function.get_virtual_columns) { + virtual_columns = function.get_virtual_columns(context, bind_data.get()); + } for (auto &col_id : result->column_ids) { if (col_id.IsVirtualColumn()) { @@ -283,8 +295,10 @@ unique_ptr LogicalGet::Deserialize(Deserializer &deserializer) } } result->returned_types = std::move(bind_return_types); - result->virtual_columns = std::move(virtual_columns); + } else if (function.get_virtual_columns) { + virtual_columns = function.get_virtual_columns(context, bind_data.get()); } + result->virtual_columns = std::move(virtual_columns); result->bind_data = std::move(bind_data); return std::move(result); } From cd514e9c7e3dd0e8702ed490958f7bc1e0d32fa3 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 13 Feb 2025 20:03:05 +0100 Subject: [PATCH 074/142] format-fix --- 
src/execution/operator/helper/physical_streaming_sample.cpp | 4 ++-- src/execution/physical_plan/plan_sample.cpp | 3 +-- .../execution/operator/helper/physical_streaming_sample.hpp | 3 +-- .../filter_pushdown/prepared_statement_in_pushdown.test | 0 test/sql/sample/bernoulli_sampling.test | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 test/optimizer/filter_pushdown/prepared_statement_in_pushdown.test diff --git a/src/execution/operator/helper/physical_streaming_sample.cpp b/src/execution/operator/helper/physical_streaming_sample.cpp index 1062deb27ed7..721717989f88 100644 --- a/src/execution/operator/helper/physical_streaming_sample.cpp +++ b/src/execution/operator/helper/physical_streaming_sample.cpp @@ -6,9 +6,9 @@ namespace duckdb { PhysicalStreamingSample::PhysicalStreamingSample(vector types, unique_ptr options, - idx_t estimated_cardinality) + idx_t estimated_cardinality) : PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality), - sample_options(std::move(options)) { + sample_options(std::move(options)) { percentage = sample_options->sample_size.GetValue() / 100; } diff --git a/src/execution/physical_plan/plan_sample.cpp b/src/execution/physical_plan/plan_sample.cpp index 2ccfacb8ac8c..883c7055d46f 100644 --- a/src/execution/physical_plan/plan_sample.cpp +++ b/src/execution/physical_plan/plan_sample.cpp @@ -28,8 +28,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSample &op "reservoir sampling or use a sample_size", EnumUtil::ToString(op.sample_options->method)); } - sample = make_uniq( - op.types, std::move(op.sample_options), op.estimated_cardinality); + sample = make_uniq(op.types, std::move(op.sample_options), op.estimated_cardinality); break; default: throw InternalException("Unimplemented sample method"); diff --git a/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp b/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp index 
68df848fec9f..6f75b2cf1964 100644 --- a/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +++ b/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp @@ -19,8 +19,7 @@ class PhysicalStreamingSample : public PhysicalOperator { static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::STREAMING_SAMPLE; public: - PhysicalStreamingSample(vector types, unique_ptr options, - idx_t estimated_cardinality); + PhysicalStreamingSample(vector types, unique_ptr options, idx_t estimated_cardinality); unique_ptr sample_options; double percentage; diff --git a/test/optimizer/filter_pushdown/prepared_statement_in_pushdown.test b/test/optimizer/filter_pushdown/prepared_statement_in_pushdown.test deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/test/sql/sample/bernoulli_sampling.test b/test/sql/sample/bernoulli_sampling.test index a00ff7311325..e8953bb38e40 100644 --- a/test/sql/sample/bernoulli_sampling.test +++ b/test/sql/sample/bernoulli_sampling.test @@ -2,7 +2,6 @@ # description: Test reservoir sample crash on large data sets # group: [sample] - statement ok create table output (num_rows INT); From 6549a0ea76cc4e3cc29c65ea37b9485e1f18e4cb Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 13 Feb 2025 21:57:32 +0100 Subject: [PATCH 075/142] Correctly deal with and propagate virtual columns --- extension/parquet/parquet_extension.cpp | 15 ++++++++++---- src/common/multi_file_list.cpp | 8 ++++++-- .../operator/scan/physical_table_scan.cpp | 13 ++++++++---- src/execution/physical_plan/plan_get.cpp | 10 ++++++---- src/function/table/table_scan.cpp | 6 ++++++ .../operator/scan/physical_table_scan.hpp | 4 +++- .../duckdb/planner/operator/logical_get.hpp | 3 +++ src/optimizer/remove_unused_columns.cpp | 5 ++--- src/planner/operator/logical_get.cpp | 20 +++++++++++-------- .../copy/parquet/parquet_filename_filter.test | 12 +++++++++-- .../copy/parquet/parquet_virtual_columns.test | 8 ++++++++ 11 files 
changed, 76 insertions(+), 28 deletions(-) diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index 294e35c021a2..e88f111564b1 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -61,6 +61,7 @@ struct ParquetReadBindData : public TableFunctionData { atomic chunk_count; vector names; vector types; + virtual_column_map_t virtual_columns; vector columns; //! Table column names - set when using COPY tbl FROM file.parquet vector table_columns; @@ -363,6 +364,7 @@ virtual_column_map_t ParquetGetVirtualColumns(ClientContext &context, optional_p auto &parquet_bind = bind_data->Cast(); virtual_column_map_t result; parquet_bind.multi_file_reader->GetVirtualColumns(context, parquet_bind.reader_bind, result); + parquet_bind.virtual_columns = result; return result; } @@ -754,12 +756,17 @@ class ParquetScanFunction { iota(begin(result->projection_ids), end(result->projection_ids), 0); } - const auto table_types = bind_data.types; + const auto &table_types = bind_data.types; for (const auto &col_idx : input.column_indexes) { - if (col_idx.IsRowIdColumn()) { - result->scanned_types.emplace_back(LogicalType::ROW_TYPE); + auto column_id = col_idx.GetPrimaryIndex(); + if (col_idx.IsVirtualColumn()) { + auto entry = bind_data.virtual_columns.find(column_id); + if (entry == bind_data.virtual_columns.end()) { + throw InternalException("Parquet - virtual column definition not found"); + } + result->scanned_types.emplace_back(entry->second.type); } else { - result->scanned_types.push_back(table_types[col_idx.GetPrimaryIndex()]); + result->scanned_types.push_back(table_types[column_id]); } } } diff --git a/src/common/multi_file_list.cpp b/src/common/multi_file_list.cpp index 668a5b36399e..fd5d84a856ec 100644 --- a/src/common/multi_file_list.cpp +++ b/src/common/multi_file_list.cpp @@ -31,9 +31,10 @@ bool PushdownInternal(ClientContext &context, const MultiFileReaderOptions &opti vector> &filters, 
vector &expanded_files) { HivePartitioningFilterInfo filter_info; for (idx_t i = 0; i < info.column_ids.size(); i++) { - if (!IsRowIdColumnId(info.column_ids[i])) { - filter_info.column_map.insert({info.column_names[info.column_ids[i]], i}); + if (IsVirtualColumn(info.column_ids[i])) { + continue; } + filter_info.column_map.insert({info.column_names[info.column_ids[i]], i}); } filter_info.hive_enabled = options.hive_partitioning; filter_info.filename_enabled = options.filename; @@ -61,6 +62,9 @@ bool PushdownInternal(ClientContext &context, const MultiFileReaderOptions &opti vector> filter_expressions; for (auto &entry : filters.filters) { auto column_idx = column_ids[entry.first]; + if (IsVirtualColumn(column_idx)) { + continue; + } auto column_ref = make_uniq(types[column_idx], ColumnBinding(table_index, entry.first)); auto filter_expr = entry.second->ToExpression(*column_ref); diff --git a/src/execution/operator/scan/physical_table_scan.cpp b/src/execution/operator/scan/physical_table_scan.cpp index 0ea996d341c8..2c821526cee6 100644 --- a/src/execution/operator/scan/physical_table_scan.cpp +++ b/src/execution/operator/scan/physical_table_scan.cpp @@ -14,11 +14,12 @@ PhysicalTableScan::PhysicalTableScan(vector types, TableFunction fu vector column_ids_p, vector projection_ids_p, vector names_p, unique_ptr table_filters_p, idx_t estimated_cardinality, ExtraOperatorInfo extra_info, - vector parameters_p) + vector parameters_p, virtual_column_map_t virtual_columns_p) : PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality), function(std::move(function_p)), bind_data(std::move(bind_data_p)), returned_types(std::move(returned_types_p)), column_ids(std::move(column_ids_p)), projection_ids(std::move(projection_ids_p)), names(std::move(names_p)), - table_filters(std::move(table_filters_p)), extra_info(extra_info), parameters(std::move(parameters_p)) { + table_filters(std::move(table_filters_p)), extra_info(extra_info), 
parameters(std::move(parameters_p)), + virtual_columns(std::move(virtual_columns_p)) { } class TableScanGlobalSourceState : public GlobalSourceState { @@ -214,8 +215,12 @@ InsertionOrderPreservingMap PhysicalTableScan::ParamsToString() const { first_item = false; const auto col_id = column_ids[column_index].GetPrimaryIndex(); - if (col_id == COLUMN_IDENTIFIER_ROW_ID) { - filters_info += filter->ToString("rowid"); + if (IsVirtualColumn(col_id)) { + auto entry = virtual_columns.find(col_id); + if (entry == virtual_columns.end()) { + throw InternalException("Virtual column not found"); + } + filters_info += filter->ToString(entry->second.name); } else { filters_info += filter->ToString(names[col_id]); } diff --git a/src/execution/physical_plan/plan_get.cpp b/src/execution/physical_plan/plan_get.cpp index 843ad1537172..978bf4b70220 100644 --- a/src/execution/physical_plan/plan_get.cpp +++ b/src/execution/physical_plan/plan_get.cpp @@ -133,9 +133,10 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { // create the table scan node if (!op.function.projection_pushdown) { // function does not support projection pushdown - auto node = make_uniq( - op.returned_types, op.function, std::move(op.bind_data), op.returned_types, column_ids, vector(), - op.names, std::move(table_filters), op.estimated_cardinality, op.extra_info, std::move(op.parameters)); + auto node = make_uniq(op.returned_types, op.function, std::move(op.bind_data), + op.returned_types, column_ids, vector(), op.names, + std::move(table_filters), op.estimated_cardinality, op.extra_info, + std::move(op.parameters), std::move(op.virtual_columns)); // first check if an additional projection is necessary if (column_ids.size() == op.returned_types.size()) { bool projection_necessary = false; @@ -180,7 +181,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalGet &op) { } else { auto node = make_uniq(op.types, op.function, std::move(op.bind_data), op.returned_types, column_ids, op.projection_ids, 
op.names, std::move(table_filters), - op.estimated_cardinality, op.extra_info, std::move(op.parameters)); + op.estimated_cardinality, op.extra_info, std::move(op.parameters), + std::move(op.virtual_columns)); node->dynamic_filters = op.dynamic_filters; if (filter) { filter->children.push_back(std::move(node)); diff --git a/src/function/table/table_scan.cpp b/src/function/table/table_scan.cpp index 0cf85d64a2b9..41b95f1269eb 100644 --- a/src/function/table/table_scan.cpp +++ b/src/function/table/table_scan.cpp @@ -703,6 +703,11 @@ static unique_ptr TableScanDeserialize(Deserializer &deserializer, return std::move(result); } +virtual_column_map_t TableScanGetVirtualColumns(ClientContext &context, optional_ptr bind_data_p) { + auto &bind_data = bind_data_p->Cast(); + return bind_data.table.GetVirtualColumns(); +} + TableFunction TableScanFunction::GetFunction() { TableFunction scan_function("seq_scan", {}, TableScanFunc); scan_function.init_local = TableScanInitLocal; @@ -722,6 +727,7 @@ TableFunction TableScanFunction::GetFunction() { scan_function.sampling_pushdown = true; scan_function.serialize = TableScanSerialize; scan_function.deserialize = TableScanDeserialize; + scan_function.get_virtual_columns = TableScanGetVirtualColumns; return scan_function; } diff --git a/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp b/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp index 45ac1e34c5b7..ca7cd4a0db34 100644 --- a/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +++ b/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp @@ -27,7 +27,7 @@ class PhysicalTableScan : public PhysicalOperator { PhysicalTableScan(vector types, TableFunction function, unique_ptr bind_data, vector returned_types, vector column_ids, vector projection_ids, vector names, unique_ptr table_filters, idx_t estimated_cardinality, - ExtraOperatorInfo extra_info, vector parameters); + ExtraOperatorInfo extra_info, vector parameters, 
virtual_column_map_t virtual_columns); //! The table function TableFunction function; @@ -50,6 +50,8 @@ class PhysicalTableScan : public PhysicalOperator { vector parameters; //! Contains a reference to dynamically generated table filters (through e.g. a join up in the tree) shared_ptr dynamic_filters; + //! Virtual columns + virtual_column_map_t virtual_columns; public: string GetName() const override; diff --git a/src/include/duckdb/planner/operator/logical_get.hpp b/src/include/duckdb/planner/operator/logical_get.hpp index 65f90fe85ae2..395224bc89dd 100644 --- a/src/include/duckdb/planner/operator/logical_get.hpp +++ b/src/include/duckdb/planner/operator/logical_get.hpp @@ -63,6 +63,9 @@ class LogicalGet : public LogicalOperator { InsertionOrderPreservingMap ParamsToString() const override; //! Returns the underlying table that is being scanned, or nullptr if there is none optional_ptr GetTable() const; + //! Returns any column to query - preferably the cheapest column + //! This is used when we are running e.g. a COUNT(*) and don't care about the contents of any columns in the table + column_t GetAnyColumn() const; const LogicalType &GetColumnType(const ColumnIndex &column_index) const; const string &GetColumnName(const ColumnIndex &column_index) const; diff --git a/src/optimizer/remove_unused_columns.cpp b/src/optimizer/remove_unused_columns.cpp index 48ea7cbffe83..eb93856e9e4a 100644 --- a/src/optimizer/remove_unused_columns.cpp +++ b/src/optimizer/remove_unused_columns.cpp @@ -240,8 +240,7 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { throw InternalException("Could not find column index for table filter"); } - auto column_type = - filter.first == COLUMN_IDENTIFIER_ROW_ID ? 
LogicalType::ROW_TYPE : get.returned_types[filter.first]; + auto column_type = get.GetColumnType(ColumnIndex(filter.first)); ColumnBinding filter_binding(get.table_index, index.GetIndex()); auto column_ref = make_uniq(std::move(column_type), filter_binding); @@ -268,7 +267,7 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { // this generally means we are only interested in whether or not anything exists in the table (e.g. // EXISTS(SELECT * FROM tbl)) in this case, we just scan the row identifier column as it means we do not // need to read any of the columns - column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); + column_ids.emplace_back(get.GetAnyColumn()); } get.SetColumnIds(std::move(column_ids)); diff --git a/src/planner/operator/logical_get.cpp b/src/planner/operator/logical_get.cpp index c15fde6b923a..c4b5a2f47ffc 100644 --- a/src/planner/operator/logical_get.cpp +++ b/src/planner/operator/logical_get.cpp @@ -141,17 +141,21 @@ const string &LogicalGet::GetColumnName(const ColumnIndex &index) const { return names[index.GetPrimaryIndex()]; } +column_t LogicalGet::GetAnyColumn() const { + auto entry = virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); + if (entry != virtual_columns.end()) { + // return the rowid column if the projection supports it + return COLUMN_IDENTIFIER_ROW_ID; + } else { + // otherwise return the first column + return 0; + } +} + void LogicalGet::ResolveTypes() { if (column_ids.empty()) { // no projection - we need to push a column - auto entry = virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); - if (entry != virtual_columns.end()) { - // push the rowid column if the projection supports it - column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID); - } else { - // otherwise push the first column - column_ids.emplace_back(0); - } + column_ids.emplace_back(GetAnyColumn()); } types.clear(); if (projection_ids.empty()) { diff --git a/test/sql/copy/parquet/parquet_filename_filter.test b/test/sql/copy/parquet/parquet_filename_filter.test 
index f236ed81de2e..a10611ef267b 100644 --- a/test/sql/copy/parquet/parquet_filename_filter.test +++ b/test/sql/copy/parquet/parquet_filename_filter.test @@ -4,6 +4,10 @@ require parquet +query III +select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where filename='value1'; +---- + # requires notwindows for windows-style path backslash reasons require notwindows @@ -49,15 +53,19 @@ select id, value, date, filename from parquet_scan('data/parquet-testing/hive-pa # Ensure we don't somehow endup mixing things up query III -select id, value as filename, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where filename='value2'; +select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where f='value2'; ---- 2 value2 2013-01-01 query III -select id, value as filename, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where filename='value1'; +select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where f='value1'; ---- 1 value1 2012-01-01 +query III +select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where filename='value1'; +---- + # These tests confirm that the ParquetScanStats will properly handle the pruned files list statement ok diff --git a/test/sql/copy/parquet/parquet_virtual_columns.test b/test/sql/copy/parquet/parquet_virtual_columns.test index 1abddbe36928..f1f84155036b 100644 --- a/test/sql/copy/parquet/parquet_virtual_columns.test +++ b/test/sql/copy/parquet/parquet_virtual_columns.test @@ -19,3 +19,11 @@ select * from 'data/parquet-testing/glob*/t?.parquet' order by i; 1 a 
2 b 3 c + +require notwindows + +# filename in filter +query III +select i, j, replace(filename, '\', '/') from 'data/parquet-testing/glob*/t?.parquet' where filename='data/parquet-testing/glob/t1.parquet' +---- +1 a data/parquet-testing/glob/t1.parquet From 2a75b22e52fcea3fd175774e0271d96532e7d5cc Mon Sep 17 00:00:00 2001 From: Richard Wesley <13156216+hawkfish@users.noreply.github.com> Date: Thu, 13 Feb 2025 13:35:41 -0800 Subject: [PATCH 076/142] Issue #8265: AsOf Nested Loop * Fix unique pointer cast --- src/execution/physical_plan/plan_asof_join.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/execution/physical_plan/plan_asof_join.cpp b/src/execution/physical_plan/plan_asof_join.cpp index 10fc0d47d059..32fa52280e62 100644 --- a/src/execution/physical_plan/plan_asof_join.cpp +++ b/src/execution/physical_plan/plan_asof_join.cpp @@ -230,7 +230,7 @@ static unique_ptr PlanAsOfLoopJoin(LogicalComparisonJoin &op, auto proj = make_uniq(op.types, std::move(project_list), probe_cardinality); proj->children.emplace_back(std::move(aggr)); - return proj; + return std::move(proj); } unique_ptr PhysicalPlanGenerator::PlanAsOfJoin(LogicalComparisonJoin &op) { From b6e8b3339f64dd386f911bc42de33bb991692d11 Mon Sep 17 00:00:00 2001 From: cfis Date: Fri, 14 Feb 2025 00:05:06 -0800 Subject: [PATCH 077/142] Fix building Duckdb on Windows with MSVC 2022. 
_win32 is the correct define for MSVC (and I believe mingw64 these days) - see https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-170 --- extension/tpcds/dsdgen/include/dsdgen-c/porting.h | 4 ++-- extension/tpch/dbgen/text.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extension/tpcds/dsdgen/include/dsdgen-c/porting.h b/extension/tpcds/dsdgen/include/dsdgen-c/porting.h index 6923a0f35286..cf27a036f813 100644 --- a/extension/tpcds/dsdgen/include/dsdgen-c/porting.h +++ b/extension/tpcds/dsdgen/include/dsdgen-c/porting.h @@ -57,7 +57,7 @@ #include -#ifdef WIN32 +#ifdef _WIN32 #include #define timeb _timeb #define ftime _ftime @@ -76,7 +76,7 @@ typedef HUGE_TYPE ds_key_t; char *strdup(const char *); #endif -#ifdef WIN32 +#ifdef _WIN32 #include #include #include diff --git a/extension/tpch/dbgen/text.cpp b/extension/tpch/dbgen/text.cpp index df67048b54ea..cfe9f8ef370b 100644 --- a/extension/tpch/dbgen/text.cpp +++ b/extension/tpch/dbgen/text.cpp @@ -22,10 +22,10 @@ #include "dbgen/config.h" #include -#ifndef WIN32 +#ifndef _WIN32 /* Change for Windows NT */ #include -#endif /* WIN32 */ +#endif /* _WIN32 */ #include #include #include From 012b6933194989e54719b504927417843af092c7 Mon Sep 17 00:00:00 2001 From: cfis Date: Fri, 14 Feb 2025 00:08:07 -0800 Subject: [PATCH 078/142] Building the Python bindings on Windows fails with MSVC and having Python installed in program files by the python installer. CMake finds python just fine, but the python executable is python.exe not python3.exe. 
--- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e4a73176cb9..c61c7f9def65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1065,7 +1065,7 @@ endfunction() macro(register_external_extension NAME URL COMMIT DONT_LINK DONT_BUILD LOAD_TESTS PATH INCLUDE_PATH TEST_PATH APPLY_PATCHES LINKED_LIBS SUBMODULES EXTENSION_VERSION) include(FetchContent) if (${APPLY_PATCHES}) - set(PATCH_COMMAND python3 ${CMAKE_SOURCE_DIR}/scripts/apply_extension_patches.py ${CMAKE_SOURCE_DIR}/.github/patches/extensions/${NAME}/) + set(PATCH_COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/scripts/apply_extension_patches.py ${CMAKE_SOURCE_DIR}/.github/patches/extensions/${NAME}/) endif() FETCHCONTENT_DECLARE( ${NAME}_extension_fc @@ -1389,7 +1389,7 @@ if(${EXTENSION_CONFIG_BUILD}) add_custom_target( duckdb_merge_vcpkg_manifests ALL - COMMAND python3 scripts/merge_vcpkg_deps.py ${VCPKG_PATHS} ${EXT_NAMES} + COMMAND ${Python3_EXECUTABLE} scripts/merge_vcpkg_deps.py ${VCPKG_PATHS} ${EXT_NAMES} WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMENT Generates a shared vcpkg manifest from the individual extensions) string(REPLACE ";" ", " VCPKG_NAMES_COMMAS "${VCPKG_NAMES}") @@ -1432,9 +1432,9 @@ if(BUILD_PYTHON) ) if(PYTHON_EDITABLE_BUILD) - set(PIP_COMMAND ${PIP_COMMAND} python3 -m pip install --editable .) + set(PIP_COMMAND ${PIP_COMMAND} ${Python3_EXECUTABLE} -m pip install --editable .) else() - set(PIP_COMMAND ${PIP_COMMAND} python3 -m pip install .) + set(PIP_COMMAND ${PIP_COMMAND} ${Python3_EXECUTABLE} -m pip install .) endif() if(USER_SPACE) From 535fe5a77c01d038384ddd5cd5cc217397faa5a9 Mon Sep 17 00:00:00 2001 From: cfis Date: Fri, 14 Feb 2025 00:14:17 -0800 Subject: [PATCH 079/142] -std=c++11 is invalid with MSVC. 
It it is set correctly here - https://github.com/duckdb/duckdb/blob/main/tools/pythonpkg/setup.py#L162 and L165, but then reset again at https://github.com/duckdb/duckdb/blob/main/tools/pythonpkg/setup.py#L176 but incorrectly for all compilers. --- tools/pythonpkg/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pythonpkg/setup.py b/tools/pythonpkg/setup.py index 0152d0d6b06b..6273472da676 100644 --- a/tools/pythonpkg/setup.py +++ b/tools/pythonpkg/setup.py @@ -173,7 +173,7 @@ def open_utf8(fpath, flags): if 'DUCKDB_BINARY_DIR' in os.environ: existing_duckdb_dir = os.environ['DUCKDB_BINARY_DIR'] if 'DUCKDB_COMPILE_FLAGS' in os.environ: - toolchain_args = ['-std=c++11'] + os.environ['DUCKDB_COMPILE_FLAGS'].split() + toolchain_args = os.environ['DUCKDB_COMPILE_FLAGS'].split() if 'DUCKDB_LIBS' in os.environ: libraries = os.environ['DUCKDB_LIBS'].split(' ') From 057c2d4a9d7737da6d0196832d17589b0cc7c449 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 09:24:35 +0100 Subject: [PATCH 080/142] Various fixes for virtual columns <> MultiFileReader interaction --- extension/parquet/parquet_extension.cpp | 2 +- src/common/constants.cpp | 2 +- src/common/multi_file_reader.cpp | 7 +++++++ src/function/table/read_csv.cpp | 12 +++++++++++- src/function/table/table_scan.cpp | 1 + src/function/table_function.cpp | 4 ++-- src/include/duckdb/common/multi_file_reader.hpp | 2 +- src/include/duckdb/function/table_function.hpp | 2 ++ src/optimizer/late_materialization.cpp | 7 +++++-- 9 files changed, 31 insertions(+), 8 deletions(-) diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index e88f111564b1..1a05891eb314 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -363,7 +363,7 @@ TablePartitionInfo ParquetGetPartitionInfo(ClientContext &context, TableFunction virtual_column_map_t ParquetGetVirtualColumns(ClientContext &context, optional_ptr 
bind_data) { auto &parquet_bind = bind_data->Cast(); virtual_column_map_t result; - parquet_bind.multi_file_reader->GetVirtualColumns(context, parquet_bind.reader_bind, result); + MultiFileReader::GetVirtualColumns(context, parquet_bind.reader_bind, result); parquet_bind.virtual_columns = result; return result; } diff --git a/src/common/constants.cpp b/src/common/constants.cpp index 4db7245e235f..ee51460033ff 100644 --- a/src/common/constants.cpp +++ b/src/common/constants.cpp @@ -9,7 +9,7 @@ namespace duckdb { constexpr const idx_t DConstants::INVALID_INDEX; const row_t MAX_ROW_ID = 36028797018960000ULL; // 2^55 const row_t MAX_ROW_ID_LOCAL = 72057594037920000ULL; // 2^56 -const column_t COLUMN_IDENTIFIER_ROW_ID = (column_t)-1; +const column_t COLUMN_IDENTIFIER_ROW_ID = UINT64_C(18446744073709551615); const column_t VIRTUAL_COLUMN_START = UINT64_C(9223372036854775808); // 2^63 const double PI = 3.141592653589793; diff --git a/src/common/multi_file_reader.cpp b/src/common/multi_file_reader.cpp index 8a819505d86e..e9e8f40ca3f4 100644 --- a/src/common/multi_file_reader.cpp +++ b/src/common/multi_file_reader.cpp @@ -267,6 +267,9 @@ void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, c reader_data.constant_map.emplace_back(i, Value(filename)); continue; } + if (IsVirtualColumn(column_id)) { + continue; + } if (!options.hive_partitioning_indexes.empty()) { // hive partition constants auto partitions = HivePartitioning::Parse(filename); @@ -340,6 +343,10 @@ void MultiFileReader::CreateColumnMappingByName(const string &file_name, // not constant - look up the column in the name map auto &global_idx = global_column_ids[i]; auto global_id = global_idx.GetPrimaryIndex(); + if (IsVirtualColumn(global_id)) { + // virtual column - these are emitted for every file + continue; + } if (global_id >= global_columns.size()) { throw InternalException( "MultiFileReader::CreateColumnMappingByName - global_id is out of range in global_types for this file"); 
diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index e5bce2264db1..d9603abfe81b 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -140,7 +140,7 @@ static unique_ptr ReadCSVBind(ClientContext &context, TableFunctio auto result = make_uniq(); auto &options = result->options; - const auto multi_file_reader = MultiFileReader::Create(input.table_function); + auto multi_file_reader = MultiFileReader::Create(input.table_function); const auto multi_file_list = multi_file_reader->CreateFileList(context, input.inputs[0]); if (multi_file_list->GetTotalFileCount() > 1) { options.multi_file_reader = true; @@ -415,6 +415,15 @@ void PushdownTypeToCSVScanner(ClientContext &context, optional_ptr } } +virtual_column_map_t ReadCSVGetVirtualColumns(ClientContext &context, optional_ptr bind_data) { + auto &csv_bind = bind_data->Cast(); + virtual_column_map_t result; + MultiFileReader::GetVirtualColumns(context, csv_bind.reader_bind, result); + result.insert(make_pair(COLUMN_IDENTIFIER_ROW_ID, TableColumn("rowid", LogicalType::ROW_TYPE))); + return result; +} + + TableFunction ReadCSVTableFunction::GetFunction() { TableFunction read_csv("read_csv", {LogicalType::VARCHAR}, ReadCSVFunction, ReadCSVBind, ReadCSVInitGlobal, ReadCSVInitLocal); @@ -426,6 +435,7 @@ TableFunction ReadCSVTableFunction::GetFunction() { read_csv.cardinality = CSVReaderCardinality; read_csv.projection_pushdown = true; read_csv.type_pushdown = PushdownTypeToCSVScanner; + read_csv.get_virtual_columns = ReadCSVGetVirtualColumns; ReadCSVAddNamedParameters(read_csv); return read_csv; } diff --git a/src/function/table/table_scan.cpp b/src/function/table/table_scan.cpp index 41b95f1269eb..178a2a6a8bb0 100644 --- a/src/function/table/table_scan.cpp +++ b/src/function/table/table_scan.cpp @@ -725,6 +725,7 @@ TableFunction TableScanFunction::GetFunction() { scan_function.filter_pushdown = true; scan_function.filter_prune = true; 
scan_function.sampling_pushdown = true; + scan_function.late_materialization = true; scan_function.serialize = TableScanSerialize; scan_function.deserialize = TableScanDeserialize; scan_function.get_virtual_columns = TableScanGetVirtualColumns; diff --git a/src/function/table_function.cpp b/src/function/table_function.cpp index cb8af192017c..e2172cef2b79 100644 --- a/src/function/table_function.cpp +++ b/src/function/table_function.cpp @@ -24,7 +24,7 @@ TableFunction::TableFunction(string name, vector arguments, table_f get_bind_info(nullptr), type_pushdown(nullptr), get_multi_file_reader(nullptr), supports_pushdown_type(nullptr), get_partition_info(nullptr), get_partition_stats(nullptr), get_virtual_columns(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false), filter_pushdown(false), filter_prune(false), - sampling_pushdown(false) { + sampling_pushdown(false), late_materialization(false) { } TableFunction::TableFunction(const vector &arguments, table_function_t function, @@ -39,7 +39,7 @@ TableFunction::TableFunction() get_partition_data(nullptr), get_bind_info(nullptr), type_pushdown(nullptr), get_multi_file_reader(nullptr), supports_pushdown_type(nullptr), get_partition_info(nullptr), get_partition_stats(nullptr), get_virtual_columns(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false), - filter_pushdown(false), filter_prune(false), sampling_pushdown(false) { + filter_pushdown(false), filter_prune(false), sampling_pushdown(false), late_materialization(false) { } bool TableFunction::Equal(const TableFunction &rhs) const { diff --git a/src/include/duckdb/common/multi_file_reader.hpp b/src/include/duckdb/common/multi_file_reader.hpp index b6716856dda0..39fd78654d28 100644 --- a/src/include/duckdb/common/multi_file_reader.hpp +++ b/src/include/duckdb/common/multi_file_reader.hpp @@ -293,7 +293,7 @@ struct MultiFileReader { const OperatorPartitionInfo &partition_info, OperatorPartitionData &partition_data); - 
DUCKDB_API virtual void GetVirtualColumns(ClientContext &context, MultiFileReaderBindData &bind_data, + DUCKDB_API static void GetVirtualColumns(ClientContext &context, MultiFileReaderBindData &bind_data, virtual_column_map_t &result); template diff --git a/src/include/duckdb/function/table_function.hpp b/src/include/duckdb/function/table_function.hpp index 0ac43fe73812..e432e0f9568c 100644 --- a/src/include/duckdb/function/table_function.hpp +++ b/src/include/duckdb/function/table_function.hpp @@ -385,6 +385,8 @@ class TableFunction : public SimpleNamedParameterFunction { // NOLINT: work-arou //! Whether or not the table function supports sampling pushdown. If not supported a sample will be taken after the //! table function. bool sampling_pushdown; + //! Whether or not the table function supports late materialization + bool late_materialization; //! Additional function info, passed to the bind shared_ptr function_info; diff --git a/src/optimizer/late_materialization.cpp b/src/optimizer/late_materialization.cpp index 01f8118f5257..67884f483070 100644 --- a/src/optimizer/late_materialization.cpp +++ b/src/optimizer/late_materialization.cpp @@ -238,10 +238,13 @@ bool LateMaterialization::TryLateMaterialization(unique_ptr &op // we need all of the columns to compute the root node anyway (Top-N/Limit/etc) return false; } + if (!get.function.late_materialization) { + // this function does not support late materialization + return false; + } auto entry = get.virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); if (entry == get.virtual_columns.end()) { - // we can only do the late-materialization optimization for tables that support the rowid column - return false; + throw InternalException("Table function supports late materialization but does not expose a rowid column"); } row_id_type = entry->second.type; // we benefit from late materialization From 94e50026b862cf19997e240df12796460b83c641 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 09:36:22 +0100 
Subject: [PATCH 081/142] use cast operator for src/target types in primitive dictionary, and add fast path to TemplatedWritePlain --- extension/parquet/column_writer.cpp | 2 +- .../writer/templated_column_writer.hpp | 86 ++++++++++----- .../duckdb/common/primitive_dictionary.hpp | 104 +++++++++++------- 3 files changed, 122 insertions(+), 70 deletions(-) diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 4841aca02355..6d3bd63618bf 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -211,7 +211,7 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat //===--------------------------------------------------------------------===// // Used to store the metadata for a WKB-encoded geometry column when writing // GeoParquet files. -class WKBColumnWriterState final : public StandardColumnWriterState { +class WKBColumnWriterState final : public StandardColumnWriterState { public: WKBColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx) : StandardColumnWriterState(writer, row_group, col_idx), geo_data(), geo_data_writer(writer.GetContext()) { diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index 6e20c6dd6e23..bd2a4e220342 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -17,21 +17,47 @@ namespace duckdb { -template +template static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, const idx_t chunk_start, const idx_t chunk_end, const ValidityMask &mask, WriteStream &ser) { - const auto *ptr = FlatVector::GetData(col); + static constexpr bool COPY_DIRECTLY_FROM_VECTOR = + ALL_VALID && std::is_same::value && std::is_arithmetic::value; + + const auto *const ptr = FlatVector::GetData(col); + TGT local_write[STANDARD_VECTOR_SIZE]; + 
idx_t local_write_count = 0; + for (idx_t r = chunk_start; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { + if (!ALL_VALID && !mask.RowIsValid(r)) { continue; } + TGT target_value = OP::template Operation(ptr[r]); OP::template HandleStats(stats, target_value); - OP::template WriteToStream(target_value, ser); + + if (COPY_DIRECTLY_FROM_VECTOR) { + continue; + } + + if (std::is_arithmetic::value) { + local_write[local_write_count++] = target_value; + } else { + OP::template WriteToStream(target_value, ser); + } + } + + if (COPY_DIRECTLY_FROM_VECTOR) { + ser.WriteData(const_data_ptr_cast(&ptr[chunk_start]), (chunk_end - chunk_start) * sizeof(TGT)); + return; } + + if (std::is_arithmetic::value) { + ser.WriteData(data_ptr_cast(local_write), local_write_count * sizeof(TGT)); + } + // Else we already wrote to stream } -template +template class StandardColumnWriterState : public PrimitiveColumnWriterState { public: StandardColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx) @@ -47,16 +73,16 @@ class StandardColumnWriterState : public PrimitiveColumnWriterState { idx_t total_string_size = 0; uint32_t key_bit_width = 0; - PrimitiveDictionary dictionary; + PrimitiveDictionary dictionary; duckdb_parquet::Encoding::type encoding; }; -template +template class StandardWriterPageState : public ColumnWriterPageState { public: explicit StandardWriterPageState(const idx_t total_value_count, const idx_t total_string_size, duckdb_parquet::Encoding::type encoding_p, - const PrimitiveDictionary &dictionary_p) + const PrimitiveDictionary &dictionary_p) : encoding(encoding_p), dbp_initialized(false), dbp_encoder(total_value_count), dlba_initialized(false), dlba_encoder(total_value_count, total_string_size), bss_encoder(total_value_count, sizeof(TGT)), dictionary(dictionary_p), dict_written_value(false), @@ -72,7 +98,7 @@ class StandardWriterPageState : public ColumnWriterPageState { BssEncoder bss_encoder; - const PrimitiveDictionary 
&dictionary; + const PrimitiveDictionary &dictionary; bool dict_written_value; uint32_t dict_bit_width; RleBpEncoder dict_encoder; @@ -89,22 +115,22 @@ class StandardColumnWriter : public PrimitiveColumnWriter { public: unique_ptr InitializeWriteState(duckdb_parquet::RowGroup &row_group) override { - auto result = make_uniq>(writer, row_group, row_group.columns.size()); + auto result = make_uniq>(writer, row_group, row_group.columns.size()); result->encoding = duckdb_parquet::Encoding::RLE_DICTIONARY; RegisterToRowGroup(row_group); return std::move(result); } unique_ptr InitializePageState(PrimitiveColumnWriterState &state_p) override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); - auto result = make_uniq>(state.total_value_count, state.total_string_size, - state.encoding, state.dictionary); + auto result = make_uniq>(state.total_value_count, state.total_string_size, + state.encoding, state.dictionary); return std::move(result); } void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override { - auto &page_state = state_p->Cast>(); + auto &page_state = state_p->Cast>(); switch (page_state.encoding) { case duckdb_parquet::Encoding::DELTA_BINARY_PACKED: if (!page_state.dbp_initialized) { @@ -139,7 +165,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } duckdb_parquet::Encoding::type GetEncoding(PrimitiveColumnWriterState &state_p) override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); return state.encoding; } @@ -148,7 +174,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); auto data_ptr = FlatVector::GetData(vector); idx_t vector_index = 0; @@ -188,7 +214,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { void FinalizeAnalyze(ColumnWriterState &state_p) override { const auto type = 
writer.GetType(schema_idx); - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); if (state.dictionary.GetSize() == 0 || state.dictionary.IsFull()) { if (writer.GetParquetVersion() == ParquetVersion::V1) { // Can't do the cool stuff for V1 @@ -221,18 +247,18 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } bool HasDictionary(PrimitiveColumnWriterState &state_p) override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); return state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY; } idx_t DictionarySize(PrimitiveColumnWriterState &state_p) override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); return state.dictionary.GetSize(); } void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state_p, Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &page_state = page_state_p->Cast>(); + auto &page_state = page_state_p->Cast>(); const auto &mask = FlatVector::Validity(input_column); const auto *data_ptr = FlatVector::GetData(input_column); @@ -331,7 +357,12 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } case duckdb_parquet::Encoding::PLAIN: { D_ASSERT(page_state.encoding == duckdb_parquet::Encoding::PLAIN); - TemplatedWritePlain(input_column, stats, chunk_start, chunk_end, mask, temp_writer); + if (mask.AllValid()) { + TemplatedWritePlain(input_column, stats, chunk_start, chunk_end, mask, temp_writer); + } else { + TemplatedWritePlain(input_column, stats, chunk_start, chunk_end, mask, + temp_writer); + } break; } default: @@ -340,29 +371,28 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } void FlushDictionary(PrimitiveColumnWriterState &state_p, ColumnWriterStatistics *stats) override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY); state.bloom_filter = make_uniq(state.dictionary.GetSize(), 
writer.BloomFilterFalsePositiveRatio()); - state.dictionary.IterateValues([&](const SRC &value) { - const TGT target_value = OP::template Operation(value); + state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) { // update the statistics - OP::template HandleStats(stats, target_value); + OP::template HandleStats(stats, tgt_value); // update the bloom filter - auto hash = OP::template XXHash64(target_value); + auto hash = OP::template XXHash64(tgt_value); state.bloom_filter->FilterInsert(hash); }); // flush the dictionary page and add it to the to-be-written pages - WriteDictionary(state, state.dictionary.GetPlainMemoryStream(), state.dictionary.GetSize()); + WriteDictionary(state, state.dictionary.GetTargetMemoryStream(), state.dictionary.GetSize()); // bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up } idx_t GetRowSize(const Vector &vector, const idx_t index, const PrimitiveColumnWriterState &state_p) const override { - auto &state = state_p.Cast>(); + auto &state = state_p.Cast>(); if (state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY) { return (state.key_bit_width + 7) / 8; } else { diff --git a/src/include/duckdb/common/primitive_dictionary.hpp b/src/include/duckdb/common/primitive_dictionary.hpp index a4369776d37d..4b48b7ab3dd3 100644 --- a/src/include/duckdb/common/primitive_dictionary.hpp +++ b/src/include/duckdb/common/primitive_dictionary.hpp @@ -14,14 +14,25 @@ namespace duckdb { -template +struct PrimitiveCastOperator { + template + static TGT Operation(SRC input) { + return TGT(input); + } +}; + +template class PrimitiveDictionary { private: + static_assert(!std::is_same::value || + (std::is_same::value && std::is_same::value), + "If SRC is string_t, TGT must also be string_t"); + static constexpr idx_t LOAD_FACTOR = 2; static constexpr uint32_t INVALID_INDEX = static_cast(-1); struct primitive_dictionary_entry_t { - T value; + SRC value; uint32_t index; bool IsEmpty() const { 
return index == INVALID_INDEX; @@ -33,13 +44,15 @@ class PrimitiveDictionary { //! PrimitiveDictionary is a fixed-size linear probing hash table for primitive types //! It is used to dictionary-encode data in, e.g., Parquet files - PrimitiveDictionary(Allocator &allocator, idx_t maximum_size_p, idx_t plain_capacity_p) + PrimitiveDictionary(Allocator &allocator, idx_t maximum_size_p, idx_t target_capacity_p) : maximum_size(maximum_size_p), size(0), capacity(NextPowerOfTwo(maximum_size * LOAD_FACTOR)), - capacity_mask(capacity - 1), plain_capacity(plain_capacity_p), plain_offset(0), + capacity_mask(capacity - 1), target_capacity(target_capacity_p), target_offset(0), allocated_dictionary(allocator.Allocate(capacity * sizeof(primitive_dictionary_entry_t))), - allocated_plain(allocator.Allocate(std::is_same::value ? plain_capacity : capacity * sizeof(T))), + allocated_target( + allocator.Allocate(std::is_same::value ? target_capacity : capacity * sizeof(TGT))), dictionary(reinterpret_cast(allocated_dictionary.get())), - plain(reinterpret_cast(allocated_plain.get())), plain_raw(allocated_plain.get()), full(false) { + target_values(reinterpret_cast(allocated_target.get())), target_raw(allocated_target.get()), + full(false) { // Initialize empty for (idx_t i = 0; i < capacity; i++) { dictionary[i].index = INVALID_INDEX; @@ -48,13 +61,13 @@ class PrimitiveDictionary { public: //! Insert value into dictionary (if not full) - void Insert(T value) { + void Insert(SRC value) { if (full) { return; } auto &entry = Lookup(value); if (entry.IsEmpty()) { - if (size + 1 > maximum_size || !AddToPlain(value)) { + if (size + 1 > maximum_size || !AddToTarget(value)) { full = true; return; } @@ -64,29 +77,33 @@ class PrimitiveDictionary { } //! Get dictionary index of an already inserted value - uint32_t GetIndex(const T &value) const { + uint32_t GetIndex(const SRC &value) const { const auto &entry = Lookup(value); D_ASSERT(!entry.IsEmpty()); return entry.index; } //! 
Iterates over inserted values - template ::value, int>::type = 0> - void IterateValues(const std::function &fun) const { - for (idx_t i = 0; i < size; i++) { - fun(plain[i]); + template ::value, int>::type = 0> + void IterateValues(const std::function &fun) const { + for (idx_t i = 0; i < capacity; i++) { + auto &entry = dictionary[i]; + if (entry.IsEmpty()) { + continue; + } + fun(entry.value, target_values[entry.index]); } } //! Specialized template to iterate over string_t values - template ::value, int>::type = 0> - void IterateValues(const std::function &fun) const { + template ::value, int>::type = 0> + void IterateValues(const std::function &fun) const { for (idx_t i = 0; i < capacity; i++) { auto &entry = dictionary[i]; if (entry.IsEmpty()) { continue; } - fun(entry.value); + fun(entry.value, entry.value); } } @@ -100,16 +117,16 @@ class PrimitiveDictionary { return full; } - //! Get the plain written values as a memory stream (zero-copy) - unique_ptr GetPlainMemoryStream() const { - auto result = make_uniq(plain_raw, plain_capacity); - result->SetPosition(plain_offset); + //! Get the target written values as a memory stream (zero-copy) + unique_ptr GetTargetMemoryStream() const { + auto result = make_uniq(target_raw, target_capacity); + result->SetPosition(target_offset); return result; } private: //! Looks up a value in the dictionary using linear probing - primitive_dictionary_entry_t &Lookup(const T &value) const { + primitive_dictionary_entry_t &Lookup(const SRC &value) const { auto offset = Hash(value) & capacity_mask; while (!dictionary[offset].IsEmpty() && dictionary[offset].value != value) { ++offset &= capacity_mask; @@ -117,27 +134,32 @@ class PrimitiveDictionary { return dictionary[offset]; } - //! Writes a value to the plain data - bool AddToPlain(const T &value) { - plain[size] = value; - plain_offset += sizeof(T); + //! 
Writes a value to the target data + template ::value, int>::type = 0> + bool AddToTarget(const SRC &src_value) { + const auto tgt_value = CAST_OP::template Operation(src_value); + target_values[size] = tgt_value; + target_offset += sizeof(TGT); return true; } - //! Specialized template to add a string_t value to the plain data - bool AddToPlain(string_t &value) { - if (plain_offset + sizeof(uint32_t) + value.GetSize() > plain_capacity) { + //! Specialized template to add a string_t value to the target data + template ::value, int>::type = 0> + bool AddToTarget(SRC &src_value) { + if (target_offset + sizeof(uint32_t) + src_value.GetSize() > target_capacity) { return false; // Out of capacity } // Store string length and increment offset - Store(UnsafeNumericCast(value.GetSize()), plain_raw + plain_offset); - plain_offset += sizeof(uint32_t); + Store(UnsafeNumericCast(src_value.GetSize()), target_raw + target_offset); + target_offset += sizeof(uint32_t); - // Copy over string data to plain, update "value" to point to it, and increment offset - memcpy(plain_raw + plain_offset, value.GetData(), value.GetSize()); - value = string_t(char_ptr_cast(plain_raw + plain_offset), value.GetSize()); - plain_offset += value.GetSize(); + // Copy over string data to target, update "value" to point to it, and increment offset + memcpy(target_raw + target_offset, src_value.GetData(), src_value.GetSize()); + if (!src_value.IsInlined()) { + src_value.SetPointer(char_ptr_cast(target_raw + target_offset)); + } + target_offset += src_value.GetSize(); return true; } @@ -151,18 +173,18 @@ class PrimitiveDictionary { const idx_t capacity; const idx_t capacity_mask; - //! Capacity/offset of plain encoded data - const idx_t plain_capacity; - idx_t plain_offset; + //! Capacity/offset of target encoded data + const idx_t target_capacity; + idx_t target_offset; - //! Allocated regions for dictionary/plain + //! 
Allocated regions for dictionary/target AllocatedData allocated_dictionary; - AllocatedData allocated_plain; + AllocatedData allocated_target; //! Pointers to allocated regions for convenience primitive_dictionary_entry_t *const dictionary; - T *const plain; - data_ptr_t const plain_raw; + TGT *const target_values; + data_ptr_t const target_raw; //! More values inserted than possible bool full; From 19ca17f1270bb848e187729a3353585f2fdffd8b Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 09:41:31 +0100 Subject: [PATCH 082/142] Add EMPTY column that can be used for COUNT(*) - but not queried - and add union by name test --- src/common/constants.cpp | 1 + src/common/multi_file_reader.cpp | 2 +- src/function/table/read_csv.cpp | 2 +- src/include/duckdb/common/constants.hpp | 2 ++ src/planner/operator/logical_get.cpp | 12 ++++++++---- src/planner/table_binding.cpp | 4 ++++ test/sql/copy/csv/test_union_by_name.test | 19 +++++++++++++++++++ 7 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/common/constants.cpp b/src/common/constants.cpp index ee51460033ff..2bddd619532f 100644 --- a/src/common/constants.cpp +++ b/src/common/constants.cpp @@ -10,6 +10,7 @@ constexpr const idx_t DConstants::INVALID_INDEX; const row_t MAX_ROW_ID = 36028797018960000ULL; // 2^55 const row_t MAX_ROW_ID_LOCAL = 72057594037920000ULL; // 2^56 const column_t COLUMN_IDENTIFIER_ROW_ID = UINT64_C(18446744073709551615); +const column_t COLUMN_IDENTIFIER_EMPTY = UINT64_C(18446744073709551614); const column_t VIRTUAL_COLUMN_START = UINT64_C(9223372036854775808); // 2^63 const double PI = 3.141592653589793; diff --git a/src/common/multi_file_reader.cpp b/src/common/multi_file_reader.cpp index e9e8f40ca3f4..3aacf065dca3 100644 --- a/src/common/multi_file_reader.cpp +++ b/src/common/multi_file_reader.cpp @@ -239,7 +239,7 @@ void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList void MultiFileReader::GetVirtualColumns(ClientContext &context, 
MultiFileReaderBindData &bind_data, virtual_column_map_t &result) { - if (bind_data.filename_idx == DConstants::INVALID_INDEX) { + if (bind_data.filename_idx == DConstants::INVALID_INDEX || bind_data.filename_idx == COLUMN_IDENTIFIER_FILENAME) { bind_data.filename_idx = COLUMN_IDENTIFIER_FILENAME; result.insert(make_pair(COLUMN_IDENTIFIER_FILENAME, TableColumn("filename", LogicalType::VARCHAR))); } diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index d9603abfe81b..1e442873b296 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -419,7 +419,7 @@ virtual_column_map_t ReadCSVGetVirtualColumns(ClientContext &context, optional_p auto &csv_bind = bind_data->Cast(); virtual_column_map_t result; MultiFileReader::GetVirtualColumns(context, csv_bind.reader_bind, result); - result.insert(make_pair(COLUMN_IDENTIFIER_ROW_ID, TableColumn("rowid", LogicalType::ROW_TYPE))); + result.insert(make_pair(COLUMN_IDENTIFIER_EMPTY, TableColumn("", LogicalType::BOOLEAN))); return result; } diff --git a/src/include/duckdb/common/constants.hpp b/src/include/duckdb/common/constants.hpp index 387dd4127579..e9f816a17619 100644 --- a/src/include/duckdb/common/constants.hpp +++ b/src/include/duckdb/common/constants.hpp @@ -40,6 +40,8 @@ DUCKDB_API bool IsInvalidCatalog(const string &str); //! Special value used to signify the ROW ID of a table DUCKDB_API extern const column_t COLUMN_IDENTIFIER_ROW_ID; +//! Special value used to signify an empty column (used for e.g. 
COUNT(*)) +DUCKDB_API extern const column_t COLUMN_IDENTIFIER_EMPTY; DUCKDB_API extern const column_t VIRTUAL_COLUMN_START; DUCKDB_API bool IsRowIdColumnId(column_t column_id); DUCKDB_API bool IsVirtualColumn(column_t column_id); diff --git a/src/planner/operator/logical_get.cpp b/src/planner/operator/logical_get.cpp index c4b5a2f47ffc..e894cd8fab9a 100644 --- a/src/planner/operator/logical_get.cpp +++ b/src/planner/operator/logical_get.cpp @@ -142,14 +142,18 @@ const string &LogicalGet::GetColumnName(const ColumnIndex &index) const { } column_t LogicalGet::GetAnyColumn() const { - auto entry = virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); + auto entry = virtual_columns.find(COLUMN_IDENTIFIER_EMPTY); + if (entry != virtual_columns.end()) { + // return the empty column if the projection supports it + return COLUMN_IDENTIFIER_EMPTY; + } + entry = virtual_columns.find(COLUMN_IDENTIFIER_ROW_ID); if (entry != virtual_columns.end()) { // return the rowid column if the projection supports it return COLUMN_IDENTIFIER_ROW_ID; - } else { - // otherwise return the first column - return 0; } + // otherwise return the first column + return 0; } void LogicalGet::ResolveTypes() { diff --git a/src/planner/table_binding.cpp b/src/planner/table_binding.cpp index 934814ec7516..e18d899d60a8 100644 --- a/src/planner/table_binding.cpp +++ b/src/planner/table_binding.cpp @@ -125,6 +125,10 @@ TableBinding::TableBinding(const string &alias, vector types_p, vec "Virtual column index must be larger than VIRTUAL_COLUMN_START - found %d for column \"%s\"", idx, name); } + if (idx == COLUMN_IDENTIFIER_EMPTY) { + // the empty column cannot be queried by the user + continue; + } if (name_map.find(name) == name_map.end()) { name_map[name] = idx; } diff --git a/test/sql/copy/csv/test_union_by_name.test b/test/sql/copy/csv/test_union_by_name.test index 6d9032516759..ebe98d332378 100644 --- a/test/sql/copy/csv/test_union_by_name.test +++ b/test/sql/copy/csv/test_union_by_name.test @@ -67,6 +67,25 
@@ ORDER BY a; 102 NULL 103 9223372036854775807 NULL NULL +query IIII +SELECT a, b, c, replace(replace(filename, '\', '/'), '__TEST_DIR__/', '') +FROM read_csv_auto(['__TEST_DIR__/ubn1.csv', '__TEST_DIR__/ubn2.csv', '__TEST_DIR__/ubn3.csv'], UNION_BY_NAME=TRUE) +ORDER BY a; +---- +1 NULL NULL ubn1.csv +2 NULL NULL ubn1.csv +3 4 NULL ubn2.csv +5 6 NULL ubn2.csv +100 NULL 101 ubn3.csv +102 NULL 103 ubn3.csv +9223372036854775807 NULL NULL ubn1.csv + +query IIII +SELECT COUNT(a), COUNT(b), COUNT(c), COUNT(filename) +FROM read_csv_auto(['__TEST_DIR__/ubn1.csv', '__TEST_DIR__/ubn2.csv', '__TEST_DIR__/ubn3.csv'], UNION_BY_NAME=TRUE) +---- +7 2 2 7 + query TTT SELECT typeof(a), typeof(b), typeof(c) FROM read_csv_auto(['__TEST_DIR__/ubn1.csv', '__TEST_DIR__/ubn2.csv', '__TEST_DIR__/ubn3.csv'], UNION_BY_NAME=TRUE) From fad85466924a65b0b9ad92b5ae1940f921c1f780 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 09:55:57 +0100 Subject: [PATCH 083/142] take vectors larger than standard into account --- extension/parquet/include/writer/templated_column_writer.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index bd2a4e220342..027af57fe6c5 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -24,6 +24,7 @@ static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, cons ALL_VALID && std::is_same::value && std::is_arithmetic::value; const auto *const ptr = FlatVector::GetData(col); + TGT local_write[STANDARD_VECTOR_SIZE]; idx_t local_write_count = 0; @@ -41,6 +42,10 @@ static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, cons if (std::is_arithmetic::value) { local_write[local_write_count++] = target_value; + if (local_write_count == STANDARD_VECTOR_SIZE) { + ser.WriteData(data_ptr_cast(local_write), 
local_write_count * sizeof(TGT)); + local_write_count = 0; + } } else { OP::template WriteToStream(target_value, ser); } From 8cb3c56d815128189420a9f9474a7c0d9e853be9 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 10:29:47 +0100 Subject: [PATCH 084/142] Support virtual columns in the CSV reader --- extension/json/include/json_scan.hpp | 2 ++ extension/json/json_scan.cpp | 11 +++++++++- .../json/table/json_multi_file_reader.test | 20 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/extension/json/include/json_scan.hpp b/extension/json/include/json_scan.hpp index 4fd7bc0a0768..0504c21b8542 100644 --- a/extension/json/include/json_scan.hpp +++ b/extension/json/include/json_scan.hpp @@ -360,6 +360,8 @@ struct JSONScan { const TableFunction &function); static unique_ptr Deserialize(Deserializer &deserializer, TableFunction &function); + static virtual_column_map_t GetVirtualColumns(ClientContext &context, optional_ptr bind_data); + static void TableFunctionDefaults(TableFunction &table_function); }; diff --git a/extension/json/json_scan.cpp b/extension/json/json_scan.cpp index 6fd732a0b102..11777fc3eea2 100644 --- a/extension/json/json_scan.cpp +++ b/extension/json/json_scan.cpp @@ -171,7 +171,7 @@ unique_ptr JSONGlobalTableFunctionState::Init(ClientCo const auto &col_id = input.column_ids[col_idx]; // Skip any multi-file reader / row id stuff - if (col_id == bind_data.reader_bind.filename_idx || IsRowIdColumnId(col_id)) { + if (col_id == bind_data.reader_bind.filename_idx || IsVirtualColumn(col_id)) { continue; } bool skip = false; @@ -1025,6 +1025,14 @@ unique_ptr JSONScan::Deserialize(Deserializer &deserializer, Table return std::move(result); } +virtual_column_map_t JSONScan::GetVirtualColumns(ClientContext &context, optional_ptr bind_data) { + auto &csv_bind = bind_data->Cast(); + virtual_column_map_t result; + MultiFileReader::GetVirtualColumns(context, csv_bind.reader_bind, result); + 
result.insert(make_pair(COLUMN_IDENTIFIER_EMPTY, TableColumn("", LogicalType::BOOLEAN))); + return result; +} + void JSONScan::TableFunctionDefaults(TableFunction &table_function) { MultiFileReader().AddParameters(table_function); @@ -1039,6 +1047,7 @@ void JSONScan::TableFunctionDefaults(TableFunction &table_function) { table_function.serialize = Serialize; table_function.deserialize = Deserialize; + table_function.get_virtual_columns = GetVirtualColumns; table_function.projection_pushdown = true; table_function.filter_pushdown = false; diff --git a/test/sql/json/table/json_multi_file_reader.test b/test/sql/json/table/json_multi_file_reader.test index a56f7aee1ca3..e7b041b84ac5 100644 --- a/test/sql/json/table/json_multi_file_reader.test +++ b/test/sql/json/table/json_multi_file_reader.test @@ -32,6 +32,26 @@ select * exclude (filename), replace(filename, '\', '/') as filename from read_j 5 Raising Arizona data/json/example_r.ndjson 5 Raising Arizona data/json/example_rn.ndjson +# virtual column +query III +select *, replace(filename, '\', '/') from read_json_auto('data/json/example_*.ndjson') order by all +---- +1 O Brother, Where Art Thou? data/json/example_n.ndjson +1 O Brother, Where Art Thou? data/json/example_r.ndjson +1 O Brother, Where Art Thou? 
data/json/example_rn.ndjson +2 Home for the Holidays data/json/example_n.ndjson +2 Home for the Holidays data/json/example_r.ndjson +2 Home for the Holidays data/json/example_rn.ndjson +3 The Firm data/json/example_n.ndjson +3 The Firm data/json/example_r.ndjson +3 The Firm data/json/example_rn.ndjson +4 Broadcast News data/json/example_n.ndjson +4 Broadcast News data/json/example_r.ndjson +4 Broadcast News data/json/example_rn.ndjson +5 Raising Arizona data/json/example_n.ndjson +5 Raising Arizona data/json/example_r.ndjson +5 Raising Arizona data/json/example_rn.ndjson + query III select * from read_json_auto(['data/json/example_n.ndjson', 'data/json/top_level_array.json'], union_by_name=true) order by all ---- From 4b827f09c515c621231e5a4607ad4c15f5fa2be2 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 10:30:01 +0100 Subject: [PATCH 085/142] Format fix --- src/function/table/read_csv.cpp | 1 - src/include/duckdb/common/multi_file_reader.hpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 1e442873b296..1bbe01ac8476 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -423,7 +423,6 @@ virtual_column_map_t ReadCSVGetVirtualColumns(ClientContext &context, optional_p return result; } - TableFunction ReadCSVTableFunction::GetFunction() { TableFunction read_csv("read_csv", {LogicalType::VARCHAR}, ReadCSVFunction, ReadCSVBind, ReadCSVInitGlobal, ReadCSVInitLocal); diff --git a/src/include/duckdb/common/multi_file_reader.hpp b/src/include/duckdb/common/multi_file_reader.hpp index 39fd78654d28..0d5d36484e24 100644 --- a/src/include/duckdb/common/multi_file_reader.hpp +++ b/src/include/duckdb/common/multi_file_reader.hpp @@ -294,7 +294,7 @@ struct MultiFileReader { OperatorPartitionData &partition_data); DUCKDB_API static void GetVirtualColumns(ClientContext &context, MultiFileReaderBindData &bind_data, - virtual_column_map_t &result); 
+ virtual_column_map_t &result); template MultiFileReaderBindData BindUnionReader(ClientContext &context, vector &return_types, From 1990d37d304bba53af6e1ae4d7e07f205507e357 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 10:39:20 +0100 Subject: [PATCH 086/142] take parent nulls into account in fast path writing define levels --- extension/parquet/column_writer.cpp | 1 + extension/parquet/include/column_writer.hpp | 1 + extension/parquet/parquet_extension.cpp | 2 +- extension/parquet/writer/primitive_column_writer.cpp | 8 ++++---- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 6d3bd63618bf..893081658a07 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -176,6 +176,7 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat idx_t current_index = state.definition_levels.size(); if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) { state.definition_levels.push_back(parent->definition_levels[current_index]); + state.parent_null_count++; } else if (validity.RowIsValid(vector_index)) { state.definition_levels.push_back(define_value); } else { diff --git a/extension/parquet/include/column_writer.hpp b/extension/parquet/include/column_writer.hpp index dba6788e7cec..26f01d8d74b0 100644 --- a/extension/parquet/include/column_writer.hpp +++ b/extension/parquet/include/column_writer.hpp @@ -27,6 +27,7 @@ class ColumnWriterState { unsafe_vector definition_levels; unsafe_vector repetition_levels; vector is_empty; + idx_t parent_null_count = 0; idx_t null_count = 0; public: diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index 9142c0db681e..adc5ee37cd06 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -200,7 +200,7 @@ struct ParquetWriteBindData : public TableFunctionData { 
dictionary_size_limit = row_group_size / 20; } - idx_t string_dictionary_page_size_limit = 2097152; + idx_t string_dictionary_page_size_limit = 1048576; //! What false positive rate are we willing to accept for bloom filters double bloom_filter_false_positive_ratio = 0.01; diff --git a/extension/parquet/writer/primitive_column_writer.cpp b/extension/parquet/writer/primitive_column_writer.cpp index 0bd85d0894a9..9e3515de9d78 100644 --- a/extension/parquet/writer/primitive_column_writer.cpp +++ b/extension/parquet/writer/primitive_column_writer.cpp @@ -54,7 +54,7 @@ void PrimitiveColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterStat if (!check_parent_empty && validity.AllValid() && TypeIsConstantSize(vector.GetType().InternalType()) && page_info_ref.get().estimated_page_size + GetRowSize(vector, vector_index, state) * vcount < MAX_UNCOMPRESSED_PAGE_SIZE) { - // Fast path + // Fast path: fixed-size type, all valid, and it fits on the current page auto &page_info = page_info_ref.get(); page_info.row_count += vcount; page_info.estimated_page_size += GetRowSize(vector, vector_index, state) * vcount; @@ -140,8 +140,8 @@ void PrimitiveColumnWriter::WriteLevels(WriteStream &temp_writer, const unsafe_v MemoryStream intermediate_stream(Allocator::DefaultAllocator()); rle_encoder.BeginWrite(); - if (null_count.IsValid() && (null_count.GetIndex() == 0 || null_count.GetIndex() == count)) { - // All are NULL or none are NULL + if (null_count.IsValid() && null_count.GetIndex() == 0) { + // Fast path: no nulls rle_encoder.WriteMany(intermediate_stream, levels[0], count); } else { for (idx_t i = offset; i < offset + count; i++) { @@ -176,7 +176,7 @@ void PrimitiveColumnWriter::NextPage(PrimitiveColumnWriterState &state) { // write the definition levels WriteLevels(temp_writer, state.definition_levels, max_define, page_info.offset, page_info.row_count, - state.null_count); + state.null_count + state.parent_null_count); } void 
PrimitiveColumnWriter::FlushPage(PrimitiveColumnWriterState &state) { From 09a5da2794d27e7ea11d97eb0d998bc49169520d Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 10:47:07 +0100 Subject: [PATCH 087/142] prefer allocator over unique array --- extension/parquet/column_writer.cpp | 15 ++++++--------- extension/parquet/include/column_writer.hpp | 2 +- .../include/writer/primitive_column_writer.hpp | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 893081658a07..3b5c3747e7e9 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -91,7 +91,7 @@ ColumnWriterState::~ColumnWriterState() { } void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, - unique_ptr &compressed_buf) { + AllocatedData &compressed_buf) { switch (writer.GetCodec()) { case CompressionCodec::UNCOMPRESSED: compressed_size = temp_writer.GetPosition(); @@ -100,7 +100,7 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si case CompressionCodec::SNAPPY: { compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.GetPosition()); - compressed_buf = unique_ptr(new data_t[compressed_size]); + compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size); duckdb_snappy::RawCompress(const_char_ptr_cast(temp_writer.GetData()), temp_writer.GetPosition(), char_ptr_cast(compressed_buf.get()), &compressed_size); compressed_data = compressed_buf.get(); @@ -109,7 +109,7 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si } case CompressionCodec::LZ4_RAW: { compressed_size = duckdb_lz4::LZ4_compressBound(UnsafeNumericCast(temp_writer.GetPosition())); - compressed_buf = unique_ptr(new data_t[compressed_size]); + compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size); compressed_size = 
duckdb_lz4::LZ4_compress_default( const_char_ptr_cast(temp_writer.GetData()), char_ptr_cast(compressed_buf.get()), UnsafeNumericCast(temp_writer.GetPosition()), UnsafeNumericCast(compressed_size)); @@ -119,7 +119,7 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si case CompressionCodec::GZIP: { MiniZStream s; compressed_size = s.MaxCompressedLength(temp_writer.GetPosition()); - compressed_buf = unique_ptr(new data_t[compressed_size]); + compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size); s.Compress(const_char_ptr_cast(temp_writer.GetData()), temp_writer.GetPosition(), char_ptr_cast(compressed_buf.get()), &compressed_size); compressed_data = compressed_buf.get(); @@ -127,7 +127,7 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si } case CompressionCodec::ZSTD: { compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.GetPosition()); - compressed_buf = unique_ptr(new data_t[compressed_size]); + compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size); compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size, (const void *)temp_writer.GetData(), temp_writer.GetPosition(), UnsafeNumericCast(writer.CompressionLevel())); @@ -135,15 +135,12 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si break; } case CompressionCodec::BROTLI: { - compressed_size = duckdb_brotli::BrotliEncoderMaxCompressedSize(temp_writer.GetPosition()); - compressed_buf = unique_ptr(new data_t[compressed_size]); - + compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size); duckdb_brotli::BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE, temp_writer.GetPosition(), temp_writer.GetData(), &compressed_size, compressed_buf.get()); compressed_data = compressed_buf.get(); - break; } default: diff --git 
a/extension/parquet/include/column_writer.hpp b/extension/parquet/include/column_writer.hpp index 26f01d8d74b0..09f17c7eeb37 100644 --- a/extension/parquet/include/column_writer.hpp +++ b/extension/parquet/include/column_writer.hpp @@ -113,7 +113,7 @@ class ColumnWriter { void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat) const; void CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, - unique_ptr &compressed_buf); + AllocatedData &compressed_buf); }; } // namespace duckdb diff --git a/extension/parquet/include/writer/primitive_column_writer.hpp b/extension/parquet/include/writer/primitive_column_writer.hpp index ccaa02f79503..6315efbd7452 100644 --- a/extension/parquet/include/writer/primitive_column_writer.hpp +++ b/extension/parquet/include/writer/primitive_column_writer.hpp @@ -31,7 +31,7 @@ struct PageWriteInformation { idx_t max_write_count = 0; size_t compressed_size; data_ptr_t compressed_data; - unique_ptr compressed_buf; + AllocatedData compressed_buf; }; class PrimitiveColumnWriterState : public ColumnWriterState { From e14c6ac8dc390067125323bafa80e7a6e7eb97a7 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 12:48:24 +0100 Subject: [PATCH 088/142] Add missing includes --- extension/core_functions/scalar/generic/least.cpp | 1 + .../duckdb/execution/operator/persistent/physical_export.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/extension/core_functions/scalar/generic/least.cpp b/extension/core_functions/scalar/generic/least.cpp index 40a943101f94..886e909ca557 100644 --- a/extension/core_functions/scalar/generic/least.cpp +++ b/extension/core_functions/scalar/generic/least.cpp @@ -2,6 +2,7 @@ #include "core_functions/scalar/generic_functions.hpp" #include "duckdb/function/create_sort_key.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/expression_binder.hpp" namespace duckdb { diff 
--git a/src/include/duckdb/execution/operator/persistent/physical_export.hpp b/src/include/duckdb/execution/operator/persistent/physical_export.hpp index fd7f4981fc6f..214487dac0f8 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_export.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_export.hpp @@ -14,6 +14,7 @@ #include "duckdb/function/copy_function.hpp" #include "duckdb/parser/parsed_data/copy_info.hpp" #include "duckdb/parser/parsed_data/exported_table_data.hpp" +#include "duckdb/catalog/catalog_entry_map.hpp" namespace duckdb { From 4a7d440ab2c9e3e519cb92384ef6402ebc88a2c5 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 13:31:28 +0100 Subject: [PATCH 089/142] Fix for statistics propagation in Parquet for virtual columns --- extension/parquet/parquet_extension.cpp | 2 +- test/sql/copy/parquet/parquet_virtual_columns.test | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index 1a05891eb314..a7fbb1fe3c1c 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -441,7 +441,7 @@ class ParquetScanFunction { column_t column_index) { auto &bind_data = bind_data_p->Cast(); - if (IsRowIdColumnId(column_index)) { + if (IsVirtualColumn(column_index)) { return nullptr; } diff --git a/test/sql/copy/parquet/parquet_virtual_columns.test b/test/sql/copy/parquet/parquet_virtual_columns.test index f1f84155036b..03f5f4677811 100644 --- a/test/sql/copy/parquet/parquet_virtual_columns.test +++ b/test/sql/copy/parquet/parquet_virtual_columns.test @@ -5,6 +5,9 @@ require parquet # Filename without the filename option +statement ok +select filename from 'data/parquet-testing/glob/t1.parquet' + query III select i, j, replace(filename, '\', '/') from 'data/parquet-testing/glob*/t?.parquet' order by i; ---- From 8cb2b66edf747e1fc0553cdfea3057b8facf31b4 Mon Sep 17 00:00:00 2001 From: 
Laurens Kuiper Date: Fri, 14 Feb 2025 13:57:01 +0100 Subject: [PATCH 090/142] cast and directly write to memorystream in primitive dictionary so we can deal with uuid/interval --- extension/parquet/column_writer.cpp | 5 ++ .../writer/parquet_write_operators.hpp | 21 ++++++- .../duckdb/common/primitive_dictionary.hpp | 57 ++++++++----------- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 3b5c3747e7e9..607351b3298a 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -274,6 +274,11 @@ struct double_na_equal { } return val == right; } + + bool operator!=(const double &right) const { + return !(*this == right); + } + double val; }; diff --git a/extension/parquet/include/writer/parquet_write_operators.hpp b/extension/parquet/include/writer/parquet_write_operators.hpp index d63acba17e80..8bef3067a05e 100644 --- a/extension/parquet/include/writer/parquet_write_operators.hpp +++ b/extension/parquet/include/writer/parquet_write_operators.hpp @@ -20,6 +20,11 @@ struct BaseParquetOperator { ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT)); } + template + static constexpr idx_t WriteSize(const TGT &input) { + return sizeof(TGT); + } + template static uint64_t XXHash64(const TGT &target_value) { return duckdb_zstd::XXH64(&target_value, sizeof(target_value), 0); @@ -99,6 +104,11 @@ struct ParquetStringOperator : public BaseParquetOperator { ser.WriteData(const_data_ptr_cast(target_value.GetData()), target_value.GetSize()); } + template + static idx_t WriteSize(const TGT &target_value) { + return sizeof(uint32_t) + target_value.GetSize(); + } + template static uint64_t XXHash64(const TGT &target_value) { return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0); @@ -118,7 +128,6 @@ struct ParquetIntervalTargetType { struct ParquetIntervalOperator : public BaseParquetOperator { template static TGT Operation(SRC input) { 
- if (input.days < 0 || input.months < 0 || input.micros < 0) { throw IOException("Parquet files do not support negative intervals"); } @@ -134,6 +143,11 @@ struct ParquetIntervalOperator : public BaseParquetOperator { ser.WriteData(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE); } + template + static constexpr idx_t WriteSize(const TGT &target_value) { + return ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE; + } + template static uint64_t XXHash64(const TGT &target_value) { return duckdb_zstd::XXH64(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE, 0); @@ -167,6 +181,11 @@ struct ParquetUUIDOperator : public BaseParquetOperator { ser.WriteData(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE); } + template + static constexpr idx_t WriteSize(const TGT &target_value) { + return ParquetUUIDTargetType::PARQUET_UUID_SIZE; + } + template static uint64_t XXHash64(const TGT &target_value) { return duckdb_zstd::XXH64(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE, 0); diff --git a/src/include/duckdb/common/primitive_dictionary.hpp b/src/include/duckdb/common/primitive_dictionary.hpp index 4b48b7ab3dd3..8d75389a1a7b 100644 --- a/src/include/duckdb/common/primitive_dictionary.hpp +++ b/src/include/duckdb/common/primitive_dictionary.hpp @@ -21,13 +21,9 @@ struct PrimitiveCastOperator { } }; -template +template class PrimitiveDictionary { private: - static_assert(!std::is_same::value || - (std::is_same::value && std::is_same::value), - "If SRC is string_t, TGT must also be string_t"); - static constexpr idx_t LOAD_FACTOR = 2; static constexpr uint32_t INVALID_INDEX = static_cast(-1); @@ -46,13 +42,12 @@ class PrimitiveDictionary { //! 
It is used to dictionary-encode data in, e.g., Parquet files PrimitiveDictionary(Allocator &allocator, idx_t maximum_size_p, idx_t target_capacity_p) : maximum_size(maximum_size_p), size(0), capacity(NextPowerOfTwo(maximum_size * LOAD_FACTOR)), - capacity_mask(capacity - 1), target_capacity(target_capacity_p), target_offset(0), + capacity_mask(capacity - 1), target_capacity(target_capacity_p), allocated_dictionary(allocator.Allocate(capacity * sizeof(primitive_dictionary_entry_t))), allocated_target( allocator.Allocate(std::is_same::value ? target_capacity : capacity * sizeof(TGT))), - dictionary(reinterpret_cast(allocated_dictionary.get())), - target_values(reinterpret_cast(allocated_target.get())), target_raw(allocated_target.get()), - full(false) { + target_stream(allocated_target.get(), allocated_target.GetSize()), + dictionary(reinterpret_cast(allocated_dictionary.get())), full(false) { // Initialize empty for (idx_t i = 0; i < capacity; i++) { dictionary[i].index = INVALID_INDEX; @@ -86,6 +81,7 @@ class PrimitiveDictionary { //! Iterates over inserted values template ::value, int>::type = 0> void IterateValues(const std::function &fun) const { + const auto target_values = reinterpret_cast(allocated_target.get()); for (idx_t i = 0; i < capacity; i++) { auto &entry = dictionary[i]; if (entry.IsEmpty()) { @@ -119,13 +115,13 @@ class PrimitiveDictionary { //! Get the target written values as a memory stream (zero-copy) unique_ptr GetTargetMemoryStream() const { - auto result = make_uniq(target_raw, target_capacity); - result->SetPosition(target_offset); + auto result = make_uniq(target_stream.GetData(), target_stream.GetCapacity()); + result->SetPosition(target_stream.GetPosition()); return result; } private: - //! Looks up a value in the dictionary using linear probing + //! 
Look up a value in the dictionary using linear probing primitive_dictionary_entry_t &Lookup(const SRC &value) const { auto offset = Hash(value) & capacity_mask; while (!dictionary[offset].IsEmpty() && dictionary[offset].value != value) { @@ -134,32 +130,31 @@ class PrimitiveDictionary { return dictionary[offset]; } - //! Writes a value to the target data - template ::value, int>::type = 0> + //! Write a value to the target data (if source is not string) + template ::value, int>::type = 0> bool AddToTarget(const SRC &src_value) { - const auto tgt_value = CAST_OP::template Operation(src_value); - target_values[size] = tgt_value; - target_offset += sizeof(TGT); + const auto tgt_value = OP::template Operation(src_value); + if (target_stream.GetPosition() + OP::template WriteSize(tgt_value) > target_stream.GetCapacity()) { + return false; // Out of capacity + } + OP::template WriteToStream(tgt_value, target_stream); return true; } - //! Specialized template to add a string_t value to the target data - template ::value, int>::type = 0> + //! 
Write a value to the target data (if source is string) + template ::value, int>::type = 0> bool AddToTarget(SRC &src_value) { - if (target_offset + sizeof(uint32_t) + src_value.GetSize() > target_capacity) { + // If source is string, target must also be string + if (target_stream.GetPosition() + OP::template WriteSize(src_value) > target_stream.GetCapacity()) { return false; // Out of capacity } - // Store string length and increment offset - Store(UnsafeNumericCast(src_value.GetSize()), target_raw + target_offset); - target_offset += sizeof(uint32_t); + const auto ptr = target_stream.GetData() + target_stream.GetPosition() + sizeof(uint32_t); + OP::template WriteToStream(src_value, target_stream); - // Copy over string data to target, update "value" to point to it, and increment offset - memcpy(target_raw + target_offset, src_value.GetData(), src_value.GetSize()); if (!src_value.IsInlined()) { - src_value.SetPointer(char_ptr_cast(target_raw + target_offset)); + src_value.SetPointer(char_ptr_cast(ptr)); } - target_offset += src_value.GetSize(); return true; } @@ -169,22 +164,20 @@ class PrimitiveDictionary { const idx_t maximum_size; idx_t size; - //! Capacity (power of two) and corresponding mask + //! Dictionary capacity (power of two) and corresponding mask const idx_t capacity; const idx_t capacity_mask; - //! Capacity/offset of target encoded data + //! Capacity of target encoded data const idx_t target_capacity; - idx_t target_offset; //! Allocated regions for dictionary/target AllocatedData allocated_dictionary; AllocatedData allocated_target; + MemoryStream target_stream; //! Pointers to allocated regions for convenience primitive_dictionary_entry_t *const dictionary; - TGT *const target_values; - data_ptr_t const target_raw; //! 
More values inserted than possible bool full; From 32e1058dface2640d01b2d8408581a63a4b9aa87 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 14:08:53 +0100 Subject: [PATCH 091/142] all parquet tests working again --- extension/parquet/column_writer.cpp | 6 ++++++ .../sql/copy/parquet/writer/parquet_write_memory_usage.test | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index 607351b3298a..f774a8c681eb 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -291,12 +291,18 @@ struct float_na_equal { operator float() const { return val; } + bool operator==(const float &right) const { if (std::isnan(val) && std::isnan(right)) { return true; } return val == right; } + + bool operator!=(const float &right) const { + return !(*this == right); + } + float val; }; diff --git a/test/sql/copy/parquet/writer/parquet_write_memory_usage.test b/test/sql/copy/parquet/writer/parquet_write_memory_usage.test index 29a19bfe4609..2e91149a5cea 100644 --- a/test/sql/copy/parquet/writer/parquet_write_memory_usage.test +++ b/test/sql/copy/parquet/writer/parquet_write_memory_usage.test @@ -9,7 +9,7 @@ load __TEST_DIR__/parquet_write_memory_usage.db statement ok set threads=1 -foreach memory_limit,row_group_size 0.3mb,20480 0.6mb,40960 +foreach memory_limit,row_group_size 0.5mb,20480 1.0mb,40960 statement ok set memory_limit='${memory_limit}' From 3148218404701fa471623553e653a38b2f5e05b7 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Fri, 14 Feb 2025 14:21:43 +0100 Subject: [PATCH 092/142] Fix for filename on windows --- test/sql/copy/csv/test_union_by_name.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/copy/csv/test_union_by_name.test b/test/sql/copy/csv/test_union_by_name.test index ebe98d332378..a531f9eddda0 100644 --- a/test/sql/copy/csv/test_union_by_name.test +++ 
b/test/sql/copy/csv/test_union_by_name.test @@ -68,7 +68,7 @@ ORDER BY a; 9223372036854775807 NULL NULL query IIII -SELECT a, b, c, replace(replace(filename, '\', '/'), '__TEST_DIR__/', '') +SELECT a, b, c, replace(replace(filename, '__TEST_DIR__', ''), '\', '/')[2:] FROM read_csv_auto(['__TEST_DIR__/ubn1.csv', '__TEST_DIR__/ubn2.csv', '__TEST_DIR__/ubn3.csv'], UNION_BY_NAME=TRUE) ORDER BY a; ---- From 5f27683ea456f850bff4f91d9c9c540ec2e5c324 Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Fri, 14 Feb 2025 14:29:06 +0100 Subject: [PATCH 093/142] Deleted copy constructor of pending query --- src/include/duckdb/main/pending_query_result.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/duckdb/main/pending_query_result.hpp b/src/include/duckdb/main/pending_query_result.hpp index 72fe9405fef8..28e87e9d86bb 100644 --- a/src/include/duckdb/main/pending_query_result.hpp +++ b/src/include/duckdb/main/pending_query_result.hpp @@ -29,6 +29,9 @@ class PendingQueryResult : public BaseQueryResult { DUCKDB_API explicit PendingQueryResult(ErrorData error_message); DUCKDB_API ~PendingQueryResult() override; DUCKDB_API bool AllowStreamResult() const; + PendingQueryResult(const PendingQueryResult&) = delete; + PendingQueryResult& operator=(const PendingQueryResult&) = delete; + public: //! Executes a single task within the query, returning whether or not the query is ready. 
From 20a0961fc1e71464070687a8fd1fa16fea8287f4 Mon Sep 17 00:00:00 2001 From: Niclas Haderer Date: Fri, 14 Feb 2025 14:32:45 +0100 Subject: [PATCH 094/142] format fix --- src/include/duckdb/main/pending_query_result.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/include/duckdb/main/pending_query_result.hpp b/src/include/duckdb/main/pending_query_result.hpp index 28e87e9d86bb..cf0268712cba 100644 --- a/src/include/duckdb/main/pending_query_result.hpp +++ b/src/include/duckdb/main/pending_query_result.hpp @@ -29,9 +29,8 @@ class PendingQueryResult : public BaseQueryResult { DUCKDB_API explicit PendingQueryResult(ErrorData error_message); DUCKDB_API ~PendingQueryResult() override; DUCKDB_API bool AllowStreamResult() const; - PendingQueryResult(const PendingQueryResult&) = delete; - PendingQueryResult& operator=(const PendingQueryResult&) = delete; - + PendingQueryResult(const PendingQueryResult &) = delete; + PendingQueryResult &operator=(const PendingQueryResult &) = delete; public: //! Executes a single task within the query, returning whether or not the query is ready. 
From efc541314d99daa60e883a6e04b41e25faf0b275 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 14:43:17 +0100 Subject: [PATCH 095/142] slightly tweak hash function --- src/common/types/hash.cpp | 8 ++++- test/sql/function/generic/hash_func.test | 44 ++++++++++++------------ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp index f9fe42ffcbd5..9a9fd5daf9e8 100644 --- a/src/common/types/hash.cpp +++ b/src/common/types/hash.cpp @@ -98,9 +98,13 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { h *= 0xd6e8feb86659fd93U; } - // XOR with remaining (<8) bytes + // Load and process remaining (<8) bytes hash_t hr = 0; memcpy(&hr, ptr, len & 7U); + hr *= 0xd6e8feb86659fd93U; + hr ^= h >> 32; + + // XOR with hash h ^= hr; // Finalize @@ -108,6 +112,8 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { h ^= h >> 32; return h; + + // return Hash(h); } hash_t Hash(const char *val, size_t size) { diff --git a/test/sql/function/generic/hash_func.test b/test/sql/function/generic/hash_func.test index 0b933e660ffc..44ca5113eb87 100644 --- a/test/sql/function/generic/hash_func.test +++ b/test/sql/function/generic/hash_func.test @@ -44,9 +44,9 @@ CREATE TABLE structs AS query II SELECT s, HASH(s) FROM structs ---- -{'i': 5, 's': string} 16279265163003826010 +{'i': 5, 's': string} 5041354121594313779 {'i': -2, 's': NULL} 13311620765177879553 -{'i': NULL, 's': not null} 17906579446707938902 +{'i': NULL, 's': not null} 17669771151474316850 {'i': NULL, 's': NULL} 18212156630472451589 NULL 18212156630472451589 @@ -76,11 +76,11 @@ NULL 13787848793156543929 query II SELECT lg, HASH(lg) FROM lists ---- -[TGTA] 6988469852028562792 -[CGGT] 11509251853341801096 -[CCTC] 7465354080729552024 -[TCTA] 8712127848443266422 -[AGGG] 11482125973879342325 +[TGTA] 17595328716338797054 +[CGGT] 10306172129632853293 +[CCTC] 13297701768986389650 +[TCTA] 12532519228232631318 +[AGGG] 
18327401687889337414 NULL 13787848793156543929 # Maps @@ -98,11 +98,11 @@ CREATE TABLE maps AS query II SELECT m, HASH(m) FROM maps ---- -{1=TGTA} 2794336106852724683 -{1=CGGT, 2=CCTC} 13102305630601287406 +{1=TGTA} 12831981919938534237 +{1=CGGT, 2=CCTC} 13475482557019497469 {} 13787848793156543929 -{1=TCTA, 2=NULL, 3=CGGT} 4782555145300717917 -{1=TGTA, 2=CGGT, 3=CCTC, 4=TCTA, 5=AGGG} 8572659779500367064 +{1=TCTA, 2=NULL, 3=CGGT} 6801514312074335687 +{1=TGTA, 2=CGGT, 3=CCTC, 4=TCTA, 5=AGGG} 1967491966533763128 NULL 13787848793156543929 statement ok @@ -189,17 +189,17 @@ SELECT r, HASH() FROM enums; query II SELECT r, HASH(r, 'capacitor') FROM enums; ---- -black 7369304742611425093 -brown 2341438809461609958 -red 8885610210938720771 -orange 10151273889449338965 -yellow 9455015799163091888 -green 5769395161578968563 -blue 264671877857503589 -violet 13697912152922098530 -grey 6956627843582995222 -white 11070700999111121301 -NULL 2712243419119719673 +black 10215506564763180114 +brown 14699666407584440049 +red 10435339440036763924 +orange 7449326894723801922 +yellow 7545557152300511399 +green 13515514493392674532 +blue 16730185616673645170 +violet 6167961171085770869 +grey 10019148715359395841 +white 8224352891729695362 +NULL 14853453776375799790 query II SELECT r, HASH('2022-02-12'::DATE, r) FROM enums; From 245f034e69680cc7c1c9befc4f80327a188ef4f2 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 14:44:54 +0100 Subject: [PATCH 096/142] add clickbench write benchmark --- benchmark/parquet/clickbench_write.benchmark | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 benchmark/parquet/clickbench_write.benchmark diff --git a/benchmark/parquet/clickbench_write.benchmark b/benchmark/parquet/clickbench_write.benchmark new file mode 100644 index 000000000000..7f22b7fe706d --- /dev/null +++ b/benchmark/parquet/clickbench_write.benchmark @@ -0,0 +1,23 @@ +# name: benchmark/parquet/clickbench_write.benchmark +# description: Write 
ClickBench data to Parquet +# group: [clickbench] + +require httpfs + +require parquet + +name ClickBench Write Parquet +group Clickbench + +cache clickbench.duckdb + +load benchmark/clickbench/queries/load.sql + +init +set preserve_insertion_order=false; + +run +COPY hits TO '${BENCHMARK_DIR}/hits.parquet'; + +result I +10000000 From d99ceb6de9d36892c605dcc3fcbd3abe8bd2207f Mon Sep 17 00:00:00 2001 From: Tishj Date: Fri, 14 Feb 2025 15:02:17 +0100 Subject: [PATCH 097/142] backslashes only escape double/single quotes outside of quotes, inside quotes they escape everything --- src/function/cast/vector_cast_helpers.cpp | 74 +++++++++---------- test/sql/cast/string_to_list_cast.test | 5 ++ test/sql/cast/string_to_list_escapes.test | 51 ++++++++++++- test/sql/cast/string_to_map_escapes.test | 24 +++--- .../string_to_nested_types_cast.test_slow | 6 +- test/sql/cast/string_to_struct_escapes.test | 72 +++++++++++++++--- test/sql/cast/string_to_unnamed_struct.test | 2 +- 7 files changed, 167 insertions(+), 67 deletions(-) diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp index 1d654da9b735..c0790d56b4ce 100644 --- a/src/function/cast/vector_cast_helpers.cpp +++ b/src/function/cast/vector_cast_helpers.cpp @@ -32,10 +32,6 @@ inline static void SkipWhitespace(StringCastInputState &input_state) { auto &buf = input_state.buf; auto &pos = input_state.pos; auto &len = input_state.len; - if (input_state.escaped) { - //! 
Escaped whitespace should not be skipped - return; - } while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) { pos++; input_state.escaped = false; @@ -72,34 +68,35 @@ static bool SkipToClose(StringCastInputState &input_state) { auto &idx = input_state.pos; auto &buf = input_state.buf; auto &len = input_state.len; - auto &escaped = input_state.escaped; D_ASSERT(buf[idx] == '{' || buf[idx] == '[' || buf[idx] == '('); vector brackets; while (idx < len) { - if (!escaped) { - if (buf[idx] == '"' || buf[idx] == '\'') { + bool set_escaped = false; + if (buf[idx] == '"' || buf[idx] == '\'') { + if (!input_state.escaped) { if (!SkipToCloseQuotes(input_state)) { return false; } - } else if (buf[idx] == '{') { - brackets.push_back('}'); - } else if (buf[idx] == '(') { - brackets.push_back(')'); - } else if (buf[idx] == '[') { - brackets.push_back(']'); - } else if (buf[idx] == brackets.back()) { - brackets.pop_back(); - if (brackets.empty()) { - return true; - } - } else if (buf[idx] == '\\') { - escaped = true; } - } else { - escaped = false; + } else if (buf[idx] == '{') { + brackets.push_back('}'); + } else if (buf[idx] == '(') { + brackets.push_back(')'); + } else if (buf[idx] == '[') { + brackets.push_back(']'); + } else if (buf[idx] == brackets.back()) { + brackets.pop_back(); + if (brackets.empty()) { + return true; + } + } else if (buf[idx] == '\\') { + //! Note that we don't treat `\\` special here, backslashes can't be escaped outside of quotes + //! backslashes within quotes will not be encountered in this function + set_escaped = true; } + input_state.escaped = set_escaped; idx++; } return false; @@ -133,9 +130,11 @@ static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t en auto current_char = buf[start + i]; if (!escaped) { if (scopes.empty() && current_char == '\\') { - //! Start of escape - escaped = true; - continue; + if (quoted || (start + i + 1 < end && (buf[start + i + 1] == '\'' || buf[start + i + 1] == '"'))) { + //! 
Start of escape + escaped = true; + continue; + } } if (scopes.empty() && (current_char == '\'' || current_char == '"')) { if (quoted && current_char == quote_char) { @@ -208,17 +207,14 @@ static inline bool ValueStateTransition(StringCastInputState &input_state, optio auto &pos = input_state.pos; bool set_escaped = false; - if (input_state.escaped) { + if (buf[pos] == '"' || buf[pos] == '\'') { if (!start_pos.IsValid()) { start_pos = pos; } - end_pos = pos; - } else if (buf[pos] == '"' || buf[pos] == '\'') { - if (!start_pos.IsValid()) { - start_pos = pos; - } - if (!SkipToCloseQuotes(input_state)) { - return false; + if (!input_state.escaped) { + if (!SkipToCloseQuotes(input_state)) { + return false; + } } end_pos = pos; } else if (buf[pos] == '{') { @@ -285,7 +281,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) { optional_idx start_pos; idx_t end_pos; - while (pos < len && ((buf[pos] != ',' && buf[pos] != ']') || input_state.escaped)) { + while (pos < len && (buf[pos] != ',' && buf[pos] != ']')) { if (!ValueStateTransition(input_state, start_pos, end_pos)) { return false; } @@ -387,7 +383,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { while (pos < len) { optional_idx start_pos; idx_t end_pos; - while (pos < len && (buf[pos] != '=' || input_state.escaped)) { + while (pos < len && buf[pos] != '=') { if (!ValueStateTransition(input_state, start_pos, end_pos)) { return false; } @@ -407,7 +403,7 @@ static bool SplitStringMapInternal(const string_t &input, OP &state) { start_pos = optional_idx(); pos++; SkipWhitespace(input_state); - while (pos < len && ((buf[pos] != ',' && buf[pos] != '}') || input_state.escaped)) { + while (pos < len && (buf[pos] != ',' && buf[pos] != '}')) { if (!ValueStateTransition(input_state, start_pos, end_pos)) { return false; } @@ -474,7 +470,7 @@ bool VectorStringToStruct::SplitStruct(const string_t &input, vectortimestamp_ns)::VARCHAR AS STRUCT(A TIME)) FROM test_a query I SELECT 
CAST(struct_pack(A=>blob)::VARCHAR AS STRUCT(A BLOB)) FROM test_all_types(); ---- -{'A': thisisalongblobx00withnullbytes} -{'A': x00x00x00a} +{'A': thisisalongblob\x00withnullbytes} +{'A': \x00\x00\x00a} {'A': NULL} query I diff --git a/test/sql/cast/string_to_struct_escapes.test b/test/sql/cast/string_to_struct_escapes.test index 869a48bc35c6..041386e77e74 100644 --- a/test/sql/cast/string_to_struct_escapes.test +++ b/test/sql/cast/string_to_struct_escapes.test @@ -67,20 +67,32 @@ can't be cast to the destination type # Invalid: Name contains a backslash statement error -SELECT $${backslash\name: value}$$::STRUCT("backslash\name" VARCHAR); +SELECT $${"backslash\name": value}$$::STRUCT("backslash\name" VARCHAR); ---- can't be cast to the destination type +# Valid: Name contains a backslash outside of quotes, interpreted as literal +query I +SELECT $${backslash\name: value}$$::STRUCT("backslash\name" VARCHAR); +---- +{'backslash\name': value} + # first `:` is not escaped, won't match the "name:" struct key statement error SELECT $${name: test, value: 30}$$::STRUCT("name:" VARCHAR, value INT); ---- can't be cast to the destination type -# Name can contain escaped `:` -query I +# Invalid: Name can contain escaped `:`, but only in quotes +statement error SELECT $${name\:: test, value: 30}$$::STRUCT("name:" VARCHAR, value INT); ---- +can't be cast to the destination type STRUCT("name:" VARCHAR, "value" INTEGER) + +# Valid: Name can contain escaped `:` in quotes +query I +SELECT $${"name\:": test, value: 30}$$::STRUCT("name:" VARCHAR, value INT); +---- {'name:': test, 'value': 30} # Name consists of `{}`, not a problem, with this syntax we expect a name, which is a plain string @@ -125,9 +137,14 @@ SELECT $${description: "Special characters: \\, \", ;, (, )"}$$::STRUCT(descript ---- {'description': Special characters: \, ", ;, (, )} +statement error +SELECT $${first\ name: "John", age: 30}$$::STRUCT("first name" VARCHAR, age INT); +---- +can't be cast to the destination 
type STRUCT("first name" VARCHAR, age INTEGER) + # Valid: Name with escaped space query I -SELECT $${first\ name: "John", age: 30}$$::STRUCT("first name" VARCHAR, age INT); +SELECT $${"first\ name": "John", age: 30}$$::STRUCT("first name" VARCHAR, age INT); ---- {'first name': John, 'age': 30} @@ -137,22 +154,43 @@ SELECT $${\"quote at start\": "value", age: 30}$$::STRUCT("""quote at start""" V ---- {'"quote at start"': value, 'age': 30} +statement error +SELECT $${backslash\\name: "John Doe", age: 30}$$::STRUCT("backslash\name" VARCHAR, age INT); +---- +can't be cast to the destination type STRUCT("backslash\name" VARCHAR, age INTEGER) + # Valid: Name with escaped backslash query I -SELECT $${backslash\\name: "John Doe", age: 30}$$::STRUCT("backslash\name" VARCHAR, age INT); +SELECT $${"backslash\\name": "John Doe", age: 30}$$::STRUCT("backslash\name" VARCHAR, age INT); ---- {'backslash\name': John Doe, 'age': 30} +statement error +SELECT $${user\,name: "Alice", age: 25}$$::STRUCT("user,name" VARCHAR, age INT); +---- +can't be cast to the destination type STRUCT("user,name" VARCHAR, age INTEGER) + # Valid: Name with escaped comma query I -SELECT $${user\,name: "Alice", age: 25}$$::STRUCT("user,name" VARCHAR, age INT); +SELECT $${"user\,name": "Alice", age: 25}$$::STRUCT("user,name" VARCHAR, age INT); ---- {'user,name': Alice, 'age': 25} -# Valid: Name with escaped parenthesis +# Valid: Name with comma query I +SELECT $${"user,name": "Alice", age: 25}$$::STRUCT("user,name" VARCHAR, age INT); +---- +{'user,name': Alice, 'age': 25} + +statement error SELECT $${user\(name\): "Alice", status: "active"}$$::STRUCT("user(name)" VARCHAR, status VARCHAR); ---- +can't be cast to the destination type STRUCT("user(name)" VARCHAR, status VARCHAR) + +# Valid: Name with escaped parenthesis +query I +SELECT $${"user\(name\)": "Alice", status: "active"}$$::STRUCT("user(name)" VARCHAR, status VARCHAR); +---- {'user(name)': Alice, 'status': active} # Valid: Name with unescaped 
parenthesis @@ -163,21 +201,26 @@ SELECT $${user(name): "Alice", status: "active"}$$::STRUCT("user(name)" VARCHAR, # Valid: Name with escaped space at end query I -SELECT $${user\ name\ : "Alice", age\ : 25}$$::STRUCT("user name " VARCHAR, "age " INT); +SELECT $${"user\ name\ ": "Alice", "age ": 25}$$::STRUCT("user name " VARCHAR, "age " INT); ---- {'user name ': Alice, 'age ': 25} +statement error +SELECT $${user\ name\ : "Alice", age\ : 25}$$::STRUCT("user name " VARCHAR, "age " INT); +---- +can't be cast to the destination type STRUCT("user name " VARCHAR, "age " INTEGER) + # Invalid: Name contains unescaped quote statement error SELECT $${"quote"start": "value", age: 30}$$::STRUCT("quote""start" VARCHAR, age INT); ---- can't be cast to the destination type -# Invalid: Name contains unescaped backslash -statement error +# Valid: Name contains unescaped backslash outside of quotes +query I SELECT $${backslash\name: "John", age: 30}$$::STRUCT("backslash\name" VARCHAR, age INT); ---- -can't be cast to the destination type +{'backslash\name': John, 'age': 30} # Valid: Name contains (unescaped) opening parenthesis query I @@ -191,9 +234,14 @@ SELECT $${\": "value", age: 30}$$::STRUCT("""" VARCHAR, age INTEGER) ---- {'"': value, 'age': 30} +statement error +SELECT $${\\: "escaped", age: 30}$$::STRUCT("\" VARCHAR, age INT); +---- +can't be cast to the destination type STRUCT("\" VARCHAR, age INTEGER) + # Name with only a special character (escaped) query I -SELECT $${\\: "escaped", age: 30}$$::STRUCT("\" VARCHAR, age INT); +SELECT $${"\\": "escaped", age: 30}$$::STRUCT("\" VARCHAR, age INT); ---- {'\': escaped, 'age': 30} diff --git a/test/sql/cast/string_to_unnamed_struct.test b/test/sql/cast/string_to_unnamed_struct.test index 5066463b05da..d51c01dcee58 100644 --- a/test/sql/cast/string_to_unnamed_struct.test +++ b/test/sql/cast/string_to_unnamed_struct.test @@ -75,4 +75,4 @@ select [ $$[((" test ")), {'a': (\\ test \\)}]$$ ] ---- -[[{'a': {'inner': test }}, {'a': 
{'inner': \ test \}}], [{'a': {'inner': test }}, {'a': {'inner': \ test \}}]] +[[{'a': {'inner': test }}, {'a': {'inner': \ test \}}], [{'a': {'inner': test }}, {'a': {'inner': \\ test \\}}]] From d0fbc866de09243766472a92f917021b3714b683 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 16:04:27 +0100 Subject: [PATCH 098/142] arena allocator for minmaxn and just skip nulls when creating enum --- .../aggregate/distributive/arg_min_max.cpp | 6 +- .../operator/schema/physical_create_type.cpp | 2 +- .../aggregate/distributive/minmax.cpp | 8 +- .../function/aggregate/minmax_n_helpers.hpp | 97 ++++++++++--------- 4 files changed, 59 insertions(+), 54 deletions(-) diff --git a/extension/core_functions/aggregate/distributive/arg_min_max.cpp b/extension/core_functions/aggregate/distributive/arg_min_max.cpp index 63c112b3ce3c..edb6c77c5371 100644 --- a/extension/core_functions/aggregate/distributive/arg_min_max.cpp +++ b/extension/core_functions/aggregate/distributive/arg_min_max.cpp @@ -545,8 +545,8 @@ class ArgMinMaxNState { BinaryAggregateHeap heap; bool is_initialized = false; - void Initialize(idx_t nval) { - heap.Initialize(nval); + void Initialize(ArenaAllocator &allocator, idx_t nval) { + heap.Initialize(allocator, nval); is_initialized = true; } }; @@ -601,7 +601,7 @@ static void ArgMinMaxNUpdate(Vector inputs[], AggregateInputData &aggr_input, id if (nval >= MAX_N) { throw InvalidInputException("Invalid input for arg_min/arg_max: n value must be < %d", MAX_N); } - state.Initialize(UnsafeNumericCast(nval)); + state.Initialize(aggr_input.allocator, UnsafeNumericCast(nval)); } // Now add the input to the heap diff --git a/src/execution/operator/schema/physical_create_type.cpp b/src/execution/operator/schema/physical_create_type.cpp index 68bc258b36d6..e73ca2662dc8 100644 --- a/src/execution/operator/schema/physical_create_type.cpp +++ b/src/execution/operator/schema/physical_create_type.cpp @@ -51,7 +51,7 @@ SinkResultType 
PhysicalCreateType::Sink(ExecutionContext &context, DataChunk &ch for (idx_t i = 0; i < chunk.size(); i++) { idx_t idx = sdata.sel->get_index(i); if (!sdata.validity.RowIsValid(idx)) { - throw InvalidInputException("Attempted to create ENUM type with NULL value!"); + continue; } auto str = src_ptr[idx]; auto entry = gstate.found_strings.find(src_ptr[idx]); diff --git a/src/function/aggregate/distributive/minmax.cpp b/src/function/aggregate/distributive/minmax.cpp index b862bf6d9623..32a9518e1889 100644 --- a/src/function/aggregate/distributive/minmax.cpp +++ b/src/function/aggregate/distributive/minmax.cpp @@ -412,8 +412,8 @@ class MinMaxNState { UnaryAggregateHeap heap; bool is_initialized = false; - void Initialize(idx_t nval) { - heap.Initialize(nval); + void Initialize(ArenaAllocator &allocator, idx_t nval) { + heap.Initialize(allocator, nval); is_initialized = true; } @@ -432,7 +432,7 @@ static void MinMaxNUpdate(Vector inputs[], AggregateInputData &aggr_input, idx_t UnifiedVectorFormat val_format; UnifiedVectorFormat n_format; UnifiedVectorFormat state_format; - ; + auto val_extra_state = STATE::VAL_TYPE::CreateExtraState(val_vector, count); STATE::VAL_TYPE::PrepareData(val_vector, count, val_extra_state, val_format); @@ -464,7 +464,7 @@ static void MinMaxNUpdate(Vector inputs[], AggregateInputData &aggr_input, idx_t if (nval >= MAX_N) { throw InvalidInputException("Invalid input for MIN/MAX: n value must be < %d", MAX_N); } - state.Initialize(UnsafeNumericCast(nval)); + state.Initialize(aggr_input.allocator, UnsafeNumericCast(nval)); } // Now add the input to the heap diff --git a/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp b/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp index 9c59d11cbd1a..07e1c48e9ea7 100644 --- a/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp +++ b/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp @@ -102,20 +102,22 @@ class UnaryAggregateHeap { public: UnaryAggregateHeap() = default; - 
explicit UnaryAggregateHeap(idx_t capacity_p) : capacity(capacity_p) { - heap.reserve(capacity); + UnaryAggregateHeap(ArenaAllocator &allocator, idx_t capacity_p) { + Initialize(allocator, capacity_p); } - void Initialize(const idx_t capacity_p) { + void Initialize(ArenaAllocator &allocator, const idx_t capacity_p) { capacity = capacity_p; - heap.reserve(capacity); + heap = reinterpret_cast *>(allocator.AllocateAligned(capacity * sizeof(HeapEntry))); + memset(heap, 0, capacity * sizeof(HeapEntry)); + size = 0; } bool IsEmpty() const { - return heap.empty(); + return size == 0; } idx_t Size() const { - return heap.size(); + return size; } idx_t Capacity() const { return capacity; @@ -125,29 +127,28 @@ class UnaryAggregateHeap { D_ASSERT(capacity != 0); // must be initialized // If the heap is not full, insert the value into a new slot - if (heap.size() < capacity) { - heap.emplace_back(); - heap.back().Assign(allocator, value); - std::push_heap(heap.begin(), heap.end(), Compare); + if (size < capacity) { + heap[size++].Assign(allocator, value); + std::push_heap(heap, heap + size, Compare); } // If the heap is full, check if the value is greater than the smallest value in the heap // If it is, assign the new value to the slot and re-heapify - else if (T_COMPARATOR::Operation(value, heap.front().value)) { - std::pop_heap(heap.begin(), heap.end(), Compare); - heap.back().Assign(allocator, value); - std::push_heap(heap.begin(), heap.end(), Compare); + else if (T_COMPARATOR::Operation(value, heap[0].value)) { + std::pop_heap(heap, heap + size, Compare); + heap[size - 1].Assign(allocator, value); + std::push_heap(heap, heap + size, Compare); } - D_ASSERT(std::is_heap(heap.begin(), heap.end(), Compare)); + D_ASSERT(std::is_heap(heap, heap + size, Compare)); } void Insert(ArenaAllocator &allocator, const UnaryAggregateHeap &other) { - for (auto &slot : other.heap) { - Insert(allocator, slot.value); + for (idx_t slot = 0; slot < other.Size(); slot++) { + Insert(allocator, 
other.heap[slot].value); } } - vector> &SortAndGetHeap() { - std::sort_heap(heap.begin(), heap.end(), Compare); + HeapEntry *SortAndGetHeap() { + std::sort_heap(heap, heap + size, Compare); return heap; } @@ -160,8 +161,9 @@ class UnaryAggregateHeap { return T_COMPARATOR::Operation(left.value, right.value); } - vector> heap; idx_t capacity; + HeapEntry *heap; + idx_t size; }; template @@ -171,20 +173,22 @@ class BinaryAggregateHeap { public: BinaryAggregateHeap() = default; - explicit BinaryAggregateHeap(idx_t capacity_p) : capacity(capacity_p) { - heap.reserve(capacity); + BinaryAggregateHeap(ArenaAllocator &allocator, idx_t capacity_p) { + Initialize(allocator, capacity_p); } - void Initialize(const idx_t capacity_p) { + void Initialize(ArenaAllocator &allocator, const idx_t capacity_p) { capacity = capacity_p; - heap.reserve(capacity); + heap = reinterpret_cast(allocator.AllocateAligned(capacity * sizeof(STORAGE_TYPE))); + memset(heap, 0, capacity * sizeof(STORAGE_TYPE)); + size = 0; } bool IsEmpty() const { - return heap.empty(); + return size == 0; } idx_t Size() const { - return heap.size(); + return size; } idx_t Capacity() const { return capacity; @@ -194,31 +198,31 @@ class BinaryAggregateHeap { D_ASSERT(capacity != 0); // must be initialized // If the heap is not full, insert the value into a new slot - if (heap.size() < capacity) { - heap.emplace_back(); - heap.back().first.Assign(allocator, key); - heap.back().second.Assign(allocator, value); - std::push_heap(heap.begin(), heap.end(), Compare); + if (size < capacity) { + heap[size].first.Assign(allocator, key); + heap[size].second.Assign(allocator, value); + size++; + std::push_heap(heap, heap + size, Compare); } // If the heap is full, check if the value is greater than the smallest value in the heap // If it is, assign the new value to the slot and re-heapify - else if (K_COMPARATOR::Operation(key, heap.front().first.value)) { - std::pop_heap(heap.begin(), heap.end(), Compare); - 
heap.back().first.Assign(allocator, key); - heap.back().second.Assign(allocator, value); - std::push_heap(heap.begin(), heap.end(), Compare); + else if (K_COMPARATOR::Operation(key, heap[0].first.value)) { + std::pop_heap(heap, heap + size, Compare); + heap[size - 1].first.Assign(allocator, key); + heap[size - 1].second.Assign(allocator, value); + std::push_heap(heap, heap + size, Compare); } - D_ASSERT(std::is_heap(heap.begin(), heap.end(), Compare)); + D_ASSERT(std::is_heap(heap, heap + size, Compare)); } void Insert(ArenaAllocator &allocator, const BinaryAggregateHeap &other) { - for (auto &slot : other.heap) { - Insert(allocator, slot.first.value, slot.second.value); + for (idx_t slot = 0; slot < other.Size(); slot++) { + Insert(allocator, other.heap[slot].first.value, other.heap[slot].second.value); } } - vector &SortAndGetHeap() { - std::sort_heap(heap.begin(), heap.end(), Compare); + STORAGE_TYPE *SortAndGetHeap() { + std::sort_heap(heap, heap + size, Compare); return heap; } @@ -231,8 +235,9 @@ class BinaryAggregateHeap { return K_COMPARATOR::Operation(left.first.value, right.first.value); } - vector heap; idx_t capacity; + STORAGE_TYPE *heap; + idx_t size; }; //------------------------------------------------------------------------------ @@ -326,7 +331,7 @@ struct MinMaxNOperation { } if (!target.is_initialized) { - target.Initialize(source.heap.Capacity()); + target.Initialize(aggr_input.allocator, source.heap.Capacity()); } else if (source.heap.Capacity() != target.heap.Capacity()) { throw InvalidInputException("Mismatched n values in min/max/arg_min/arg_max"); } @@ -377,10 +382,10 @@ struct MinMaxNOperation { list_entry.length = state.heap.Size(); // Turn the heap into a sorted list, invalidating the heap property - auto &heap = state.heap.SortAndGetHeap(); + auto heap = state.heap.SortAndGetHeap(); - for (const auto &slot : heap) { - STATE::VAL_TYPE::Assign(child_data, current_offset++, state.heap.GetValue(slot)); + for (idx_t slot = 0; slot < 
state.heap.Size(); slot++) { + STATE::VAL_TYPE::Assign(child_data, current_offset++, state.heap.GetValue(heap[slot])); } } From 4a2097101da48c83edc301e7fdde77c3365979f1 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Fri, 14 Feb 2025 16:16:33 +0100 Subject: [PATCH 099/142] codequality fixes and buffer-manage parquet columndatacollections --- benchmark/parquet/clickbench_write.benchmark | 2 +- extension/parquet/include/parquet_writer.hpp | 1 - extension/parquet/parquet_extension.cpp | 3 ++- extension/parquet/parquet_writer.cpp | 8 ++------ .../operator/persistent/physical_batch_copy_to_file.cpp | 9 ++++++--- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/benchmark/parquet/clickbench_write.benchmark b/benchmark/parquet/clickbench_write.benchmark index 7f22b7fe706d..2a4f3bc6340e 100644 --- a/benchmark/parquet/clickbench_write.benchmark +++ b/benchmark/parquet/clickbench_write.benchmark @@ -1,6 +1,6 @@ # name: benchmark/parquet/clickbench_write.benchmark # description: Write ClickBench data to Parquet -# group: [clickbench] +# group: [parquet] require httpfs diff --git a/extension/parquet/include/parquet_writer.hpp b/extension/parquet/include/parquet_writer.hpp index 8af50765e50f..b16a43fab8cb 100644 --- a/extension/parquet/include/parquet_writer.hpp +++ b/extension/parquet/include/parquet_writer.hpp @@ -36,7 +36,6 @@ class Deserializer; struct PreparedRowGroup { duckdb_parquet::RowGroup row_group; vector> states; - vector> heaps; }; struct FieldID; diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index adc5ee37cd06..aae6f44b3be5 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -222,7 +222,8 @@ struct ParquetWriteGlobalState : public GlobalFunctionData { struct ParquetWriteLocalState : public LocalFunctionData { explicit ParquetWriteLocalState(ClientContext &context, const vector &types) - : buffer(BufferAllocator::Get(context), types) { + : 
buffer(context, types) { + buffer.SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data buffer.InitializeAppend(append_state); } diff --git a/extension/parquet/parquet_writer.cpp b/extension/parquet/parquet_writer.cpp index b3af8efe3e81..4a8e38bba44b 100644 --- a/extension/parquet/parquet_writer.cpp +++ b/extension/parquet/parquet_writer.cpp @@ -389,9 +389,8 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro // We write 8 columns at a time so that iterating over ColumnDataCollection is more efficient static constexpr idx_t COLUMNS_PER_PASS = 8; - // We want these to be in-memory/hybrid so we don't have to copy over strings to the dictionary - D_ASSERT(buffer.GetAllocatorType() == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR || - buffer.GetAllocatorType() == ColumnDataAllocatorType::HYBRID); + // We want these to be buffer-managed + D_ASSERT(buffer.GetAllocatorType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR); // set up a new row group for this chunk collection auto &row_group = result.row_group; @@ -451,7 +450,6 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro states.push_back(std::move(write_state)); } } - result.heaps = buffer.GetHeapReferences(); } // Validation code adapted from Impala @@ -509,8 +507,6 @@ void ParquetWriter::FlushRowGroup(PreparedRowGroup &prepared) { // append the row group to the file meta data file_meta_data.row_groups.push_back(row_group); file_meta_data.num_rows += row_group.num_rows; - - prepared.heaps.clear(); } void ParquetWriter::Flush(ColumnDataCollection &buffer) { diff --git a/src/execution/operator/persistent/physical_batch_copy_to_file.cpp b/src/execution/operator/persistent/physical_batch_copy_to_file.cpp index 4effccaff4b7..8b3fa0b9b119 100644 --- a/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +++ b/src/execution/operator/persistent/physical_batch_copy_to_file.cpp @@ -143,7 +143,8 @@ class 
FixedBatchCopyLocalState : public LocalSinkState { FixedBatchCopyState current_task = FixedBatchCopyState::SINKING_DATA; void InitializeCollection(ClientContext &context, const PhysicalOperator &op) { - collection = make_uniq(BufferAllocator::Get(context), op.children[0]->types); + collection = make_uniq(context, op.children[0]->types); + collection->SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data collection->InitializeAppend(append_state); local_memory_usage = 0; } @@ -434,7 +435,8 @@ void PhysicalBatchCopyToFile::RepartitionBatches(ClientContext &context, GlobalS // the collection is too large for a batch - we need to repartition // create an empty collection auto new_collection = - make_uniq(BufferAllocator::Get(context), children[0]->types); + make_uniq(context, children[0]->types); + new_collection->SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data append_batch = make_uniq(0U, std::move(new_collection)); } if (append_batch) { @@ -458,7 +460,8 @@ void PhysicalBatchCopyToFile::RepartitionBatches(ClientContext &context, GlobalS // the collection is full - move it to the result and create a new one task_manager.AddTask(make_uniq(gstate.scheduled_batch_index++, std::move(append_batch))); - auto new_collection = make_uniq(BufferAllocator::Get(context), children[0]->types); + auto new_collection = make_uniq(context, children[0]->types); + new_collection->SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data append_batch = make_uniq(0U, std::move(new_collection)); append_batch->collection->InitializeAppend(append_state); } From 0682cec6d5aaace2d4eadb6fc7d03d15134518fd Mon Sep 17 00:00:00 2001 From: Mathias Lafeldt Date: Sat, 15 Feb 2025 21:17:24 +0100 Subject: [PATCH 100/142] Include extension_util.hpp in libduckdb --- scripts/amalgamation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/amalgamation.py b/scripts/amalgamation.py index 325cc19f1521..1ba307278910 
100644 --- a/scripts/amalgamation.py +++ b/scripts/amalgamation.py @@ -39,6 +39,7 @@ os.path.join(include_dir, 'duckdb', 'common', 'serializer', 'memory_stream.hpp'), os.path.join(include_dir, 'duckdb', 'main', 'appender.hpp'), os.path.join(include_dir, 'duckdb', 'main', 'client_context.hpp'), + os.path.join(include_dir, 'duckdb', 'main', 'extension_util.hpp'), os.path.join(include_dir, 'duckdb', 'function', 'function.hpp'), os.path.join(include_dir, 'duckdb', 'function', 'table_function.hpp'), os.path.join(include_dir, 'duckdb', 'parser', 'parsed_data', 'create_table_function_info.hpp'), From 141c449eb7f1f828daa6dfb932b164f485c1a8cb Mon Sep 17 00:00:00 2001 From: Mathias Lafeldt Date: Sat, 15 Feb 2025 13:43:27 +0100 Subject: [PATCH 101/142] Report errors caused by get_database in C extensions --- src/main/extension/extension_load.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/extension/extension_load.cpp b/src/main/extension/extension_load.cpp index 4be4588371cf..dfdb5a306bcc 100644 --- a/src/main/extension/extension_load.cpp +++ b/src/main/extension/extension_load.cpp @@ -92,9 +92,11 @@ struct ExtensionAccess { load_state.database_data->database = make_shared_ptr(load_state.db); return reinterpret_cast(load_state.database_data.get()); } catch (std::exception &ex) { + load_state.has_error = true; load_state.error_data = ErrorData(ex); return nullptr; } catch (...) 
{ + load_state.has_error = true; load_state.error_data = ErrorData(ExceptionType::UNKNOWN_TYPE, "Unknown error in GetDatabase when trying to load extension!"); return nullptr; From a46237bd200242f148c2c16cba8ba49289f6a051 Mon Sep 17 00:00:00 2001 From: Mathias Lafeldt Date: Sat, 15 Feb 2025 13:44:59 +0100 Subject: [PATCH 102/142] Simplify SetError --- src/main/extension/extension_load.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/main/extension/extension_load.cpp b/src/main/extension/extension_load.cpp index dfdb5a306bcc..5499665f1a27 100644 --- a/src/main/extension/extension_load.cpp +++ b/src/main/extension/extension_load.cpp @@ -71,15 +71,11 @@ struct ExtensionAccess { static void SetError(duckdb_extension_info info, const char *error) { auto &load_state = DuckDBExtensionLoadState::Get(info); - if (error) { - load_state.has_error = true; - load_state.error_data = ErrorData(error); - } else { - load_state.has_error = true; - load_state.error_data = ErrorData( - ExceptionType::UNKNOWN_TYPE, - "Extension has indicated an error occured during initialization, but did not set an error message."); - } + load_state.has_error = true; + load_state.error_data = + error ? ErrorData(error) + : ErrorData(ExceptionType::UNKNOWN_TYPE, "Extension has indicated an error occured during " + "initialization, but did not set an error message."); } //! 
Called by the extension get a pointer to the database that is loading it From 381f75e76ff870b129773c34effe5f7aeb510ee5 Mon Sep 17 00:00:00 2001 From: Richard Wesley <13156216+hawkfish@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:06:34 +1300 Subject: [PATCH 103/142] Issue #16250: Window Range Performance * Use two cursors for range searches * Reduces benchmark time from 35s to 25s --- .../window/window_boundaries_state.cpp | 110 +++++++++++------- .../window/window_boundaries_state.hpp | 4 + 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/src/function/window/window_boundaries_state.cpp b/src/function/window/window_boundaries_state.cpp index ce3ba3bbeb85..6ee3c105234d 100644 --- a/src/function/window/window_boundaries_state.cpp +++ b/src/function/window/window_boundaries_state.cpp @@ -180,9 +180,9 @@ struct OperationCompare : public std::function { }; template -static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, const idx_t order_end, - const WindowBoundary range, WindowInputExpression &boundary, const idx_t chunk_idx, - const FrameBounds &prev) { +static idx_t FindTypedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const idx_t order_begin, + const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary, + const idx_t chunk_idx, const FrameBounds &prev) { D_ASSERT(!boundary.CellIsNull(chunk_idx)); const auto val = boundary.GetCell(chunk_idx); @@ -191,14 +191,14 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co // Check that the value we are searching for is in range. 
if (range == WindowBoundary::EXPR_PRECEDING_RANGE) { // Preceding but value past the current value - const auto cur_val = over.GetCell(0, order_end - 1); + const auto cur_val = range_hi.GetCell(0, order_end - 1); if (comp(cur_val, val)) { throw OutOfRangeException("Invalid RANGE PRECEDING value"); } } else { // Following but value before the current value D_ASSERT(range == WindowBoundary::EXPR_FOLLOWING_RANGE); - const auto cur_val = over.GetCell(0, order_begin); + const auto cur_val = range_lo.GetCell(0, order_begin); if (comp(val, cur_val)) { throw OutOfRangeException("Invalid RANGE FOLLOWING value"); } @@ -206,18 +206,18 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co // Try to reuse the previous bounds to restrict the search. // This is only valid if the previous bounds were non-empty // Only inject the comparisons if the previous bounds are a strict subset. - WindowColumnIterator begin(over, order_begin); - WindowColumnIterator end(over, order_end); + WindowColumnIterator begin(range_lo, order_begin); + WindowColumnIterator end(range_hi, order_end); if (prev.start < prev.end) { if (order_begin < prev.start && prev.start < order_end) { - const auto first = over.GetCell(0, prev.start); + const auto first = range_lo.GetCell(0, prev.start); if (!comp(val, first)) { // prev.first <= val, so we can start further forward begin += UnsafeNumericCast(prev.start - order_begin); } } if (order_begin < prev.end && prev.end < order_end) { - const auto second = over.GetCell(0, prev.end - 1); + const auto second = range_hi.GetCell(0, prev.end - 1); if (!comp(second, val)) { // val <= prev.second, so we can end further back // (prev.second is the largest peer) @@ -234,52 +234,65 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co } template -static idx_t FindRangeBound(WindowCursor &over, const idx_t order_begin, const idx_t order_end, - const WindowBoundary range, WindowInputExpression &boundary, const idx_t 
chunk_idx, - const FrameBounds &prev) { +static idx_t FindRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const idx_t order_begin, + const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary, + const idx_t chunk_idx, const FrameBounds &prev) { switch (boundary.InternalType()) { case PhysicalType::INT8: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::INT16: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::INT32: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::INT64: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::UINT8: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::UINT16: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::UINT32: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::UINT64: - return FindTypedRangeBound(over, 
order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::INT128: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::UINT128: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, - prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::FLOAT: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::DOUBLE: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); case PhysicalType::INTERVAL: - return FindTypedRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, - prev); + return FindTypedRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, + chunk_idx, prev); default: throw InternalException("Unsupported column type for RANGE"); } } template -static idx_t FindOrderedRangeBound(WindowCursor &over, const OrderType range_sense, const idx_t order_begin, - const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary, - const idx_t chunk_idx, const FrameBounds &prev) { +static idx_t FindOrderedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const OrderType range_sense, + const idx_t order_begin, const idx_t order_end, const WindowBoundary range, + WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) { switch (range_sense) { case OrderType::ASCENDING: - return 
FindRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, chunk_idx, + prev); case OrderType::DESCENDING: - return FindRangeBound(over, order_begin, order_end, range, boundary, chunk_idx, prev); + return FindRangeBound(range_lo, range_hi, order_begin, order_end, range, boundary, chunk_idx, + prev); default: throw InternalException("Unsupported ORDER BY sense for RANGE"); } @@ -718,6 +731,13 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i prev.start = valid_begin_data[0]; prev.end = valid_end_data[0]; + if (has_preceding_range || has_following_range) { + if (range_lo.get() != range.get()) { + range_lo = range.get(); + range_hi = range_lo->Copy(); + } + } + switch (start_boundary) { case WindowBoundary::UNBOUNDED_PRECEDING: bounds.data[FRAME_BEGIN].Reference(bounds.data[PARTITION_BEGIN]); @@ -766,7 +786,12 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i } else { const auto valid_start = valid_begin_data[chunk_idx]; prev.end = valid_end_data[chunk_idx]; - window_start = FindOrderedRangeBound(*range, range_sense, valid_start, row_idx + 1, + const auto cur_partition = partition_begin_data[chunk_idx]; + if (cur_partition != prev_partition) { + prev.start = valid_start; + prev_partition = cur_partition; + } + window_start = FindOrderedRangeBound(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1, start_boundary, boundary_begin, chunk_idx, prev); prev.start = window_start; } @@ -785,8 +810,8 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i prev.start = valid_begin_data[chunk_idx]; prev_partition = cur_partition; } - window_start = FindOrderedRangeBound(*range, range_sense, row_idx, valid_end, start_boundary, - boundary_begin, chunk_idx, prev); + window_start = FindOrderedRangeBound(*range_lo, *range_hi, range_sense, row_idx, valid_end, + start_boundary, 
boundary_begin, chunk_idx, prev); prev.start = window_start; } frame_begin_data[chunk_idx] = window_start; @@ -862,6 +887,13 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx prev.start = valid_begin_data[0]; prev.end = valid_end_data[0]; + if (has_preceding_range || has_following_range) { + if (range_lo.get() != range.get()) { + range_lo = range.get(); + range_hi = range_lo->Copy(); + } + } + switch (end_boundary) { case WindowBoundary::CURRENT_ROW_ROWS: for (idx_t chunk_idx = 0; chunk_idx < count; ++chunk_idx, ++row_idx) { @@ -911,8 +943,8 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx } else { const auto valid_start = valid_begin_data[chunk_idx]; prev.start = valid_start; - window_end = FindOrderedRangeBound(*range, range_sense, valid_start, row_idx + 1, end_boundary, - boundary_end, chunk_idx, prev); + window_end = FindOrderedRangeBound(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1, + end_boundary, boundary_end, chunk_idx, prev); prev.end = window_end; } frame_end_data[chunk_idx] = window_end; @@ -930,8 +962,8 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx prev.end = valid_end; prev_partition = cur_partition; } - window_end = FindOrderedRangeBound(*range, range_sense, row_idx, valid_end, end_boundary, - boundary_end, chunk_idx, prev); + window_end = FindOrderedRangeBound(*range_lo, *range_hi, range_sense, row_idx, valid_end, + end_boundary, boundary_end, chunk_idx, prev); prev.end = window_end; } frame_end_data[chunk_idx] = window_end; diff --git a/src/include/duckdb/function/window/window_boundaries_state.hpp b/src/include/duckdb/function/window/window_boundaries_state.hpp index 2748bc7a0600..11c724d9b638 100644 --- a/src/include/duckdb/function/window/window_boundaries_state.hpp +++ b/src/include/duckdb/function/window/window_boundaries_state.hpp @@ -148,6 +148,10 @@ struct WindowBoundariesState { idx_t valid_end = 0; FrameBounds prev; + + 
// Extra range cursor + optional_ptr range_lo; + unique_ptr range_hi; }; } // namespace duckdb From 5ac9f9e1840ebb5e4461565cc8617006b321617d Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Mon, 17 Feb 2025 10:27:15 +0100 Subject: [PATCH 104/142] format/test fixes for parquet writer --- extension/parquet/parquet_extension.cpp | 3 +-- src/common/types/column/column_data_collection.cpp | 2 +- .../operator/persistent/physical_batch_copy_to_file.cpp | 3 +-- test/sql/copy/parquet/writer/parquet_write_memory_usage.test | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index aae6f44b3be5..cb86e1bd93bb 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -221,8 +221,7 @@ struct ParquetWriteGlobalState : public GlobalFunctionData { }; struct ParquetWriteLocalState : public LocalFunctionData { - explicit ParquetWriteLocalState(ClientContext &context, const vector &types) - : buffer(context, types) { + explicit ParquetWriteLocalState(ClientContext &context, const vector &types) : buffer(context, types) { buffer.SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data buffer.InitializeAppend(append_state); } diff --git a/src/common/types/column/column_data_collection.cpp b/src/common/types/column/column_data_collection.cpp index 17be6722389b..a2480f44d624 100644 --- a/src/common/types/column/column_data_collection.cpp +++ b/src/common/types/column/column_data_collection.cpp @@ -562,7 +562,7 @@ void ColumnDataCopy(ColumnDataMetaData &meta_data, const UnifiedVector offset += append_count; remaining -= append_count; - if (vector_remaining - append_count == 0) { + if (remaining != 0 && vector_remaining - append_count == 0) { // need to append more, check if we need to allocate a new vector or not if (!current_segment.next_data.IsValid()) { segment.AllocateVector(source.GetType(), meta_data.chunk_data, 
append_state, current_index); diff --git a/src/execution/operator/persistent/physical_batch_copy_to_file.cpp b/src/execution/operator/persistent/physical_batch_copy_to_file.cpp index 8b3fa0b9b119..80a761d34f20 100644 --- a/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +++ b/src/execution/operator/persistent/physical_batch_copy_to_file.cpp @@ -434,8 +434,7 @@ void PhysicalBatchCopyToFile::RepartitionBatches(ClientContext &context, GlobalS } else { // the collection is too large for a batch - we need to repartition // create an empty collection - auto new_collection = - make_uniq(context, children[0]->types); + auto new_collection = make_uniq(context, children[0]->types); new_collection->SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data append_batch = make_uniq(0U, std::move(new_collection)); } diff --git a/test/sql/copy/parquet/writer/parquet_write_memory_usage.test b/test/sql/copy/parquet/writer/parquet_write_memory_usage.test index 2e91149a5cea..6bfc26d0fdf6 100644 --- a/test/sql/copy/parquet/writer/parquet_write_memory_usage.test +++ b/test/sql/copy/parquet/writer/parquet_write_memory_usage.test @@ -9,7 +9,7 @@ load __TEST_DIR__/parquet_write_memory_usage.db statement ok set threads=1 -foreach memory_limit,row_group_size 0.5mb,20480 1.0mb,40960 +foreach memory_limit,row_group_size 0.6mb,20480 1.2mb,40960 statement ok set memory_limit='${memory_limit}' From 3c90da4674539789ed29bee41900dc0ff8cf3669 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 17 Feb 2025 10:44:01 +0100 Subject: [PATCH 105/142] whenever seed is set, parallel sink is false --- .../operator/helper/physical_streaming_sample.cpp | 2 +- test/sql/sample/bernoulli_sampling.test | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/execution/operator/helper/physical_streaming_sample.cpp b/src/execution/operator/helper/physical_streaming_sample.cpp index 721717989f88..ed9e21f35195 100644 --- 
a/src/execution/operator/helper/physical_streaming_sample.cpp +++ b/src/execution/operator/helper/physical_streaming_sample.cpp @@ -51,7 +51,7 @@ void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &resul } bool PhysicalStreamingSample::ParallelOperator() const { - return !sample_options->repeatable; + return !(sample_options->repeatable || sample_options->seed.IsValid()); } unique_ptr PhysicalStreamingSample::GetOperatorState(ExecutionContext &context) const { diff --git a/test/sql/sample/bernoulli_sampling.test b/test/sql/sample/bernoulli_sampling.test index e8953bb38e40..95b3e3796c8f 100644 --- a/test/sql/sample/bernoulli_sampling.test +++ b/test/sql/sample/bernoulli_sampling.test @@ -26,18 +26,16 @@ INSERT INTO output select count(*) as n_rows FROM sampled; endloop -query III -select min(num_rows) > 0, max(num_rows) < 25, count(*) FILTER (num_rows = 0) = 0 from output; +query II +select min(num_rows) > 0, count(*) FILTER (num_rows = 0) = 0 from output; ---- -true true true +true true query III select avg(rowid), min(rowid), max(rowid) from output where num_rows = 0; ---- NULL NULL NULL - - statement ok create table t1 as select range id from range(1000); From 0483d905743300366f838a9610c0a222f3f41a2a Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:03:31 +0100 Subject: [PATCH 106/142] merge resolution --- src/execution/operator/persistent/physical_batch_insert.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 2cc4ea709fe4..0415585dd7f3 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -229,7 +229,7 @@ class MergeCollectionTask : public BatchInsertTask { auto &g_state = g_state_p.Cast(); auto &l_state = l_state_p.Cast(); - // Merge the 
collections. + // Merge the collections. if (!l_state.writer) { l_state.writer = &g_state.table.GetStorage().CreateOptimisticWriter(context); } From 694ad70bd30b2b7d06be15c4305b7713734e2979 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Mon, 17 Feb 2025 11:11:18 +0100 Subject: [PATCH 107/142] some ci fixes --- .../duckdb/function/aggregate/minmax_n_helpers.hpp | 10 ++++++---- test/sql/types/enum/test_enum_from_query.test_slow | 7 +++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp b/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp index 07e1c48e9ea7..a26772819c43 100644 --- a/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp +++ b/src/include/duckdb/function/aggregate/minmax_n_helpers.hpp @@ -108,8 +108,9 @@ class UnaryAggregateHeap { void Initialize(ArenaAllocator &allocator, const idx_t capacity_p) { capacity = capacity_p; - heap = reinterpret_cast *>(allocator.AllocateAligned(capacity * sizeof(HeapEntry))); - memset(heap, 0, capacity * sizeof(HeapEntry)); + auto ptr = allocator.AllocateAligned(capacity * sizeof(HeapEntry)); + memset(ptr, 0, capacity * sizeof(HeapEntry)); + heap = reinterpret_cast *>(ptr); size = 0; } @@ -179,8 +180,9 @@ class BinaryAggregateHeap { void Initialize(ArenaAllocator &allocator, const idx_t capacity_p) { capacity = capacity_p; - heap = reinterpret_cast(allocator.AllocateAligned(capacity * sizeof(STORAGE_TYPE))); - memset(heap, 0, capacity * sizeof(STORAGE_TYPE)); + auto ptr = allocator.AllocateAligned(capacity * sizeof(STORAGE_TYPE)); + memset(ptr, 0, capacity * sizeof(STORAGE_TYPE)); + heap = reinterpret_cast(ptr); size = 0; } diff --git a/test/sql/types/enum/test_enum_from_query.test_slow b/test/sql/types/enum/test_enum_from_query.test_slow index d9d1d68cd2e6..68697606bcb5 100644 --- a/test/sql/types/enum/test_enum_from_query.test_slow +++ b/test/sql/types/enum/test_enum_from_query.test_slow @@ -107,10 +107,9 @@ DROP TABLE 
number_str; statement ok DROP TYPE number_enum; -# Throw exception for NULL -statement error -CREATE TYPE number_enum AS ENUM (SELECT NULL::VARCHAR); ----- +# This just creates an empty enum type +statement ok +CREATE TYPE empty_number_enum AS ENUM (SELECT NULL::VARCHAR); # Test inserted order statement ok From 7b9d464b956ca02705d36b9a7995098fe1f958e0 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 12:47:31 +0100 Subject: [PATCH 108/142] Modify histogram test to statement ok since the test can be inconsistent on different platforms --- .../aggregates/histogram_table_function.test | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/test/sql/aggregate/aggregates/histogram_table_function.test b/test/sql/aggregate/aggregates/histogram_table_function.test index 3b54ef1270e8..a5406cf897b1 100644 --- a/test/sql/aggregate/aggregates/histogram_table_function.test +++ b/test/sql/aggregate/aggregates/histogram_table_function.test @@ -64,19 +64,8 @@ x <= 12 13 statement ok INSERT INTO integers VALUES (99999999) -query II +statement ok SELECT * FROM histogram_values(integers, i, technique := 'equi-height') ----- -12 13 -25 13 -38 13 -50 13 -63 13 -76 13 -88 13 -101 13 -114 13 -99999999 13 # sample integers query II From bf1d472d689be49565e1029bc8ee0b132b734415 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 12:49:52 +0100 Subject: [PATCH 109/142] Check avg count --- test/sql/aggregate/aggregates/histogram_table_function.test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/sql/aggregate/aggregates/histogram_table_function.test b/test/sql/aggregate/aggregates/histogram_table_function.test index a5406cf897b1..846789118d0b 100644 --- a/test/sql/aggregate/aggregates/histogram_table_function.test +++ b/test/sql/aggregate/aggregates/histogram_table_function.test @@ -64,8 +64,10 @@ x <= 12 13 statement ok INSERT INTO integers VALUES (99999999) -statement ok -SELECT * FROM histogram_values(integers, 
i, technique := 'equi-height') +query II +SELECT COUNT(*), AVG(count) FROM histogram_values(integers, i, technique := 'equi-height') +---- +10 13 # sample integers query II From 8651a48618241d72ecaed30684939f202b709602 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 17 Feb 2025 13:12:17 +0100 Subject: [PATCH 110/142] move optimistic writers --- .../persistent/physical_batch_insert.cpp | 28 ++++++------ .../operator/persistent/physical_insert.cpp | 13 +++--- .../operator/persistent/physical_insert.hpp | 3 +- src/include/duckdb/storage/data_table.hpp | 6 +-- .../duckdb/transaction/local_storage.hpp | 12 ++--- src/storage/data_table.cpp | 9 +--- src/storage/local_storage.cpp | 44 +++---------------- 7 files changed, 39 insertions(+), 76 deletions(-) diff --git a/src/execution/operator/persistent/physical_batch_insert.cpp b/src/execution/operator/persistent/physical_batch_insert.cpp index 0415585dd7f3..3caaff78914d 100644 --- a/src/execution/operator/persistent/physical_batch_insert.cpp +++ b/src/execution/operator/persistent/physical_batch_insert.cpp @@ -193,7 +193,7 @@ class BatchInsertLocalState : public LocalSinkState { idx_t current_index; TableAppendState current_append_state; PhysicalIndex collection_index; - optional_ptr writer; + unique_ptr optimistic_writer; unique_ptr constraint_state; void CreateNewCollection(ClientContext &context, DuckTableEntry &table_entry, @@ -230,10 +230,10 @@ class MergeCollectionTask : public BatchInsertTask { auto &l_state = l_state_p.Cast(); // Merge the collections. 
- if (!l_state.writer) { - l_state.writer = &g_state.table.GetStorage().CreateOptimisticWriter(context); + if (!l_state.optimistic_writer) { + l_state.optimistic_writer = make_uniq(g_state.table.GetStorage()); } - auto result_collection_index = g_state.MergeCollections(context, merge_collections, *l_state.writer); + auto result_collection_index = g_state.MergeCollections(context, merge_collections, *l_state.optimistic_writer); merge_collections.clear(); lock_guard l(g_state.lock); @@ -474,7 +474,7 @@ SinkNextBatchType PhysicalBatchInsert::NextBatch(ExecutionContext &context, Oper auto &collection = gstate.table.GetStorage().GetOptimisticCollection(context.client, lstate.collection_index); collection.FinalizeAppend(tdata, lstate.current_append_state); gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(), - lstate.collection_index, lstate.writer); + lstate.collection_index, lstate.optimistic_writer); bool any_unblocked; { @@ -530,8 +530,8 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c lock_guard l(gstate.lock); // no collection yet: create a new one lstate.CreateNewCollection(context.client, table, insert_types); - if (!lstate.writer) { - lstate.writer = &table.GetStorage().CreateOptimisticWriter(context.client); + if (!lstate.optimistic_writer) { + lstate.optimistic_writer = make_uniq(table.GetStorage()); } } @@ -549,7 +549,7 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c auto new_row_group = collection.Append(lstate.insert_chunk, lstate.current_append_state); if (new_row_group) { // we have already written to disk - flush the next row group as well - lstate.writer->WriteNewRowGroup(collection); + lstate.optimistic_writer->WriteNewRowGroup(collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -577,9 +577,10 @@ SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, Op lstate.collection_index = 
PhysicalIndex(DConstants::INVALID_INDEX); } } - if (lstate.writer) { + if (lstate.optimistic_writer) { lock_guard l(gstate.lock); - gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer); + auto &optimistic_writer = gstate.table.GetStorage().GetOptimisticWriter(context.client); + optimistic_writer.Merge(*lstate.optimistic_writer); } // unblock any blocked tasks @@ -636,9 +637,9 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, // now that we have created all of the mergers, perform the actual merging vector final_collections; final_collections.reserve(mergers.size()); - auto &writer = data_table.CreateOptimisticWriter(context); + auto writer = make_uniq(data_table); for (auto &merger : mergers) { - final_collections.push_back(merger->Flush(writer)); + final_collections.push_back(merger->Flush(*writer)); } // finally, merge the row groups into the local storage @@ -648,7 +649,8 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, data_table.ResetOptimisticCollection(context, collection_index); } - data_table.FinalizeOptimisticWriter(context, writer); + auto &optimistic_writer = data_table.GetOptimisticWriter(context); + optimistic_writer.Merge(*writer); memory_manager.FinalCheck(); return SinkFinalizeType::READY; } diff --git a/src/execution/operator/persistent/physical_insert.cpp b/src/execution/operator/persistent/physical_insert.cpp index 8e206c60bb05..8d5572be200c 100644 --- a/src/execution/operator/persistent/physical_insert.cpp +++ b/src/execution/operator/persistent/physical_insert.cpp @@ -665,7 +665,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, gstate.insert_count += lstate.insert_chunk.size(); gstate.insert_count += updated_tuples; - if (!parallel && return_chunk) { + if (return_chunk) { gstate.return_collection.Append(lstate.insert_chunk); } storage.LocalAppend(gstate.append_state, context.client, lstate.insert_chunk, true); 
@@ -692,7 +692,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, collection->InitializeAppend(lstate.local_append_state); lock_guard l(gstate.lock); - lstate.writer = data_table.CreateOptimisticWriter(context.client); + lstate.optimistic_writer = make_uniq(data_table); lstate.collection_index = data_table.CreateOptimisticCollection(context.client, std::move(collection)); } @@ -702,7 +702,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, auto &collection = data_table.GetOptimisticCollection(context.client, lstate.collection_index); auto new_row_group = collection.Append(lstate.insert_chunk, lstate.local_append_state); if (new_row_group) { - lstate.writer->WriteNewRowGroup(collection); + lstate.optimistic_writer->WriteNewRowGroup(collection); } return SinkResultType::NEED_MORE_INPUT; } @@ -743,10 +743,11 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato storage.FinalizeLocalAppend(gstate.append_state); } else { // we have written rows to disk optimistically - merge directly into the transaction-local storage - lstate.writer->WriteLastRowGroup(collection); - lstate.writer->FinalFlush(); + lstate.optimistic_writer->WriteLastRowGroup(collection); + lstate.optimistic_writer->FinalFlush(); gstate.table.GetStorage().LocalMerge(context.client, collection); - gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer); + auto &optimistic_writer = gstate.table.GetStorage().GetOptimisticWriter(context.client); + optimistic_writer.Merge(*lstate.optimistic_writer); } return SinkCombineResultType::FINISHED; diff --git a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp index 9a800ae82678..ffa4f6b224e3 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_insert.hpp @@ -16,6 
+16,7 @@ #include "duckdb/storage/table/append_state.hpp" #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/storage/table/delete_state.hpp" +#include "duckdb/storage/optimistic_data_writer.hpp" namespace duckdb { @@ -55,7 +56,7 @@ class InsertLocalState : public LocalSinkState { TableAppendState local_append_state; //! An index to the optimistic row group collection vector of the local table storage for this transaction. PhysicalIndex collection_index; - optional_ptr writer; + unique_ptr optimistic_writer; // Rows that have been updated by a DO UPDATE conflict unordered_set updated_rows; idx_t update_count = 0; diff --git a/src/include/duckdb/storage/data_table.hpp b/src/include/duckdb/storage/data_table.hpp index c282a2a6560e..5d9f6c057aed 100644 --- a/src/include/duckdb/storage/data_table.hpp +++ b/src/include/duckdb/storage/data_table.hpp @@ -29,7 +29,6 @@ class ColumnDataCollection; class ColumnDefinition; class DataTable; class DuckTransaction; -class OptimisticDataWriter; class RowGroup; class StorageManager; class TableCatalogEntry; @@ -122,9 +121,8 @@ class DataTable { RowGroupCollection &GetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(ClientContext &context, const PhysicalIndex collection_index); - //! Create an optimistic writer for this table. Used for optimistically writing parallel appends. - OptimisticDataWriter &CreateOptimisticWriter(ClientContext &context); - void FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer); + //! Returns the optimistic writer of the corresponding local table. 
+ OptimisticDataWriter &GetOptimisticWriter(ClientContext &context); unique_ptr InitializeDelete(TableCatalogEntry &table, ClientContext &context, const vector> &bound_constraints); diff --git a/src/include/duckdb/transaction/local_storage.hpp b/src/include/duckdb/transaction/local_storage.hpp index 83adef5387d7..4119e968dc71 100644 --- a/src/include/duckdb/transaction/local_storage.hpp +++ b/src/include/duckdb/transaction/local_storage.hpp @@ -58,8 +58,6 @@ class LocalTableStorage : public enable_shared_from_this { vector> optimistic_collections; //! The main optimistic data writer associated with this table. OptimisticDataWriter optimistic_writer; - //! The optimistic data writers associated with this table. - vector> optimistic_writers; //! Whether or not storage was merged bool merged_storage = false; @@ -86,9 +84,8 @@ class LocalTableStorage : public enable_shared_from_this { RowGroupCollection &GetOptimisticCollection(const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(const PhysicalIndex collection_index); - //! Create an optimistic writer for this table. - OptimisticDataWriter &CreateOptimisticWriter(); - void FinalizeOptimisticWriter(OptimisticDataWriter &writer); + //! Returns the optimistic writer. + OptimisticDataWriter &GetOptimisticWriter(); private: mutex collections_lock; @@ -152,9 +149,8 @@ class LocalStorage { RowGroupCollection &GetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); //! Resets the optimistic row group collection corresponding to the index. void ResetOptimisticCollection(DataTable &table, const PhysicalIndex collection_index); - //! Create an optimistic writer for this table. - OptimisticDataWriter &CreateOptimisticWriter(DataTable &table); - void FinalizeOptimisticWriter(DataTable &table, OptimisticDataWriter &writer); + //! Returns the optimistic writer. 
+ OptimisticDataWriter &GetOptimisticWriter(DataTable &table); //! Delete a set of rows from the local storage idx_t Delete(DataTable &table, Vector &row_ids, idx_t count); diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index d34872c68778..eaddd112175a 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -869,14 +869,9 @@ void DataTable::ResetOptimisticCollection(ClientContext &context, const Physical local_storage.ResetOptimisticCollection(*this, collection_index); } -OptimisticDataWriter &DataTable::CreateOptimisticWriter(ClientContext &context) { +OptimisticDataWriter &DataTable::GetOptimisticWriter(ClientContext &context) { auto &local_storage = LocalStorage::Get(context, db); - return local_storage.CreateOptimisticWriter(*this); -} - -void DataTable::FinalizeOptimisticWriter(ClientContext &context, OptimisticDataWriter &writer) { - auto &local_storage = LocalStorage::Get(context, db); - local_storage.FinalizeOptimisticWriter(*this, writer); + return local_storage.GetOptimisticWriter(*this); } void DataTable::LocalMerge(ClientContext &context, RowGroupCollection &collection) { diff --git a/src/storage/local_storage.cpp b/src/storage/local_storage.cpp index 5379ddc30f83..d8339fdadb35 100644 --- a/src/storage/local_storage.cpp +++ b/src/storage/local_storage.cpp @@ -55,8 +55,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_data const vector &bound_columns, Expression &cast_expr) : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), optimistic_collections(std::move(parent.optimistic_collections)), - optimistic_writer(new_data_table, parent.optimistic_writer), - optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { + optimistic_writer(new_data_table, parent.optimistic_writer), merged_storage(parent.merged_storage) { // Alter the column type. 
row_groups = parent.row_groups->AlterType(context, alter_column_index, target_type, bound_columns, cast_expr); @@ -70,8 +69,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_data_table, LocalTableStorag const idx_t drop_column_index) : table_ref(new_data_table), allocator(Allocator::Get(new_data_table.db)), deleted_rows(parent.deleted_rows), optimistic_collections(std::move(parent.optimistic_collections)), - optimistic_writer(new_data_table, parent.optimistic_writer), - optimistic_writers(std::move(parent.optimistic_writers)), merged_storage(parent.merged_storage) { + optimistic_writer(new_data_table, parent.optimistic_writer), merged_storage(parent.merged_storage) { // Remove the column from the previous table storage. row_groups = parent.row_groups->RemoveColumn(drop_column_index); @@ -85,8 +83,7 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, ColumnDefinition &new_column, ExpressionExecutor &default_executor) : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows), optimistic_collections(std::move(parent.optimistic_collections)), - optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)), - merged_storage(parent.merged_storage) { + optimistic_writer(new_dt, parent.optimistic_writer), merged_storage(parent.merged_storage) { row_groups = parent.row_groups->AddColumn(context, new_column, default_executor); parent.row_groups.reset(); @@ -250,33 +247,11 @@ void LocalTableStorage::ResetOptimisticCollection(const PhysicalIndex collection optimistic_collections[collection_index.index].reset(); } -OptimisticDataWriter &LocalTableStorage::CreateOptimisticWriter() { - auto writer = make_uniq(table_ref.get()); - optimistic_writers.push_back(std::move(writer)); - return *optimistic_writers.back(); -} - -void LocalTableStorage::FinalizeOptimisticWriter(OptimisticDataWriter &writer) { - // remove the writer from the set of optimistic writers - 
unique_ptr owned_writer; - for (idx_t i = 0; i < optimistic_writers.size(); i++) { - if (optimistic_writers[i].get() == &writer) { - owned_writer = std::move(optimistic_writers[i]); - optimistic_writers.erase_at(i); - break; - } - } - if (!owned_writer) { - throw InternalException("Error in FinalizeOptimisticWriter - could not find writer"); - } - optimistic_writer.Merge(*owned_writer); +OptimisticDataWriter &LocalTableStorage::GetOptimisticWriter() { + return optimistic_writer; } void LocalTableStorage::Rollback() { - for (auto &writer : optimistic_writers) { - writer->Rollback(); - } - optimistic_writers.clear(); optimistic_writer.Rollback(); for (auto &collection : optimistic_collections) { @@ -489,14 +464,9 @@ void LocalStorage::ResetOptimisticCollection(DataTable &table, const PhysicalInd storage.ResetOptimisticCollection(collection_index); } -OptimisticDataWriter &LocalStorage::CreateOptimisticWriter(DataTable &table) { - auto &storage = table_manager.GetOrCreateStorage(context, table); - return storage.CreateOptimisticWriter(); -} - -void LocalStorage::FinalizeOptimisticWriter(DataTable &table, OptimisticDataWriter &writer) { +OptimisticDataWriter &LocalStorage::GetOptimisticWriter(DataTable &table) { auto &storage = table_manager.GetOrCreateStorage(context, table); - storage.FinalizeOptimisticWriter(writer); + return storage.GetOptimisticWriter(); } bool LocalStorage::ChangesMade() noexcept { From 3136585cd87ab9244273d267578a092bea4268c4 Mon Sep 17 00:00:00 2001 From: Tishj Date: Mon, 17 Feb 2025 13:16:01 +0100 Subject: [PATCH 111/142] Execute does not like a dirty validity mask, use vector caches (through the DataChunk) just for completeness of using 'clean' Vectors every iteration --- src/execution/expression_executor/execute_operator.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/execution/expression_executor/execute_operator.cpp b/src/execution/expression_executor/execute_operator.cpp index 6f37e5d29138..04883c5deeac 
100644 --- a/src/execution/expression_executor/execute_operator.cpp +++ b/src/execution/expression_executor/execute_operator.cpp @@ -124,13 +124,16 @@ void ExpressionExecutor::Execute(const BoundOperatorExpression &expr, Expression } } SelectionVector selvec(1); - Vector intermediate(result.GetType(), 1); + DataChunk intermediate; + intermediate.Initialize(GetAllocator(), {result.GetType()}, 1); for (idx_t i = 0; i < count; i++) { + intermediate.Reset(); + intermediate.SetCardinality(1); selvec.set_index(0, sel ? sel->get_index(i) : i); Value val(result.GetType()); try { - Execute(*expr.children[0], &child_state, &selvec, 1, intermediate); - val = intermediate.GetValue(0); + Execute(*expr.children[0], &child_state, &selvec, 1, intermediate.data[0]); + val = intermediate.GetValue(0, 0); } catch (std::exception &ex) { ErrorData error(ex); auto error_type = error.Type(); From ee5cc9061f2f02f7cd5acc5e88e9d837c42cebc2 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Mon, 17 Feb 2025 13:30:10 +0100 Subject: [PATCH 112/142] change result order now that string hash has changed --- test/api/adbc/test_adbc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index afcbb596d073..a624a66a7857 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -1364,8 +1364,8 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { REQUIRE((res->ColumnCount() == 2)); REQUIRE((res->RowCount() == 3)); REQUIRE((res->GetValue(1, 0).ToString() == - "[{'db_schema_name': information_schema, 'db_schema_tables': NULL}, {'db_schema_name': main, " - "'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, 'db_schema_tables': NULL}]")); + "[{'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, " + "'db_schema_tables': NULL}, {'db_schema_name': information_schema, 'db_schema_tables': NULL}]")); db.Query("Drop table result;"); 
AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, From b27267eaed46a1610c77db5a25195e5269895e5c Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 13:30:36 +0100 Subject: [PATCH 113/142] Avoid caching the compressed buffer in the ColumnReader --- extension/parquet/column_reader.cpp | 10 ++++------ extension/parquet/include/column_reader.hpp | 2 -- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/extension/parquet/column_reader.cpp b/extension/parquet/column_reader.cpp index 8a2112c356dd..73db368bc277 100644 --- a/extension/parquet/column_reader.cpp +++ b/extension/parquet/column_reader.cpp @@ -319,7 +319,8 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) { auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes; - AllocateCompressed(compressed_bytes); + ResizeableBuffer compressed_buffer; + compressed_buffer.resize(GetAllocator(), compressed_bytes); reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes); DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes, @@ -334,10 +335,6 @@ void ColumnReader::AllocateBlock(idx_t size) { } } -void ColumnReader::AllocateCompressed(idx_t size) { - compressed_buffer.resize(GetAllocator(), size); -} - void ColumnReader::PreparePage(PageHeader &page_hdr) { AllocateBlock(page_hdr.uncompressed_page_size + 1); if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) { @@ -348,7 +345,8 @@ void ColumnReader::PreparePage(PageHeader &page_hdr) { return; } - AllocateCompressed(page_hdr.compressed_page_size + 1); + ResizeableBuffer compressed_buffer; + compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1); reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size); DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr, diff --git 
a/extension/parquet/include/column_reader.hpp b/extension/parquet/include/column_reader.hpp index 23d4fc3d4b6b..2b09623950f1 100644 --- a/extension/parquet/include/column_reader.hpp +++ b/extension/parquet/include/column_reader.hpp @@ -160,7 +160,6 @@ class ColumnReader { private: void AllocateBlock(idx_t size); - void AllocateCompressed(idx_t size); void PrepareRead(parquet_filter_t &filter); void PreparePage(PageHeader &page_hdr); void PrepareDataPage(PageHeader &page_hdr); @@ -178,7 +177,6 @@ class ColumnReader { shared_ptr block; - ResizeableBuffer compressed_buffer; ResizeableBuffer offset_buffer; unique_ptr dict_decoder; From b21d19bb6ac9d98b0aae61f40dbf1bc36bf99885 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Mon, 17 Feb 2025 13:55:50 +0100 Subject: [PATCH 114/142] improve performance of boolean column writer too --- .../parquet/writer/boolean_column_writer.cpp | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/extension/parquet/writer/boolean_column_writer.cpp b/extension/parquet/writer/boolean_column_writer.cpp index b7a3ee01856b..bcfd78b3ea82 100644 --- a/extension/parquet/writer/boolean_column_writer.cpp +++ b/extension/parquet/writer/boolean_column_writer.cpp @@ -49,22 +49,36 @@ void BooleanColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStat idx_t chunk_end) { auto &stats = stats_p->Cast(); auto &state = state_p->Cast(); - auto &mask = FlatVector::Validity(input_column); - - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - // only encode if non-null - if (ptr[r]) { - stats.max = true; - state.byte |= 1 << state.byte_pos; - } else { - stats.min = false; + const auto &mask = FlatVector::Validity(input_column); + + const auto *const ptr = FlatVector::GetData(input_column); + if (mask.AllValid()) { + for (idx_t r = chunk_start; r < chunk_end; r++) { + const auto &val = ptr[r]; + + stats.max |= val; + stats.min &= val; 
+ state.byte |= val << state.byte_pos; + + if (++state.byte_pos == 8) { + temp_writer.Write(state.byte); + state.byte = 0; + state.byte_pos = 0; + } + } + } else { + for (idx_t r = chunk_start; r < chunk_end; r++) { + if (!mask.RowIsValid(r)) { + continue; } - state.byte_pos++; + const auto &val = ptr[r]; + + stats.max |= val; + stats.min &= val; + state.byte |= val << state.byte_pos; - if (state.byte_pos == 8) { - temp_writer.Write(state.byte); + if (++state.byte_pos == 8) { + temp_writer.Write(state.byte); state.byte = 0; state.byte_pos = 0; } From 7d720dff253b459cdf83661f640aa1b3f4a8e0c4 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 14:09:35 +0100 Subject: [PATCH 115/142] Fix #16260: correctly handle parameters in getvariable --- src/function/scalar/generic/getvariable.cpp | 6 +++--- test/sql/variables/test_variables.test | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/function/scalar/generic/getvariable.cpp b/src/function/scalar/generic/getvariable.cpp index 14d32954d1cf..e6eebf0d10af 100644 --- a/src/function/scalar/generic/getvariable.cpp +++ b/src/function/scalar/generic/getvariable.cpp @@ -24,12 +24,12 @@ struct GetVariableBindData : FunctionData { static unique_ptr GetVariableBind(ClientContext &context, ScalarFunction &function, vector> &arguments) { - if (!arguments[0]->IsFoldable()) { - throw NotImplementedException("getvariable requires a constant input"); - } if (arguments[0]->HasParameter()) { throw ParameterNotResolvedException(); } + if (!arguments[0]->IsFoldable()) { + throw NotImplementedException("getvariable requires a constant input"); + } Value value; auto variable_name = ExpressionExecutor::EvaluateScalar(context, *arguments[0]); if (!variable_name.IsNull()) { diff --git a/test/sql/variables/test_variables.test b/test/sql/variables/test_variables.test index ad3c15d43f57..b3dd60747846 100644 --- a/test/sql/variables/test_variables.test +++ b/test/sql/variables/test_variables.test @@ -13,6 
+13,14 @@ SELECT GETVARIABLE('animal') ---- duck +statement ok +PREPARE v1 AS SELECT GETVARIABLE($1); + +query I +EXECUTE v1('animal'); +---- +duck + # overwriting statement ok SET VARIABLE animal='bird' From 9c3cd8a48fe64124137b31f4efff284ba336f8c4 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 14:12:16 +0100 Subject: [PATCH 116/142] Handle macros as well --- src/function/scalar/generic/getvariable.cpp | 2 +- test/sql/variables/test_variables.test | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/function/scalar/generic/getvariable.cpp b/src/function/scalar/generic/getvariable.cpp index e6eebf0d10af..0181c07523bc 100644 --- a/src/function/scalar/generic/getvariable.cpp +++ b/src/function/scalar/generic/getvariable.cpp @@ -24,7 +24,7 @@ struct GetVariableBindData : FunctionData { static unique_ptr GetVariableBind(ClientContext &context, ScalarFunction &function, vector> &arguments) { - if (arguments[0]->HasParameter()) { + if (arguments[0]->HasParameter() || arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } if (!arguments[0]->IsFoldable()) { diff --git a/test/sql/variables/test_variables.test b/test/sql/variables/test_variables.test index b3dd60747846..b67d81686222 100644 --- a/test/sql/variables/test_variables.test +++ b/test/sql/variables/test_variables.test @@ -21,6 +21,14 @@ EXECUTE v1('animal'); ---- duck +statement ok +CREATE MACRO _(x) AS getvariable(x); + +query I +SELECT _('animal') +---- +duck + # overwriting statement ok SET VARIABLE animal='bird' From ba6fe78896ab3f5dd771f1c3af2eece82e787ded Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 17 Feb 2025 14:53:12 +0100 Subject: [PATCH 117/142] change extension install mode to not_installed instead of null --- src/function/table/system/duckdb_extensions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/function/table/system/duckdb_extensions.cpp 
b/src/function/table/system/duckdb_extensions.cpp index 0edc2c2ff929..8adeb356afde 100644 --- a/src/function/table/system/duckdb_extensions.cpp +++ b/src/function/table/system/duckdb_extensions.cpp @@ -84,7 +84,7 @@ unique_ptr DuckDBExtensionsInit(ClientContext &context info.loaded = false; info.file_path = extension.statically_loaded ? "(BUILT-IN)" : string(); info.install_mode = - extension.statically_loaded ? ExtensionInstallMode::STATICALLY_LINKED : ExtensionInstallMode::UNKNOWN; + extension.statically_loaded ? ExtensionInstallMode::STATICALLY_LINKED : ExtensionInstallMode::NOT_INSTALLED; info.description = extension.description; for (idx_t k = 0; k < alias_count; k++) { auto alias = ExtensionHelper::GetExtensionAlias(k); @@ -206,7 +206,7 @@ void DuckDBExtensionsFunction(ClientContext &context, TableFunctionInput &data_p // extension version LogicalType::LIST(LogicalType::VARCHAR) output.SetValue(6, count, Value(entry.extension_version)); // installed_mode LogicalType::VARCHAR - output.SetValue(7, count, entry.installed ? 
Value(EnumUtil::ToString(entry.install_mode)) : Value()); + output.SetValue(7, count, EnumUtil::ToString(entry.install_mode)); // installed_source LogicalType::VARCHAR output.SetValue(8, count, Value(entry.installed_from)); From 80fa4cd08d4b89ab796dccf88b9489a3c262e4cf Mon Sep 17 00:00:00 2001 From: Tishj Date: Mon, 17 Feb 2025 14:59:02 +0100 Subject: [PATCH 118/142] add the correct variant of the flag based on the compiler (MSVC or not) --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c61c7f9def65..24ca161f0d83 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1420,6 +1420,15 @@ if(BUILD_PYTHON) set(ALL_COMPILE_FLAGS "${CMAKE_CXX_FLAGS}") endif() + # Check for MSVC compiler and set the correct C++ standard flag + if(MSVC) + # MSVC does not support `-std=c++11` or `-std=c++14`, use `/std:c++14` + set(ALL_COMPILE_FLAGS "${ALL_COMPILE_FLAGS} /std:c++14") + else() + # For non-MSVC compilers, use the `-std=c++11` + set(ALL_COMPILE_FLAGS "${ALL_COMPILE_FLAGS} -std=c++11") + endif() + get_target_property(duckdb_libs duckdb LINK_LIBRARIES) set(PIP_COMMAND From 7c3296fedf07754df1bf6963c1b76fd5367b4f73 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 17 Feb 2025 15:01:27 +0100 Subject: [PATCH 119/142] add test --- test/extension/autoloading_base.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/extension/autoloading_base.test b/test/extension/autoloading_base.test index 591383eb80f6..80c120e8d119 100644 --- a/test/extension/autoloading_base.test +++ b/test/extension/autoloading_base.test @@ -15,6 +15,12 @@ SELECT (count(*) > 0) FROM duckdb_extensions() WHERE install_path ILIKE '%duckdb ---- false +# All extensions reported by duckdb are either statically linked or not installed +query I +SELECT count(*) FROM duckdb_extensions() WHERE install_mode != 'NOT_INSTALLED' AND install_mode != 'STATICALLY_LINKED' +---- +0 + ### No autoloading nor installing: throw error with installation hint 
statement ok set autoload_known_extensions=false From f066290ff0dd7ad5931499719710097bb95c2383 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 15:37:39 +0100 Subject: [PATCH 120/142] Avoid calling SetFilterAlwaysTrue multiple times in RowGroup::CheckZonemap --- src/storage/table/row_group.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/storage/table/row_group.cpp b/src/storage/table/row_group.cpp index d5250387362b..55c6e064f4e5 100644 --- a/src/storage/table/row_group.cpp +++ b/src/storage/table/row_group.cpp @@ -430,14 +430,13 @@ bool RowGroup::CheckZonemap(ScanFilterInfo &filters) { if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { return false; } - if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) { - // filter is always true - no need to check it - // label the filter as always true so we don't need to check it anymore - filters.SetFilterAlwaysTrue(i); - } if (filter.filter_type == TableFilterType::OPTIONAL_FILTER) { // these are only for row group checking, set as always true so we don't check it filters.SetFilterAlwaysTrue(i); + } else if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) { + // filter is always true - no need to check it + // label the filter as always true so we don't need to check it anymore + filters.SetFilterAlwaysTrue(i); } } return true; @@ -619,7 +618,7 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { // We can just break out of the loop here. 
approved_tuple_count = 0; - break; + continue; } // Generate row ids From 6637b90bb5a3cf9d5e59dc6ef51795bbb17b1bd1 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 15:39:54 +0100 Subject: [PATCH 121/142] Add safeguard to SetFilterAlwaysTrue --- src/storage/table/scan_state.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/storage/table/scan_state.cpp b/src/storage/table/scan_state.cpp index adeccde91b03..fdfa76433059 100644 --- a/src/storage/table/scan_state.cpp +++ b/src/storage/table/scan_state.cpp @@ -96,6 +96,9 @@ void ScanFilterInfo::CheckAllFilters() { void ScanFilterInfo::SetFilterAlwaysTrue(idx_t filter_idx) { auto &filter = filter_list[filter_idx]; + if (filter.always_true) { + return; + } filter.always_true = true; column_has_filter[filter.scan_column_index] = false; always_true_filters++; From fe56c8f0554fe31f03a0eef51530c3dbccc9770e Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 17 Feb 2025 15:55:08 +0100 Subject: [PATCH 122/142] fix scanning from normal leaf to nested leaf --- src/execution/index/art/iterator.cpp | 10 +++-- .../duckdb/execution/index/art/iterator.hpp | 2 + .../scan/test_art_scan_normal_to_nested.test | 38 +++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 test/sql/index/art/scan/test_art_scan_normal_to_nested.test diff --git a/src/execution/index/art/iterator.cpp b/src/execution/index/art/iterator.cpp index 689029a02e40..1c138e1d3e34 100644 --- a/src/execution/index/art/iterator.cpp +++ b/src/execution/index/art/iterator.cpp @@ -46,9 +46,11 @@ bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vec bool has_next; do { // An empty upper bound indicates that no upper bound exists. 
- if (!upper_bound.Empty() && status == GateStatus::GATE_NOT_SET) { - if (current_key.GreaterThan(upper_bound, equal, nested_depth)) { - return true; + if (!upper_bound.Empty()) { + if (status == GateStatus::GATE_NOT_SET || entered_nested_leaf) { + if (current_key.GreaterThan(upper_bound, equal, nested_depth)) { + return true; + } } } @@ -86,6 +88,7 @@ bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vec throw InternalException("Invalid leaf type for index scan."); } + entered_nested_leaf = false; has_next = Next(); } while (has_next); return true; @@ -104,6 +107,7 @@ void Iterator::FindMinimum(const Node &node) { if (node.GetGateStatus() == GateStatus::GATE_SET) { D_ASSERT(status == GateStatus::GATE_NOT_SET); status = GateStatus::GATE_SET; + entered_nested_leaf = true; nested_depth = 0; } diff --git a/src/include/duckdb/execution/index/art/iterator.hpp b/src/include/duckdb/execution/index/art/iterator.hpp index 58a0f106d54d..977cc7791081 100644 --- a/src/include/duckdb/execution/index/art/iterator.hpp +++ b/src/include/duckdb/execution/index/art/iterator.hpp @@ -90,6 +90,8 @@ class Iterator { GateStatus status; //! Depth in a nested leaf. uint8_t nested_depth = 0; + //! True, if we entered a nested leaf to retrieve the next node. + bool entered_nested_leaf = false; private: //! Goes to the next leaf in the ART and sets it as last_leaf, diff --git a/test/sql/index/art/scan/test_art_scan_normal_to_nested.test b/test/sql/index/art/scan/test_art_scan_normal_to_nested.test new file mode 100644 index 000000000000..0cd8cf886fe5 --- /dev/null +++ b/test/sql/index/art/scan/test_art_scan_normal_to_nested.test @@ -0,0 +1,38 @@ +# name: test/sql/index/art/scan/test_art_scan_normal_to_nested.test +# description: Test range scanning with an iterator moving from a normal leaf to a nested leaf. 
+# group: [scan] + +statement ok +PRAGMA enable_verification + +statement ok +CREATE TABLE integers (i BIGINT); + +statement ok +CREATE INDEX idx_integers ON integers (i); + +statement ok +INSERT INTO integers (i) VALUES ('1'), ('-1'), ('1'); + +# The border is exactly when moving from a non-nested leaf to a nested leaf. + +query I +SELECT i FROM integers WHERE i <= 0; +---- +-1 + +# Issue 16074. + +statement ok +CREATE TABLE t0(c1 TIMESTAMP); + +statement ok +INSERT INTO t0(c1) VALUES ('2020-02-29 12:00:00'), ('1969-12-09 09:26:38'), ('2020-02-29 12:00:00'); + +statement ok +CREATE INDEX i0 ON t0(c1); + +query I +SELECT c1 FROM t0 WHERE c1 <= '2007-07-07 07:07:07'; +---- +1969-12-09 09:26:38 \ No newline at end of file From 1d06c91a381eff4f507690cbb17bf0ca7a174fe0 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Mon, 17 Feb 2025 16:01:03 +0100 Subject: [PATCH 123/142] Fix #16231: refer to order by condition in ARRAY(SUBQUERY) by alias instead of by index --- src/parser/transform/expression/transform_subquery.cpp | 5 ++++- test/sql/subquery/scalar/array_order_subquery.test | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/parser/transform/expression/transform_subquery.cpp b/src/parser/transform/expression/transform_subquery.cpp index 6f6d742073ba..0403d24bc5dc 100644 --- a/src/parser/transform/expression/transform_subquery.cpp +++ b/src/parser/transform/expression/transform_subquery.cpp @@ -107,6 +107,7 @@ unique_ptr Transformer::TransformSubquery(duckdb_libpgquery::P } } // transform constants (e.g. 
ORDER BY 1) into positional references (ORDER BY #1) + idx_t array_idx = 0; if (aggr->order_bys) { for (auto &order : aggr->order_bys->orders) { if (order.expression->GetExpressionType() == ExpressionType::VALUE_CONSTANT) { @@ -120,8 +121,10 @@ unique_ptr Transformer::TransformSubquery(duckdb_libpgquery::P } } else if (sub_select) { // if we have a SELECT we can push the ORDER BY clause into the SELECT list and reference it + auto alias = "__array_internal_idx_" + to_string(++array_idx); + order.expression->alias = alias; sub_select->select_list.push_back(std::move(order.expression)); - order.expression = make_uniq(sub_select->select_list.size() - 1); + order.expression = make_uniq(alias); } else { // otherwise we remove order qualifications RemoveOrderQualificationRecursive(order.expression); diff --git a/test/sql/subquery/scalar/array_order_subquery.test b/test/sql/subquery/scalar/array_order_subquery.test index a0ca2fb4c7d0..94abd308009a 100644 --- a/test/sql/subquery/scalar/array_order_subquery.test +++ b/test/sql/subquery/scalar/array_order_subquery.test @@ -86,6 +86,16 @@ SELECT ARRAY ---- [3, 2, 1] +query I +select array(select * from unnest(['a', 'b']) as _t(u) order by if(u='a',100, 1)) as out; +---- +[b, a] + +query I +select array(select * from unnest(['a', 'b']) as _t(u) order by if(u='a',100, 1) desc) as out; +---- +[a, b] + statement error SELECT ARRAY (SELECT 1 UNION ALL From 7dec52e599066b4e17f9a3aa47cbec400053ce2b Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 17 Feb 2025 15:37:12 +0100 Subject: [PATCH 124/142] add pragma to truncate log --- src/function/pragma/pragma_functions.cpp | 6 +++++ src/include/duckdb/logging/log_manager.hpp | 2 ++ src/include/duckdb/logging/log_storage.hpp | 7 ++++++ src/logging/log_manager.cpp | 5 +++++ src/logging/log_storage.cpp | 22 +++++++++++++++++++ .../test_logging_function_large.test_slow | 8 +++++++ 6 files changed, 50 insertions(+) diff --git a/src/function/pragma/pragma_functions.cpp 
b/src/function/pragma/pragma_functions.cpp index 635828066029..5612f519c1e0 100644 --- a/src/function/pragma/pragma_functions.cpp +++ b/src/function/pragma/pragma_functions.cpp @@ -94,6 +94,10 @@ static void PragmaForceCheckpoint(ClientContext &context, const FunctionParamete DBConfig::GetConfig(context).options.force_checkpoint = true; } +static void PragmaTruncateDuckDBLogs(ClientContext &context, const FunctionParameters ¶meters) { + context.db->GetLogManager().TruncateLogStorage(); +} + static void PragmaDisableForceParallelism(ClientContext &context, const FunctionParameters ¶meters) { ClientConfig::GetConfig(context).verify_parallelism = false; } @@ -149,6 +153,8 @@ void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) { set.AddFunction(PragmaFunction::PragmaStatement("force_checkpoint", PragmaForceCheckpoint)); + set.AddFunction(PragmaFunction::PragmaStatement("truncate_duckdb_logs", PragmaTruncateDuckDBLogs)); + set.AddFunction(PragmaFunction::PragmaStatement("enable_progress_bar", PragmaEnableProgressBar)); set.AddFunction(PragmaFunction::PragmaStatement("disable_progress_bar", PragmaDisableProgressBar)); diff --git a/src/include/duckdb/logging/log_manager.hpp b/src/include/duckdb/logging/log_manager.hpp index 90c6384c3274..6f414d9efa7e 100644 --- a/src/include/duckdb/logging/log_manager.hpp +++ b/src/include/duckdb/logging/log_manager.hpp @@ -54,6 +54,8 @@ class LogManager : public enable_shared_from_this { DUCKDB_API void SetDisabledLogTypes(unordered_set &disabled_log_types); DUCKDB_API void SetLogStorage(DatabaseInstance &db, const string &storage_name); + DUCKDB_API void TruncateLogStorage(); + DUCKDB_API LogConfig GetConfig(); protected: diff --git a/src/include/duckdb/logging/log_storage.hpp b/src/include/duckdb/logging/log_storage.hpp index f99175b590ea..d30d370a7028 100644 --- a/src/include/duckdb/logging/log_storage.hpp +++ b/src/include/duckdb/logging/log_storage.hpp @@ -61,6 +61,8 @@ class LogStorage { DUCKDB_API virtual unique_ptr 
CreateScanContextsState() const; DUCKDB_API virtual bool ScanContexts(LogStorageScanState &state, DataChunk &result) const; DUCKDB_API virtual void InitializeScanContexts(LogStorageScanState &state) const; + + DUCKDB_API virtual void Truncate(); }; class StdOutLogStorage : public LogStorage { @@ -73,6 +75,8 @@ class StdOutLogStorage : public LogStorage { const RegisteredLoggingContext &context) override; void WriteLogEntries(DataChunk &chunk, const RegisteredLoggingContext &context) override; void Flush() override; + + void Truncate() override; }; class InMemoryLogStorageScanState : public LogStorageScanState { @@ -94,6 +98,8 @@ class InMemoryLogStorage : public LogStorage { void WriteLogEntries(DataChunk &chunk, const RegisteredLoggingContext &context) override; void Flush() override; + void Truncate() override; + //! LogStorage API: READING bool CanScan() override; @@ -106,6 +112,7 @@ class InMemoryLogStorage : public LogStorage { protected: void WriteLoggingContext(const RegisteredLoggingContext &context); + void ResetBuffers(); protected: mutable mutex lock; diff --git a/src/logging/log_manager.cpp b/src/logging/log_manager.cpp index c937b3fda006..f493e2ee57b2 100644 --- a/src/logging/log_manager.cpp +++ b/src/logging/log_manager.cpp @@ -149,6 +149,11 @@ void LogManager::SetLogStorage(DatabaseInstance &db, const string &storage_name) config.storage = storage_name_to_lower; } +void LogManager::TruncateLogStorage() { + unique_lock lck(lock); + log_storage->Truncate(); +} + LogConfig LogManager::GetConfig() { unique_lock lck(lock); return config; diff --git a/src/logging/log_storage.cpp b/src/logging/log_storage.cpp index 909bddf75830..8afb3f84e174 100644 --- a/src/logging/log_storage.cpp +++ b/src/logging/log_storage.cpp @@ -25,6 +25,9 @@ bool LogStorage::ScanContexts(LogStorageScanState &state, DataChunk &result) con void LogStorage::InitializeScanContexts(LogStorageScanState &state) const { throw NotImplementedException("Not implemented for this LogStorage: 
InitializeScanContexts"); } +void LogStorage::Truncate() { + throw NotImplementedException("Not implemented for this LogStorage: TruncateLogStorage"); +} StdOutLogStorage::StdOutLogStorage() { } @@ -46,6 +49,10 @@ void StdOutLogStorage::WriteLogEntries(DataChunk &chunk, const RegisteredLogging throw NotImplementedException("StdOutLogStorage::WriteLogEntries"); } +void StdOutLogStorage::Truncate() { + // NOP +} + void StdOutLogStorage::Flush() { // NOP } @@ -82,6 +89,16 @@ InMemoryLogStorage::InMemoryLogStorage(DatabaseInstance &db_p) log_contexts = make_uniq(db_p.GetBufferManager(), log_context_schema); } +void InMemoryLogStorage::ResetBuffers() { + entry_buffer->Reset(); + log_context_buffer->Reset(); + + log_entries->Reset(); + log_contexts->Reset(); + + registered_contexts.clear(); +} + InMemoryLogStorage::~InMemoryLogStorage() { } @@ -122,6 +139,11 @@ void InMemoryLogStorage::Flush() { FlushInternal(); } +void InMemoryLogStorage::Truncate() { + unique_lock lck(lock); + ResetBuffers(); +} + void InMemoryLogStorage::FlushInternal() { if (entry_buffer->size() > 0) { log_entries->Append(*entry_buffer); diff --git a/test/sql/logging/test_logging_function_large.test_slow b/test/sql/logging/test_logging_function_large.test_slow index 7cb6195aaf1c..4bb29995aa02 100644 --- a/test/sql/logging/test_logging_function_large.test_slow +++ b/test/sql/logging/test_logging_function_large.test_slow @@ -33,3 +33,11 @@ SELECT count(*), message FROM duckdb_logs where starts_with(message, 'hi_') grou 250000 hi_client 250000 hi_file 250000 hi_global + +statement ok +pragma truncate_duckdb_logs; + +query I +SELECT count(*) FROM duckdb_logs; +---- +0 \ No newline at end of file From fc58d8b7826ec422d34327bed498b4384f661832 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 17 Feb 2025 16:14:02 +0100 Subject: [PATCH 125/142] increase max variation for linux --- .../parallel/reclaim_space_primary_key_optimistic.test_slow | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow index 6865f4d0a75d..07cce7185d9a 100644 --- a/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow +++ b/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow @@ -78,10 +78,10 @@ SELECT COUNT(*) - ${i} FROM integers2; query I SELECT CASE WHEN ${i} = 0 THEN True::test_result - WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.4 THEN True::test_result + WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.6 THEN True::test_result ELSE { 'old': total_blocks_tbl.total_blocks, - 'allowed_max': total_blocks_tbl.total_blocks * 1.4, + 'allowed_max': total_blocks_tbl.total_blocks * 1.6, 'actual': current.total_blocks }::test_result END From 16f1151c6a077628f1e90d30c0de57f125b97654 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Mon, 17 Feb 2025 16:16:38 +0100 Subject: [PATCH 126/142] fix #16257 --- extension/parquet/column_writer.cpp | 19 +++++++------- .../parquet/include/parquet_bss_encoder.hpp | 1 - .../parquet/include/parquet_dlba_encoder.hpp | 3 +-- test/issues/general/test_16257.test_slow | 25 +++++++++++++++++++ 4 files changed, 36 insertions(+), 12 deletions(-) create mode 100644 test/issues/general/test_16257.test_slow diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index ba42a9b2f20a..8791bc596c08 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -388,7 +388,7 @@ class BasicColumnWriter : public ColumnWriter { virtual unique_ptr InitializeStatsState(); //! Initialize the writer for a specific page. Only used for scalar types. - virtual unique_ptr InitializePageState(BasicColumnWriterState &state); + virtual unique_ptr InitializePageState(BasicColumnWriterState &state, idx_t page_idx); //! 
Flushes the writer for a specific page. Only used for scalar types. virtual void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state); @@ -427,7 +427,8 @@ void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::RowGroup &row_group) row_group.columns.push_back(std::move(column_chunk)); } -unique_ptr BasicColumnWriter::InitializePageState(BasicColumnWriterState &state) { +unique_ptr BasicColumnWriter::InitializePageState(BasicColumnWriterState &state, + idx_t page_idx) { return nullptr; } @@ -502,7 +503,7 @@ void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) { MaxValue(NextPowerOfTwo(page_info.estimated_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY)); write_info.write_count = page_info.empty_count; write_info.max_write_count = page_info.row_count; - write_info.page_state = InitializePageState(state); + write_info.page_state = InitializePageState(state, page_idx); write_info.compressed_size = 0; write_info.compressed_data = nullptr; @@ -1232,11 +1233,11 @@ class StandardColumnWriter : public BasicColumnWriter { return std::move(result); } - unique_ptr InitializePageState(BasicColumnWriterState &state_p) override { + unique_ptr InitializePageState(BasicColumnWriterState &state_p, idx_t page_idx) override { auto &state = state_p.Cast>(); - - auto result = make_uniq>(state.total_value_count, state.total_string_size, - state.encoding, state.dictionary); + const auto &page_info = state_p.page_info[page_idx]; + auto result = make_uniq>( + page_info.row_count - page_info.empty_count, state.total_string_size, state.encoding, state.dictionary); return std::move(result); } @@ -1586,7 +1587,7 @@ class BooleanColumnWriter : public BasicColumnWriter { } } - unique_ptr InitializePageState(BasicColumnWriterState &state) override { + unique_ptr InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override { return make_uniq(); } @@ -1828,7 +1829,7 @@ class EnumColumnWriter : public BasicColumnWriter { } } - unique_ptr 
InitializePageState(BasicColumnWriterState &state) override { + unique_ptr InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override { return make_uniq(bit_width); } diff --git a/extension/parquet/include/parquet_bss_encoder.hpp b/extension/parquet/include/parquet_bss_encoder.hpp index 80da1726de92..65561eb2573a 100644 --- a/extension/parquet/include/parquet_bss_encoder.hpp +++ b/extension/parquet/include/parquet_bss_encoder.hpp @@ -30,7 +30,6 @@ class BssEncoder { } void FinishWrite(WriteStream &writer) { - D_ASSERT(count == total_value_count); writer.WriteData(buffer.get(), total_value_count * bit_width); } diff --git a/extension/parquet/include/parquet_dlba_encoder.hpp b/extension/parquet/include/parquet_dlba_encoder.hpp index b3cd1aa96076..89702fc12e41 100644 --- a/extension/parquet/include/parquet_dlba_encoder.hpp +++ b/extension/parquet/include/parquet_dlba_encoder.hpp @@ -33,9 +33,8 @@ class DlbaEncoder { } void FinishWrite(WriteStream &writer) { - D_ASSERT(stream->GetPosition() == total_string_size); dbp_encoder.FinishWrite(writer); - writer.WriteData(buffer.get(), total_string_size); + writer.WriteData(buffer.get(), stream->GetPosition()); } private: diff --git a/test/issues/general/test_16257.test_slow b/test/issues/general/test_16257.test_slow new file mode 100644 index 000000000000..6b3faf9a7ba4 --- /dev/null +++ b/test/issues/general/test_16257.test_slow @@ -0,0 +1,25 @@ +# name: test/issues/general/test_16257.test_slow +# description: Issue 16257 - value count mismatch when writing DELTA_BINARY_PACKED +# group: [general] + +require parquet + +# Some macros to generate lorem ipsum +statement ok +CREATE OR REPLACE MACRO deterministic_random(rand) AS hash(rand) / 18446744073709551615; + +statement ok +CREATE OR REPLACE MACRO lorem_word(rand) AS ['voluptatem', 'quaerat', 'quiquia', 'non', 'dolore', 'dolorem', 'labore', 'consectetur', 'porro', 'sed', 'numquam', 'aliquam', 'sit', 'eius', 'modi', 'est', 'amet', 'magnam', 'dolor', 
'etincidunt', 'velit', 'neque', 'ipsum', 'adipisci', 'quisquam', 'ut', 'tempora'][1 + floor(rand * 27 % 27)::BIGINT]; + +statement ok +CREATE OR REPLACE MACRO lorem_sentence_util(s) AS upper(s[1]) || s[2:] || '.'; + +statement ok +CREATE OR REPLACE MACRO lorem_sentence(rand, words) AS lorem_sentence_util(list_aggr([lorem_word(deterministic_random(rand + i)) for i in range(words)], 'string_agg', ' ')); + + +statement ok +SET preserve_insertion_order=false; + +statement ok +COPY (SELECT lorem_sentence(random(), 20) FROM range(1_000_000)) TO '__TEST_DIR__/16257.parquet' (PARQUET_VERSION V2, ROW_GROUP_SIZE 2_000_000); From 73e15e8b217b41ca263b5c414c6a2cd92088af36 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Mon, 17 Feb 2025 17:10:57 +0100 Subject: [PATCH 127/142] even faster boolean writing --- extension/parquet/writer/boolean_column_writer.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/extension/parquet/writer/boolean_column_writer.cpp b/extension/parquet/writer/boolean_column_writer.cpp index bcfd78b3ea82..a8b2f9add185 100644 --- a/extension/parquet/writer/boolean_column_writer.cpp +++ b/extension/parquet/writer/boolean_column_writer.cpp @@ -52,14 +52,11 @@ void BooleanColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStat const auto &mask = FlatVector::Validity(input_column); const auto *const ptr = FlatVector::GetData(input_column); - if (mask.AllValid()) { + if (stats.max && !stats.min && mask.AllValid()) { + // Fast path: stats have already been set, and there's no NULLs for (idx_t r = chunk_start; r < chunk_end; r++) { const auto &val = ptr[r]; - - stats.max |= val; - stats.min &= val; state.byte |= val << state.byte_pos; - if (++state.byte_pos == 8) { temp_writer.Write(state.byte); state.byte = 0; From 81bd903c1619a809e7f047b87cb392704da45b7c Mon Sep 17 00:00:00 2001 From: pdet Date: Mon, 10 Feb 2025 09:34:13 -0300 Subject: [PATCH 128/142] Adding fuzzer tests --- data/csv/afl/4172/case_1.csv | Bin 0 -> 2398 
bytes data/csv/afl/4172/case_2.csv | Bin 0 -> 229 bytes data/csv/afl/4172/case_3.csv | Bin 0 -> 257 bytes data/csv/afl/4172/case_4.csv | Bin 0 -> 239 bytes data/csv/afl/4172/case_5.csv | Bin 0 -> 240 bytes test/sql/copy/csv/afl/test_fuzz_4172.test | 32 ++++++++++++++++++++++ 6 files changed, 32 insertions(+) create mode 100644 data/csv/afl/4172/case_1.csv create mode 100644 data/csv/afl/4172/case_2.csv create mode 100644 data/csv/afl/4172/case_3.csv create mode 100644 data/csv/afl/4172/case_4.csv create mode 100644 data/csv/afl/4172/case_5.csv create mode 100644 test/sql/copy/csv/afl/test_fuzz_4172.test diff --git a/data/csv/afl/4172/case_1.csv b/data/csv/afl/4172/case_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..47200b6b4d2ec0e76ba53d1168e9d63a90edfebc GIT binary patch literal 2398 zcmeHI-H+Nv5Z_dZbVkZ^+qV&Y@C%?_^KsxlUDE4^)TGUYM%Sv+X|NaAlkw8^`sn2p z+Jt;)lgjk!KN5?JqPK7B;@%5ewmZ<4q#_^Qh1X$RXFz^m*GUlp zS=^vWmT1D(tCRcTpZ2H}fvr*xcIgIHYC)4supfnsGdPGkct@l)R&zUr78gHUp4_8K z{_~bG@om%^GWqCy^7zS%;RSy-n4Vw$F}VEx^4DhL&V-iGPMNwCXZWI9* zG3T-1LBykemk+vp1Y#8P(>NLhj=$zR;<{^JUF*{1n5mcY%9NRG$IzY=1O-)J0_G_c z@N4u2tJ~O&pEG^8nu}dh|ItRN{1tdDRDwnwhgj{YQM>k*&PNce# zDX1JWHIv5PdLPthl~Dasu$mYotc_mlLU}8@MB>RZSWz5EQ6)xtUHMmuSr2l(fepn< z5}7Y>QPCqToEjwRMrE);dI@u-mX3N9H|L}QUTI3u~fO0$1$Pb zop!qakws$E+Lg+YIwd2ss+i4kZ^Lkzjz!;T?G6j}Hz1_DD}eTd1&aLfJZ;BaKn5`+$p4dt;Q9T|J-g zO+yqe-6Mq4$9{v@bMo3 zxBRprM@Qg8Ek1k6I;_t)d&pYs346mb`buU9jo6TF$L{DldCFSy<>KCM9zLSw_ZOI$$nWh?PQSUWtx^dWI3ug0jS-WQYN*|NpaQBo=`z llKP{jqn=@?lVQYVW}s8fz{J47rEjQHo)0w0%*3xe9{^6dIeh>C literal 0 HcmV?d00001 diff --git a/data/csv/afl/4172/case_3.csv b/data/csv/afl/4172/case_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..691f0c0e161588d33fb6b4be9499800244f0176c GIT binary patch literal 257 zcmY+8JqyA>42JKvl0k6Q<>=ro0nMG(a+`>#i$f{yR~0H$3&jsa{89craq#*fXdv*0 zym{XE$MfN~Yu$Z858nuSN+aYtvq+FYlG(o|PJZQwAOCbQi6jb{ycl~eSMh8v6_ek!>Li(Iy*kN@>7qgxHAv@#Ofaa<5%%v8e&0 
pv6!T`D1$oh9?Fz3RS*pDnwO|PH6l=rP#Rrw-{?SpSHsmc{{U@iK(+t? literal 0 HcmV?d00001 diff --git a/data/csv/afl/4172/case_4.csv b/data/csv/afl/4172/case_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..c3c65cce9923449b0e63966944fb1d1b6b0919a2 GIT binary patch literal 239 zcmXrwVwB`y{LjF^zz73B)Mdq^)j>RUCr@|J1KH|23Z_sX$-&IWC?q4Mu9~Z_nxd~N zsimNb3QSE^bv> Date: Mon, 10 Feb 2025 10:24:30 -0300 Subject: [PATCH 129/142] Fix for buffer_size=1 in encoding, and check for conflicts between maximum line size and buffer size --- .../operator/csv_scanner/encode/csv_encoder.cpp | 4 ++++ .../operator/csv_scanner/util/csv_reader_options.cpp | 10 ++++++++++ test/sql/copy/csv/afl/test_fuzz_4172.test | 6 +++--- .../copy/csv/parallel/csv_parallel_buffer_size.test | 8 ++++---- test/sql/copy/csv/relaxed_quotes.test | 7 +------ test/sql/copy/csv/test_validator.test | 2 +- 6 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/execution/operator/csv_scanner/encode/csv_encoder.cpp b/src/execution/operator/csv_scanner/encode/csv_encoder.cpp index 89fc5df040bd..8a6c08032597 100644 --- a/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +++ b/src/execution/operator/csv_scanner/encode/csv_encoder.cpp @@ -51,6 +51,10 @@ CSVEncoder::CSVEncoder(DBConfig &config, const string &encoding_name_to_find, id } // We ensure that the encoded buffer size is an even number to make the two byte lookup on utf-16 work idx_t encoded_buffer_size = buffer_size % 2 != 0 ? 
buffer_size - 1 : buffer_size; + if (encoded_buffer_size == 0) { + // This might happen if buffer size = 1 + encoded_buffer_size = 2; + } D_ASSERT(encoded_buffer_size > 0); encoded_buffer.Initialize(encoded_buffer_size); remaining_bytes_buffer.Initialize(function->GetBytesPerIteration()); diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index 7957f2c47be8..5c91a5523eef 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -251,6 +251,10 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, throw BinderException("Invalid value for MAX_LINE_SIZE parameter: it cannot be smaller than 0"); } maximum_line_size.Set(NumericCast(line_size)); + if (buffer_size_option.IsSetByUser() && maximum_line_size.GetValue() > buffer_size_option.GetValue()) { + throw InvalidInputException("Buffer Size of %d must be a higher value than the maximum line size %d", + buffer_size_option.GetValue(), maximum_line_size.GetValue()); + } } else if (loption == "date_format" || loption == "dateformat") { string format = ParseString(value, loption); SetDateFormat(LogicalTypeId::DATE, format, true); @@ -264,6 +268,12 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, if (buffer_size_option == 0) { throw InvalidInputException("Buffer Size option must be higher than 0"); } + if (maximum_line_size.IsSetByUser() && maximum_line_size.GetValue() > buffer_size_option.GetValue()) { + throw InvalidInputException("Buffer Size of %d must be a higher value than the maximum line size %d", + buffer_size_option.GetValue(), maximum_line_size.GetValue()); + } else { + maximum_line_size.Set(buffer_size_option.GetValue(), false); + } } else if (loption == "decimal_separator") { decimal_separator = ParseString(value, loption); if (decimal_separator != "." 
&& decimal_separator != ",") { diff --git a/test/sql/copy/csv/afl/test_fuzz_4172.test b/test/sql/copy/csv/afl/test_fuzz_4172.test index 14cdfc62d2ba..2390bbaa1c9e 100644 --- a/test/sql/copy/csv/afl/test_fuzz_4172.test +++ b/test/sql/copy/csv/afl/test_fuzz_4172.test @@ -1,9 +1,9 @@ # name: test/sql/copy/csv/afl/test_fuzz_4172.test # description: fuzzer generated csv files - should not raise internal exception (by failed assertion). -# group: [csv] +# group: [afl] -# statement ok -# PRAGMA enable_verification +statement ok +PRAGMA enable_verification query I select count(file) from glob('data/csv/afl/4172/*'); diff --git a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test index 466758becc6f..0fa6a8506860 100644 --- a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test +++ b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test @@ -35,7 +35,7 @@ select * from read_csv('data/csv/test/multi_column_string.csv', COLUMNS=STRUCT_ 100000000 15519 785 p9 query IIII -select * from read_csv('data/csv/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) +select * from read_csv('data/csv/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=27) ---- 1 6370 371 p1 10 214 465 p2 @@ -53,7 +53,7 @@ SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn.csv', COLUMNS=STR 111 query I -SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80) +SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=100) 
---- 111 @@ -64,7 +64,7 @@ SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn_exc.csv', COLUMNS 111 query I -SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn_exc.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=60) +SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn_exc.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80) ---- 111 @@ -75,6 +75,6 @@ SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT 111 query I -SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=60) +SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80) ---- 111 diff --git a/test/sql/copy/csv/relaxed_quotes.test b/test/sql/copy/csv/relaxed_quotes.test index f8c6e8012c88..6bdfa8ede56b 100644 --- a/test/sql/copy/csv/relaxed_quotes.test +++ b/test/sql/copy/csv/relaxed_quotes.test @@ -78,12 +78,7 @@ statement ok drop table t; statement error -create table t as from read_csv('data/csv/unescaped_quotes/unescaped_quote_new_line_rn.csv', strict_mode=false, buffer_size = 20, header = 0) ----- - - -statement error -create table t as from read_csv('data/csv/unescaped_quotes/unescaped_quote_new_line_rn.csv', strict_mode=false, buffer_size = 20, header = 0) +create table t as from read_csv('data/csv/unescaped_quotes/unescaped_quote_new_line_rn.csv', strict_mode=false, buffer_size = 20, header = 0, delim = ';') ---- statement ok diff --git a/test/sql/copy/csv/test_validator.test b/test/sql/copy/csv/test_validator.test index 66a444809520..e44d97b23014 100644 --- 
a/test/sql/copy/csv/test_validator.test +++ b/test/sql/copy/csv/test_validator.test @@ -52,7 +52,7 @@ statement ok FROM read_csv('data/csv/validator/quoted_new_value.csv', columns = {'band': 'varchar', 'album': 'varchar', 'release': 'varchar'}, quote = '''', delim = ';', header = 0) statement ok -FROM read_csv('data/csv/validator/quoted_new_value.csv', columns = {'band': 'varchar', 'album': 'varchar', 'release': 'varchar'}, quote = '''', delim = ';', header = 0, buffer_size = 46) +FROM read_csv('data/csv/validator/quoted_new_value.csv', columns = {'band': 'varchar', 'album': 'varchar', 'release': 'varchar'}, quote = '''', delim = ';', header = 0, buffer_size = 48) statement ok FROM read_csv('data/csv/validator/single_column_quoted_newline.csv', columns = {'Raffaella Carrà': 'varchar'}, quote = '"', buffer_size = 24) From 7cba0a92b233eb21da4ef8f0ffab5fa8d6e4008c Mon Sep 17 00:00:00 2001 From: pdet Date: Mon, 10 Feb 2025 14:11:01 -0300 Subject: [PATCH 130/142] Adjust a couple more tests --- test/sql/copy/csv/maximum_line_size.test_slow | 2 +- test/sql/copy/csv/parallel/csv_parallel_buffer_size.test | 2 +- test/sql/copy/csv/parallel/csv_parallel_new_line.test_slow | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/sql/copy/csv/maximum_line_size.test_slow b/test/sql/copy/csv/maximum_line_size.test_slow index db5672073fbf..1ea62d7e6513 100644 --- a/test/sql/copy/csv/maximum_line_size.test_slow +++ b/test/sql/copy/csv/maximum_line_size.test_slow @@ -39,4 +39,4 @@ Be sure that the maximum line size is set to an appropriate value statement error select * from read_csv_auto('data/csv/issue_8320_3.csv.gz', max_line_size = 2097152, buffer_size = 10); ---- -BUFFER_SIZE option was set to 10, while MAX_LINE_SIZE was set to 2097152. 
BUFFER_SIZE must have always be set to value bigger than MAX_LINE_SIZE \ No newline at end of file +Buffer Size of 10 must be a higher value than the maximum line size 2097152 \ No newline at end of file diff --git a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test index 0fa6a8506860..e3b0531499a6 100644 --- a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test +++ b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test @@ -22,7 +22,7 @@ SELECT sum(a) FROM read_csv('data/csv/test/multi_column_integer_rn.csv', COLUMN 111111111 query IIII -select * from read_csv('data/csv/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=25) +select * from read_csv('data/csv/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=30) ---- 1 6370 371 p1 10 214 465 p2 diff --git a/test/sql/copy/csv/parallel/csv_parallel_new_line.test_slow b/test/sql/copy/csv/parallel/csv_parallel_new_line.test_slow index b9654da5d729..f9fb5c45385f 100644 --- a/test/sql/copy/csv/parallel/csv_parallel_new_line.test_slow +++ b/test/sql/copy/csv/parallel/csv_parallel_new_line.test_slow @@ -9,7 +9,7 @@ PRAGMA verify_parallelism statement ok PRAGMA enable_verification -loop i 25 100 +loop i 27 100 # Test read_csv auto with \n From fb12980e8e1911655d383ebc418a8c07b151131b Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 12 Feb 2025 07:15:05 -0300 Subject: [PATCH 131/142] More tests and fixes --- .../afl/20250211_csv_fuzz_crash/case_1.csv | Bin 0 -> 171 bytes .../afl/20250211_csv_fuzz_crash/case_10.csv | Bin 0 -> 209 bytes .../afl/20250211_csv_fuzz_crash/case_100.csv | 3 + .../afl/20250211_csv_fuzz_crash/case_101.csv | Bin 0 -> 204 bytes .../afl/20250211_csv_fuzz_crash/case_102.csv | Bin 0 -> 136 bytes 
.../afl/20250211_csv_fuzz_crash/case_103.csv | Bin 0 -> 86 bytes .../afl/20250211_csv_fuzz_crash/case_104.csv | Bin 0 -> 86 bytes .../afl/20250211_csv_fuzz_crash/case_105.csv | Bin 0 -> 132 bytes .../afl/20250211_csv_fuzz_crash/case_106.csv | Bin 0 -> 86 bytes .../afl/20250211_csv_fuzz_crash/case_107.csv | Bin 0 -> 137 bytes .../afl/20250211_csv_fuzz_crash/case_108.csv | Bin 0 -> 137 bytes .../afl/20250211_csv_fuzz_crash/case_109.csv | Bin 0 -> 122 bytes .../afl/20250211_csv_fuzz_crash/case_11.csv | Bin 0 -> 241 bytes .../afl/20250211_csv_fuzz_crash/case_110.csv | Bin 0 -> 116 bytes .../afl/20250211_csv_fuzz_crash/case_111.csv | Bin 0 -> 98 bytes .../afl/20250211_csv_fuzz_crash/case_112.csv | Bin 0 -> 134 bytes .../afl/20250211_csv_fuzz_crash/case_113.csv | Bin 0 -> 128 bytes .../afl/20250211_csv_fuzz_crash/case_114.csv | Bin 0 -> 98 bytes .../afl/20250211_csv_fuzz_crash/case_115.csv | Bin 0 -> 113 bytes .../afl/20250211_csv_fuzz_crash/case_116.csv | Bin 0 -> 159 bytes .../afl/20250211_csv_fuzz_crash/case_117.csv | Bin 0 -> 405 bytes .../afl/20250211_csv_fuzz_crash/case_118.csv | Bin 0 -> 134 bytes .../afl/20250211_csv_fuzz_crash/case_119.csv | Bin 0 -> 93 bytes .../afl/20250211_csv_fuzz_crash/case_12.csv | Bin 0 -> 241 bytes .../afl/20250211_csv_fuzz_crash/case_120.csv | Bin 0 -> 148 bytes .../afl/20250211_csv_fuzz_crash/case_121.csv | Bin 0 -> 228 bytes .../afl/20250211_csv_fuzz_crash/case_122.csv | Bin 0 -> 220 bytes .../afl/20250211_csv_fuzz_crash/case_123.csv | Bin 0 -> 264 bytes .../afl/20250211_csv_fuzz_crash/case_124.csv | Bin 0 -> 415 bytes .../afl/20250211_csv_fuzz_crash/case_125.csv | Bin 0 -> 418 bytes .../afl/20250211_csv_fuzz_crash/case_126.csv | 30 + .../afl/20250211_csv_fuzz_crash/case_127.csv | 28 + .../afl/20250211_csv_fuzz_crash/case_128.csv | Bin 0 -> 279 bytes .../afl/20250211_csv_fuzz_crash/case_129.csv | 30 + .../afl/20250211_csv_fuzz_crash/case_13.csv | Bin 0 -> 186 bytes .../afl/20250211_csv_fuzz_crash/case_130.csv | Bin 0 -> 759 bytes 
.../afl/20250211_csv_fuzz_crash/case_131.csv | Bin 0 -> 199 bytes .../afl/20250211_csv_fuzz_crash/case_132.csv | Bin 0 -> 291 bytes .../afl/20250211_csv_fuzz_crash/case_14.csv | Bin 0 -> 175 bytes .../afl/20250211_csv_fuzz_crash/case_15.csv | 20 + .../afl/20250211_csv_fuzz_crash/case_16.csv | Bin 0 -> 177 bytes .../afl/20250211_csv_fuzz_crash/case_17.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_18.csv | 8 + .../afl/20250211_csv_fuzz_crash/case_19.csv | 5 + .../afl/20250211_csv_fuzz_crash/case_2.csv | Bin 0 -> 171 bytes .../afl/20250211_csv_fuzz_crash/case_20.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_21.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_22.csv | Bin 0 -> 176 bytes .../afl/20250211_csv_fuzz_crash/case_23.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_24.csv | 7 + .../afl/20250211_csv_fuzz_crash/case_25.csv | 7 + .../afl/20250211_csv_fuzz_crash/case_26.csv | 7 + .../afl/20250211_csv_fuzz_crash/case_27.csv | 8 + .../afl/20250211_csv_fuzz_crash/case_28.csv | Bin 0 -> 175 bytes .../afl/20250211_csv_fuzz_crash/case_29.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_3.csv | Bin 0 -> 160 bytes .../afl/20250211_csv_fuzz_crash/case_30.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_31.csv | Bin 0 -> 189 bytes .../afl/20250211_csv_fuzz_crash/case_32.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_33.csv | Bin 0 -> 186 bytes .../afl/20250211_csv_fuzz_crash/case_34.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_35.csv | 27 + .../afl/20250211_csv_fuzz_crash/case_36.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_37.csv | 10 + .../afl/20250211_csv_fuzz_crash/case_38.csv | 6 + .../afl/20250211_csv_fuzz_crash/case_39.csv | Bin 0 -> 181 bytes .../afl/20250211_csv_fuzz_crash/case_4.csv | Bin 0 -> 171 bytes .../afl/20250211_csv_fuzz_crash/case_40.csv | 4 + .../afl/20250211_csv_fuzz_crash/case_41.csv | 3 + .../afl/20250211_csv_fuzz_crash/case_42.csv | 4 + .../afl/20250211_csv_fuzz_crash/case_43.csv | 4 + .../afl/20250211_csv_fuzz_crash/case_44.csv | Bin 0 -> 209 bytes 
.../afl/20250211_csv_fuzz_crash/case_45.csv | 4 + .../afl/20250211_csv_fuzz_crash/case_46.csv | 5 + .../afl/20250211_csv_fuzz_crash/case_47.csv | Bin 0 -> 204 bytes .../afl/20250211_csv_fuzz_crash/case_48.csv | Bin 0 -> 241 bytes .../afl/20250211_csv_fuzz_crash/case_49.csv | 3 + .../afl/20250211_csv_fuzz_crash/case_5.csv | Bin 0 -> 171 bytes .../afl/20250211_csv_fuzz_crash/case_50.csv | Bin 0 -> 157 bytes .../afl/20250211_csv_fuzz_crash/case_51.csv | Bin 0 -> 201 bytes .../afl/20250211_csv_fuzz_crash/case_52.csv | 5 + .../afl/20250211_csv_fuzz_crash/case_53.csv | Bin 0 -> 88 bytes .../afl/20250211_csv_fuzz_crash/case_54.csv | Bin 0 -> 234 bytes .../afl/20250211_csv_fuzz_crash/case_55.csv | Bin 0 -> 232 bytes .../afl/20250211_csv_fuzz_crash/case_56.csv | Bin 0 -> 288 bytes .../afl/20250211_csv_fuzz_crash/case_57.csv | Bin 0 -> 288 bytes .../afl/20250211_csv_fuzz_crash/case_58.csv | Bin 0 -> 319 bytes .../afl/20250211_csv_fuzz_crash/case_59.csv | Bin 0 -> 239 bytes .../afl/20250211_csv_fuzz_crash/case_6.csv | Bin 0 -> 171 bytes .../afl/20250211_csv_fuzz_crash/case_60.csv | Bin 0 -> 236 bytes .../afl/20250211_csv_fuzz_crash/case_61.csv | Bin 0 -> 236 bytes .../afl/20250211_csv_fuzz_crash/case_62.csv | Bin 0 -> 2371 bytes .../afl/20250211_csv_fuzz_crash/case_63.csv | Bin 0 -> 2399 bytes .../afl/20250211_csv_fuzz_crash/case_64.csv | Bin 0 -> 261 bytes .../afl/20250211_csv_fuzz_crash/case_65.csv | 27 + .../afl/20250211_csv_fuzz_crash/case_66.csv | Bin 0 -> 268 bytes .../afl/20250211_csv_fuzz_crash/case_67.csv | Bin 0 -> 291 bytes .../afl/20250211_csv_fuzz_crash/case_68.csv | Bin 0 -> 252 bytes .../afl/20250211_csv_fuzz_crash/case_69.csv | Bin 0 -> 233 bytes .../afl/20250211_csv_fuzz_crash/case_7.csv | Bin 0 -> 171 bytes .../afl/20250211_csv_fuzz_crash/case_70.csv | Bin 0 -> 272 bytes .../afl/20250211_csv_fuzz_crash/case_71.csv | Bin 0 -> 251 bytes .../afl/20250211_csv_fuzz_crash/case_72.csv | Bin 0 -> 251 bytes .../afl/20250211_csv_fuzz_crash/case_73.csv | Bin 0 -> 810 
bytes .../afl/20250211_csv_fuzz_crash/case_74.csv | Bin 0 -> 236 bytes .../afl/20250211_csv_fuzz_crash/case_75.csv | Bin 0 -> 231 bytes .../afl/20250211_csv_fuzz_crash/case_76.csv | Bin 0 -> 232 bytes .../afl/20250211_csv_fuzz_crash/case_77.csv | Bin 0 -> 87 bytes .../afl/20250211_csv_fuzz_crash/case_78.csv | Bin 0 -> 92 bytes .../afl/20250211_csv_fuzz_crash/case_79.csv | Bin 0 -> 106 bytes .../afl/20250211_csv_fuzz_crash/case_8.csv | Bin 0 -> 175 bytes .../afl/20250211_csv_fuzz_crash/case_80.csv | Bin 0 -> 85 bytes .../afl/20250211_csv_fuzz_crash/case_81.csv | Bin 0 -> 101 bytes .../afl/20250211_csv_fuzz_crash/case_82.csv | Bin 0 -> 90 bytes .../afl/20250211_csv_fuzz_crash/case_83.csv | Bin 0 -> 101 bytes .../afl/20250211_csv_fuzz_crash/case_84.csv | Bin 0 -> 455 bytes .../afl/20250211_csv_fuzz_crash/case_85.csv | Bin 0 -> 449 bytes .../afl/20250211_csv_fuzz_crash/case_86.csv | Bin 0 -> 405 bytes .../afl/20250211_csv_fuzz_crash/case_87.csv | Bin 0 -> 397 bytes .../afl/20250211_csv_fuzz_crash/case_88.csv | Bin 0 -> 412 bytes .../afl/20250211_csv_fuzz_crash/case_89.csv | Bin 0 -> 393 bytes .../afl/20250211_csv_fuzz_crash/case_9.csv | Bin 0 -> 200 bytes .../afl/20250211_csv_fuzz_crash/case_90.csv | Bin 0 -> 421 bytes .../afl/20250211_csv_fuzz_crash/case_91.csv | Bin 0 -> 446 bytes .../afl/20250211_csv_fuzz_crash/case_92.csv | Bin 0 -> 397 bytes .../afl/20250211_csv_fuzz_crash/case_93.csv | Bin 0 -> 442 bytes .../afl/20250211_csv_fuzz_crash/case_94.csv | Bin 0 -> 444 bytes .../afl/20250211_csv_fuzz_crash/case_95.csv | Bin 0 -> 264 bytes .../afl/20250211_csv_fuzz_crash/case_96.csv | Bin 0 -> 397 bytes .../afl/20250211_csv_fuzz_crash/case_97.csv | Bin 0 -> 373 bytes .../afl/20250211_csv_fuzz_crash/case_98.csv | Bin 0 -> 270 bytes .../afl/20250211_csv_fuzz_crash/case_99.csv | Bin 0 -> 322 bytes .../scanner/string_value_scanner.cpp | 26 +- .../sql/copy/csv/afl/fuzz_20250211_crash.test | 539 ++++++++++++++++++ 134 files changed, 864 insertions(+), 10 deletions(-) create 
mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_1.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_10.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_100.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_101.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_102.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_103.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_104.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_105.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_106.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_107.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_108.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_109.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_11.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_110.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_111.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_112.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_113.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_114.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_115.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_116.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_117.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_118.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_119.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_12.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_120.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_121.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_122.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_123.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_124.csv create 
mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_125.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_126.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_127.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_128.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_129.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_13.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_130.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_131.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_132.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_14.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_15.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_16.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_17.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_18.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_19.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_2.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_20.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_21.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_22.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_23.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_24.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_25.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_26.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_27.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_28.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_29.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_3.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_30.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_31.csv create mode 100644 
data/csv/afl/20250211_csv_fuzz_crash/case_32.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_33.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_34.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_35.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_36.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_37.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_38.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_39.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_4.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_40.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_41.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_42.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_43.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_44.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_45.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_46.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_47.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_48.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_49.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_5.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_50.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_51.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_52.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_53.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_54.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_55.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_56.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_57.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_58.csv create mode 100644 
data/csv/afl/20250211_csv_fuzz_crash/case_59.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_6.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_60.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_61.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_62.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_63.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_64.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_65.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_66.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_67.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_68.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_69.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_7.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_70.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_71.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_72.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_73.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_74.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_75.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_76.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_77.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_78.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_79.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_8.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_80.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_81.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_82.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_83.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_84.csv create mode 100644 
data/csv/afl/20250211_csv_fuzz_crash/case_85.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_86.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_87.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_88.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_89.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_9.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_90.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_91.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_92.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_93.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_94.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_95.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_96.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_97.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_98.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_99.csv create mode 100644 test/sql/copy/csv/afl/fuzz_20250211_crash.test diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_1.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..2dff96bfb5f9b182d598d7cfb9ed20a53a95b5be GIT binary patch literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQQldagNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zS!VA==5Q!jJNQMa#E;!=XCL}n`>bh#tx;xaT)GBhv%QU(GH M28=+;KnH3#07#T7&;S4c literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_10.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_10.csv new file mode 100644 index 0000000000000000000000000000000000000000..1e43526015ac6450d9ea8a7f25505492a6628370 GIT binary patch literal 209 zcmeZK<<#ZYWz=U@Pf=%-XOw2J)#U^O1_nk32n&T&FH6i(w^C4ui;RoQj{^aBFzEv( zmAI6up(?>_2F3=suDG}eu&z>M0MZRMA~6SIVmyQ!w-913m=s`OcopL6 Y9%2OsjJ&J{hCs+=XrNdE;z>Q)Ns!66u?1cf?> zXp|-9D1gq;08fT;u8* 
e2hyWn3DnP(&BBo7;h~od(+w0=0IOF984Li$;)NM g%f-vfrJxWO83zUKVA==9)d3lg!YN7yS)>l70Hy3CLI3~& literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_103.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_103.csv new file mode 100644 index 0000000000000000000000000000000000000000..627236618725faeee0f2a66fdc8dfaf2ebff8b19 GIT binary patch literal 86 zcmY#Za8yXnNGwt}Qcy2TOq1u(;^bstV5sMWgG41OLmj2${2V40o-Fk^D9`~ZFG^;J L^FbE@>Q)B;4d)YR literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_104.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_104.csv new file mode 100644 index 0000000000000000000000000000000000000000..3ffe8f761076d228091c82edc09de307cc15f5ff GIT binary patch literal 86 zcmY#Za8yXnNGwt}Qcy2TOq1u(Vt{}+AkYDcl`HUaS@LpO@pAF%M#P%IkXrcKpCRGD79RH rm&=lu%ZitaSBXnOAuciw3fv(ygz_nbit2#ODN6Q%seSP=?B(FP9ZB7q1eRfM5wfq*-h^Z}D`>Sb;@>h`rtTuRkYm0&glBO?P$S6o~KSXU`B0O{1ObReCyb5u353vFRMqX9}Lm=cbG|)L`V88_h05se=hyVZp literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_110.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_110.csv new file mode 100644 index 0000000000000000000000000000000000000000..48f32730da7f3dc915d56e92b8784e748387b441 GIT binary patch literal 116 zcmZ=%NX|$sQr9uGG`D1Q;FRSA10x0Xvcxob4lM>SP*!jRD@!d`;N{Tb<+9@C;#J~O aP>73+g93Lj?E~ZLfb=tBfTCoOaq0jy9u1bwdLn;L-t81_lbr8Hq*eMhfa>iD~j2TD(eJ3JP(NV3k0^9nA27adkk- N7%@OmG6N8(0|1OQ6t@5X literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_112.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_112.csv new file mode 100644 index 0000000000000000000000000000000000000000..73296b846c3bb9a5983b32a841e35a29f892f0c0 GIT binary patch literal 134 zcmY#Za8yXnNGwt}Qcy2TOq1s@Ff>v%umS-~FbO4iS%DJTR#sXJV4w`umRhdB%Vo*S jWyQD literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_113.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_113.csv new file mode 100644 index 
0000000000000000000000000000000000000000..91f981ac0cc27ecc6756b472a295f0772c608e51 GIT binary patch literal 128 zcmZQ%1cDcg43?Ib%-?_n8sO#CRrzmeSp@`^mfBFsYW1?j9Ca&&S|u(eOG{p`s2-X| aOG_(-irN$gC79w0uyQC*y%MAxWHQ)N1N?b~YhK5`U3UQHf Saj|i6>Xw#z2t7zFkahqQA{E#G literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_115.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_115.csv new file mode 100644 index 0000000000000000000000000000000000000000..e58f1e9f64aa92c00ad58df71c909366c8275960 GIT binary patch literal 113 zcmZQ%WMnXiQ!ldw0+s)kmQ_^(RhE{#Ag-kzm_#Oc83Z7Ts(^~Lp)zhc>h`rtTuSaC fLFx<*Zy6XE1U^HgK*qR2jQL;Bz+lPCtE&P4BGDVB literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_116.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_116.csv new file mode 100644 index 0000000000000000000000000000000000000000..f30120d3aa53c5e68cd1bbc85b14f504650971eb GIT binary patch literal 159 zcmXriDJd#V$mdNe83IL-=FfIT9 literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_117.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_117.csv new file mode 100644 index 0000000000000000000000000000000000000000..aa7e242fad58bbc5cfb7c8d9f456b8c407383cd9 GIT binary patch literal 405 zcmZQ%!~+NBgSs58nlN;BB%a&mE^gE%1Y0TIZCFfcGQFml0w6(a)!3lj!lG*DM( z`CreZo~RzKpzi5c@9Jg=2XT>cartp^c5!j;alE{`P;o6@US2R$$I>z`4rmJl$mX~> z^)j~{b^BT+E~RRXGS_BxtNTDU5&-+b2IPnP)#_!5IqFsl3Q!w>y4^uW05w=utBYX; z%3@+=3=QfKA2K{e_!Q)pxHuV*O@FvDK*rR72rxjf;y{K90dd*?1~g?7jE0Ivh6Y@Q RhK9f(s|Qj*z{M382LNjHN|OKp literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_118.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_118.csv new file mode 100644 index 0000000000000000000000000000000000000000..12c1eefed8aeda7b8b899c4fcc22e88442b2ba70 GIT binary patch literal 134 zcmY#Za8yXnNGwt}Qc#a$U|@)gi}Q(7h6<*ZEAVnz@^V@6a`7s0DJaC{s9PzhSE?)2 nDsd@QtCuAL`Rb8CEnwgdCVim1I2{F`0{oyVu_zg6CxbcwCtW4~ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_119.csv 
b/data/csv/afl/20250211_csv_fuzz_crash/case_119.csv new file mode 100644 index 0000000000000000000000000000000000000000..79fea4b0758b2a85f0077dc5ba92526b45a2a78b GIT binary patch literal 93 zcmY#Za8yXnNGwt}Qcy2TOq1u(VgLeOB`yVpxX3sl0P>W9YCz05pSU_gE|0|!V>WS literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_12.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_12.csv new file mode 100644 index 0000000000000000000000000000000000000000..4c1a8352beb22246e53a5cd2b5c696744fca8535 GIT binary patch literal 241 zcmXpsGUhUb;{3c+AUhb&H)QhiL#)7nk(brL5D2*p4Rj0*47i{G02tahzyJUM literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_120.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_120.csv new file mode 100644 index 0000000000000000000000000000000000000000..dc91aa419dda8f81202f056122a3368eb813209c GIT binary patch literal 148 zcmY#Za8yXnNGwt}Qcy2TOq1u(Vqi!tN{tHvaf`}X1z5p=*T4b7P~hdVCcu pN{!P2DpM>fM~EtMDJaB6#zBEQjDd^xiDQIFfecD4N@f58bpVW-Bq{&^ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_121.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_121.csv new file mode 100644 index 0000000000000000000000000000000000000000..416d4de5e3f6fa6a8f82606e27787d3778dc14a1 GIT binary patch literal 228 zcmX|)y$ixH5XJ8>OQJ(}2TMS13O1V*#6h%6$2zzih#d=VS*qwCE@&>1{@~tyJa~tg z`C_})Ztie1qn>buOV~NBUpk|-MgT}l?4XMA?@+i7RF@@RnC~n!BDYvZc(6UB!9Z_5 zMm*+t6cDU6cBJs0B9eVh5!|LFOA()T$EK?jpa6X3E8`bJ&YzS#GlkPY+x=7C`~a1B BD$M`@ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_122.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_122.csv new file mode 100644 index 0000000000000000000000000000000000000000..29da7018bbd835f6eb234d0ea9d5faf4807dded3 GIT binary patch literal 220 zcmZQ%gn<8yKB}C$+`5eV%<3uXjPi`q47R$Qob{X#zyhLlpd1LLUY3}nZl$0Q7a13q z9|r>NU=m1hfq)P;P!p#F16)eg>SYZuBkF+KV&g&~M)<%C0@4Bu3=AQz?jcrSz{tz$ eZ)jj>z-7n-0$_^}gt}o|oO+pCj=Fs<$QA&eh9_PC literal 0 HcmV?d00001 diff --git 
a/data/csv/afl/20250211_csv_fuzz_crash/case_123.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_123.csv new file mode 100644 index 0000000000000000000000000000000000000000..3eebaa16b6aa47c88857f1169f10054a423d13d9 GIT binary patch literal 264 zcmZQ%1Oo;zc*DS82?9(kJk{!n>Q?Gr!Tx^gwE~QW)k^jGIq``~Rt7ps$v{>TkX6eS z?Fj=eKnA)POb(UK%P&z#RLCmM&x_?s&Ig$VG}RDps!=UhHP{A_C?ktpwK`CVx|M=` za7a+7bBIP+Vva(XW013l<62GiT7_zLpn$qntrC|KlnJ3w?S%5I6e^$!85)2_Rwy88 F0|361M_vE` literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_124.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_124.csv new file mode 100644 index 0000000000000000000000000000000000000000..955e7021fe1e00e1fab21209acb1864d3fc2f49c GIT binary patch literal 415 zcmZQ%1cU!T0AYbihR+b{DO}7_3<@A3mM{s0xX3sta8Hhli;c4Y@nEuWnhOT37#SE? zm@ojNfx0@&|9U3%M0G0#bx*%~S2s&IfLdV}7v~hi1&KL83&Ebq#{^*W zz{WzY4GI<$({c9j^l{Yz1&0|lOvK6<7#IY=o@GD;86(I8adBK+adDLpxA}k#1iH{Q i7w$r!0~ifO84VSU3=Oyp4GkF>8tQ=*5O8tbjROE@LR3Bg literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_125.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_125.csv new file mode 100644 index 0000000000000000000000000000000000000000..9c24c33b7bc7f8f769d417ddff855a26ad2b6d99 GIT binary patch literal 418 zcmZQ%#0D5XgGCvh!X+)mpa3Fb36oHWi;RN;_hb;m2SmVR;WQTvSTQm%urOf&Mgw(q zmjCrk>WS)B3hJJI^{#G~Z~(Q!E-ubJj+a*#DDLVS9AXZnwT)}Q%=jvxrS2e`_LVx%D!iGb$1O?*HKSWx2jezOU#LjtLB6Q^|btakew-T5gORmAE-p@()AgCvQ`8yd8KoKSSF0-k%>imtU|gnOtzMFuo2qUFrix1va|=qU)OCO+ zrWPj`WfqiV=H~$=%My!{GZKr`Ye9;2kmR_G42%qP4Ge)O#K6D`jC67{Q}Qy?GfEV4 F@&VzKR_y=) literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_126.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_126.csv new file mode 100644 index 000000000000..b0e23e1f3f15 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_126.csv @@ -0,0 +1,30 @@ +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" 
+;a "" +10" +199;1000;"a "" +;a "" +10" +1 "" +10" +199;1 +;a "" +10" +199;1000;"a "" +;a99;1000;"a ""000;"a "" +;a "" +10 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_127.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_127.csv new file mode 100644 index 000000000000..7c1e2505553f --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_127.csv @@ -0,0 +1,28 @@ +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +1 ""#10" +199;1 +;a "" +10" +199;1000;"a "" +;a99;1000;"a ""000;"a ""a "" +;a99;1 \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_128.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_128.csv new file mode 100644 index 0000000000000000000000000000000000000000..ae9fd5dd6f8f0d3ff80db4a464ad756378f2ef53 GIT binary patch literal 279 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMphJ#ZA46$ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_129.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_129.csv new file mode 100644 index 000000000000..475ffa66fb2c --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_129.csv @@ -0,0 +1,30 @@ +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +1 "" +10" +199;1 +;a "" +10" +199;1000;"a "" +;a99;1000;"a ""000;"a "" +;{{{{{{{{{{{{{a "" +10" diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_13.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_13.csv new file mode 100644 index 0000000000000000000000000000000000000000..738932afa8f621cb8c6703b3a12350cb65c74fe5 GIT binary patch literal 186 zcmXpsGUhS_;}49eK%ZGXMV-M92pAa{locEm(o)NTD*mIYLguO03NRX0D<$XW#3w3Q z8R&pnNkCRDS9D%}i9(`6R&jn_EEimfAxMdi0$5$L5+lgQI5;SUu)r3?sT&#q0hf-B OdXjofwR%}%jyeFSPbPf; literal 0 HcmV?d00001 diff --git 
a/data/csv/afl/20250211_csv_fuzz_crash/case_130.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_130.csv new file mode 100644 index 0000000000000000000000000000000000000000..d476d82e98e5d29ceaf19bb50b4dfd79419e5127 GIT binary patch literal 759 zcmcJK*-jKu5Qb&BAS4=Jz@%x^$U`{chj;P>i_G z=gvo|)n{hs8cFAy3yT*nUTR%lx^nf}^2+raH*eiuy|Z?=eQ*8##)F5C9(OjMY&|tE zx4XTaXU|`}?7w1fM7cz{M7cz{M7cz{M7fkYRr=1C za*1+j87P;!MNuh&A}E3)D1x$$vW;4mSYDa0%va|B{dew}uPm?3SC&_N!#7#24Bv<| z;*2;W&WJPOj5wp8;Tyi;8@}N?zLW3xj_>%6@A!`I_>S*e_V3Su!2?7m392Xbo4#aVBK43DAsh)wwu9{0d zQQb;G-P5n$)!j8nT}M4h-KrXBVh&JLAubZ46iugknOly!eXSCg5=Y*WQJ+~o oMV(QeQJRB^mw^RrlmO6KA+GKrLFx<*Zy60m84V2$3=Oyp0sLDqbpQYW literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_132.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_132.csv new file mode 100644 index 0000000000000000000000000000000000000000..2bfd1d477eab24c69f8fd82aa60f0548a9100d66 GIT binary patch literal 291 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMph7?IK>NBgSs58nlN;4}8R;#Dw=c`*OsHfzY zCgr56C$KZB>!?d}FhBuNmdnsc$CS&^Ku^cOfXh+O2n1Y!n2Cj_T0K$SO5H1%(LkO< z3#x-f2O{d}7vk#f8l7C_}wgfYGp8DLFqU zK2gcaKnKi90h2n( zuA`o$ZdI*bmY4$+6^M(Bg93Lj?E~Sdm$~Jr+t(^_DZx}CvlX!G;xaT)GBhv%QU(GH M28=+;KnH3#03I_chX4Qo literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_20.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_20.csv new file mode 100644 index 000000000000..ac53beaa9544 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_20.csv @@ -0,0 +1,6 @@ +123 +123 +12ð'}1{"col_a":0,"col_b":0} +[h`r', 'yarchar', 'varchanot aþjson] +{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_21.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_21.csv new file mode 100644 index 000000000000..7a4be8dbbd6f --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_21.csv @@ -0,0 +1,6 @@ +123 +129*99999999999999 - nda999-93 +12ð'}1{"col_a":0,"col_b":0} +[not a json] 
+{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_22.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_22.csv new file mode 100644 index 0000000000000000000000000000000000000000..2ddf7b2c3297a4eb76023ad4664a9f57e0ac9df7 GIT binary patch literal 176 zcmXryP%kY>(={|xm+$7_xL{<=WeCO}7>NbywE~QW)k?|vIq``~Rt7p?RuYg^%N3oM zU!stxkX4+Y7s~}#VhB>AqX1Tyti-6#tez4F2c-}e*z!1aLjxe-($P^*Vu-0$FH6i( F2LJ)5B@_Sv literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_23.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_23.csv new file mode 100644 index 000000000000..9e40b5220838 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_23.csv @@ -0,0 +1,6 @@ +123 +123 +12ð'}1{"col_a":0,"col_b":0} +[not a json] +{"col_a":1,co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_24.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_24.csv new file mode 100644 index 000000000000..5a0c0c194278 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_24.csv @@ -0,0 +1,7 @@ +123 +123 +12ð'}1{"coolÿÿÿ2} +;STRUCTl_a":0,"col_b":0} +[not a json] +{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_25.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_25.csv new file mode 100644 index 000000000000..99e0a7375746 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_25.csv @@ -0,0 +1,7 @@ +123 +123 +12ðn] +{"col_a":1,"co, "co'}1{"col_a":0,"col_b":0} +[not a json] +{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_26.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_26.csv new file mode 100644 index 000000000000..48473ccb3069 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_26.csv @@ -0,0 +1,7 
@@ +123 +123 +12ð'}1{"col_a":0,"col_b":0} +[not a json] +{"col_a":1,"co, "col_son] +{"col_a":1,"co, "col_cc"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_27.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_27.csv new file mode 100644 index 000000000000..fde479ec47d4 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_27.csv @@ -0,0 +1,8 @@ +123 +123 +12ð'}1{"col_a":0,"col_b":0} +[not a json] +{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'1~000 +,,'b'\{':0,"col_b":0} +[not a json] +{"cval' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_28.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_28.csv new file mode 100644 index 0000000000000000000000000000000000000000..99c23520e19a116ab8352b598cb944bd7e684589 GIT binary patch literal 175 zcmXpsGUhS_;}49)0`*z}M#E~QPhM`)eH;_0_p(0zaYi{ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_29.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_29.csv new file mode 100644 index 000000000000..a4787dbb0d68 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_29.csv @@ -0,0 +1,6 @@ +123 +  0123 +12ð'}1{"col_a":0,"col_b":0} +[not a json] +{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_3.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..f7dbe5492a56cc7bb3e271f4c85514b3a005f2f8 GIT binary patch literal 160 zcmXpsGUhS_VtpXa&r1ce8TpyjQ`8yd8LPR}6VfbDFYp-;Q;i# BC}RKs literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_30.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_30.csv new file mode 100644 index 000000000000..733a0aa58558 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_30.csv @@ -0,0 +1,6 @@ +123 +123 +12ð'}1{"col_a":0,"col_b":0} +[not a json] +{"0,"cocol_a":1,"co, 
"col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_31.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_31.csv new file mode 100644 index 0000000000000000000000000000000000000000..ac1dfd8e50a13afb2c9069b16da0c24e46325fce GIT binary patch literal 189 zcmZQ#G?3@uW`F<|fokNGwpV6<{>1R!Yt{00J%@LjwaH z!}=s8D}!3D=)C+Ag+zs{;{3c=E|8L(_(UZuLmePXM*+-DR$|m=R!@n8gHi|!Y`ErT+w;?C0sg&1_nBY z^#%r9hCtc?1{4w%vWoNbV!7a24MAFU6u|nEl^FGz)l=f&pcKLaTN7{EiGhKIiP3EHMWtst^|$2LOW@FP;1=M&Cq*;K0fg!}z ZJtRn-q2Vo~p(vxFp@E?Rm!Sbr4gl0pE!6-3 literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_4.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..ccc7d2986d19d813a245c83b24f4e6518e7d6fdd GIT binary patch literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztogNcEGg^AIC#jcu5JyG3CL0!wQ-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!=XBR8&B)trQfn>*6vrP%<r(Wik!)srw#HCbS3giO88kkZB0R{#TTa1x0k%5JQK`A*uCq7BZ z%BYE}niIr{OH{Hl)B&<|6u@%HN{sr<>M1}&b<~s8t#Tmd#l=AlhM5befd&JO&QS*d DACep} literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_48.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_48.csv new file mode 100644 index 0000000000000000000000000000000000000000..947747bb1c464de190e13f0f291157e8c0099300 GIT binary patch literal 241 zcmZQ%LL6|k1Bhf~lwnk_ z6<{>1R!Yv#iBD9rGSL1HVkH4twOrA8`6czP?ygDdI_gR4R@Lfdi8<<=K)}GrPy?b7 z04&15ARuX_prDwjkX4+Y7s~}V)ldg$zK#Oel4K=DeP;FAxVY>%2q=Y6VCTSyJOv&e HeUNPcA@evD literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_49.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_49.csv new file mode 100644 index 000000000000..199641f8f80d --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_49.csv @@ -0,0 +1,3 @@ +999+9899999999 not a json] +{"colËËËËËËËËËËËËËËËËËËËËËË 01_a":1,"co+ "col_c"/'d^^^^^^^^99@9999999^^u^^^^0苹æž0 +,,'b'\{00^^^'1'val' \ No newline at end of file diff --git 
a/data/csv/afl/20250211_csv_fuzz_crash/case_5.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..c754534b4502b986fb0e49b43982f99450eb5dd0 GIT binary patch literal 171 zcmXpozyj1$)EVU&r8$@w7+9DX4Or}|xzrQYtrXNf{pwxaU4zth)RWY$s@2O9bAX}( zaglLQ;LacbWcfgN>Sb;@>h`rtTuNY-|Ev_?Y@jX$BwcwR4G>*ih6YN81_nUNK!Cx3 M5l9*6K#Yq608SGt7ytkO literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_50.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_50.csv new file mode 100644 index 0000000000000000000000000000000000000000..0cb4a24515a27da97c355caf071ae0780e6f5dd7 GIT binary patch literal 157 zcmZQ%!~_1lL6>A;VKl5(O3u%TPgJrp(Ebl%B>`ErT+w;?B?^fOS;hHzv1_q_WneKb iB~CEl0+Xe2U;wrxPTkS~2)J}~)RWX>s@2O9bJPLp{wLo6 literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_51.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_51.csv new file mode 100644 index 0000000000000000000000000000000000000000..144ac1887d73bb137a8569634ff53718ceaba3ef GIT binary patch literal 201 zcmZQ%WMp{3NDQ#FL{_B==DubCi>cQNFd9}XCFkeFe+Gg?B`ZUnIm!7tV5Wige~?6y zl9fR%S9D%}yc+~4Br0ST=jX+8K{SFj0gcd60GpGn#Hi1#o)QNKr4SawPoRx)>V^hD Sz@?+3o}?aAtzMRxqYeNM0WIPH literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_52.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_52.csv new file mode 100644 index 000000000000..d702dcc8f36e --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_52.csv @@ -0,0 +1,5 @@ +a json] +{"col_a":1,"c'}1{"col_a":0+ÿcol_b":0} +[not a json] +{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 +,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_53.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_53.csv new file mode 100644 index 0000000000000000000000000000000000000000..869b03087ab2396a74eeba7aba188141e5a73493 GIT binary patch literal 88 zcmY#Za8yW3Emz>>vgB1@U|_K3<+9=BvgPHnd!<#OQVa#Yt*FH_f1P%leNEK1Hu 
qEGkN7$Vo}m2`)@5D$t44G0`zJ(lN6%GSvwV4f6DJ^mPRqqYeP{7ZTL~ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_54.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_54.csv new file mode 100644 index 0000000000000000000000000000000000000000..b57920716e6cb5c013518bbe15cf08a20b51a6ba GIT binary patch literal 234 zcmZQ%1OtYr5E@8YLVzKV;^o!l%}Ytt3(thhLKzI`0*val0*r>$O3C>-@rg=S2HO9V zfUG1StClM|FTX?~Q6Z~1KQERGs6>bHKckN-r!KcHqdv2GiaMiwzchobE+-hgV202j zvVj54RxeA;Q4dm3h>MJit5xDs%8vs9cQEM#CM_-9-3{GqGF6RKT}mu1K*g ztF}Vra;YbR6ngsAySlpusq3gGsqX@+NX&s~h>OdQ0|ED`Akrr;2t=uux#g(a*D7%- z0adyJRo;iVNe4tRI6ycrLGI(_wX_5xowy)%hK9F{hN6syhDL@4T!zSiiz_Y;0Qhz@ A9RL6T literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_56.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_56.csv new file mode 100644 index 0000000000000000000000000000000000000000..fd74276fc29f8a24dbe37c29362abbbfad62c68b GIT binary patch literal 288 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dbzrJwR%}%PF!4^dIJLk1CWk$2N7{TaX}zTz055~-M&_d3kZQKU31i}?gM#N zKsG~zx|M=L9J8WewR&29K9HlHl3$vXld4|J%?vbKn(;rQk1D4ww=SbTvpU!{KOfub zav}qTxX8G;2_TCYB{_gDWdKpi3|qHug*pT14lqEt1L&lDun^3h2>HYub!!EXTLHIn BLOuWh literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_57.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_57.csv new file mode 100644 index 0000000000000000000000000000000000000000..22a39d8efc129a9083c90c825b443414e65c3e7c GIT binary patch literal 288 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dXl<%wR%}%j=GhCLR@4V7`TH;pSU0ptzPDqqi$cT#07*vm99XQ_klbsAe*5< z-6}yLj#*K#T0Jd4A83SnN`7flPO5q>H#5*|X~zGIKB}C$+`5eV%<5p*{CsSy%ZUuY wmga*jVwB|g$H2ep^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dXl<%wR%}%j=GhCLR@4V7`TH;pSU0p4U@oV#ytAkzh^RcZiCo%wA3O1XNA0#<| z9${bvQpyZlw{DGtxH8TM?5;TVGPfLcd!WOVfJ}&6Vcv*40`dk(9_USL1&|s5DBnh? 
literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_59.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_59.csv new file mode 100644 index 0000000000000000000000000000000000000000..c04e3fedf310e26e11fc8f149a2ff7e12a69e416 GIT binary patch literal 239 zcmZQ%WMpIjqNhL_0xUs*fs6SCOn{LwMV(QeQJR5~fq{jA(Li0D#jcu5JyFd{LEY1@ z-qqbTNL@$0MBThvy(}?D-AX|rj`2UEFDs)yvpSMaCPo9GKA;&4>WM%_NLrKBt*UiF zT77|9Bje)o<3PY2O!|PyIQ25O9CiCzB`zhHB@i}Hr#crGCl?U#fK_Hdb=8CDN)RCc Pv@yihJtRn-q2Vn6i}W)# literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_6.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_6.csv new file mode 100644 index 0000000000000000000000000000000000000000..bcb439094c5584d2b9141561c8c54a8ad18e8ae3 GIT binary patch literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztngNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!=XCgs^MCx)iYM;xaT)GBhv%QU(GH M28=+;KnH3#03_ckl>h($ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_60.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_60.csv new file mode 100644 index 0000000000000000000000000000000000000000..3d726a869239ab47a0376d4c77df4eff5ab3b781 GIT binary patch literal 236 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dXl<%wR%}%j=GhCLLB3NMqgG&eP(qeolJ}dKz)qri9i)7`mCyTLHc}w`Xb}v z^5a0j9ZdRw$vE{gw;Xl*S|u(em<YqJmtr)~)}rK$#3FSa1q`MD0|P^dt9wY0 IIzz)-0CDLv!2kdN literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_61.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_61.csv new file mode 100644 index 0000000000000000000000000000000000000000..05ba66ba7e8034bb3e169315903736ccd32a3013 GIT binary patch literal 236 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpfU%jil zYmmB*dXl<%wR%}%j=GhCLLB3NMqgG&eP(qeolJ}dEOuFp>WM%V>Ygxt_5anas&zs7 ze1ZBRpX4*p literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_62.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_62.csv new file mode 100644 index 
0000000000000000000000000000000000000000..b08e116c19e39d67f543610e3390bf8063c85a78 GIT binary patch literal 2371 zcmd5--EZ4A5SJ1Uh>Qhj-v;bqtR6-Tusu??V_U#lWSN_GXp%McigXxk(-Li~kVQ|T z>}5{S{U!VF1`OEi9tU(sO8$sztk_F2!l0!4_}%Z0ceH8hW&EB!VQ;Br82f*$B&Lte z&&*E^(=ZB#NiZ%vTof_;+(r25`J47>`x2M%05=wR#zO2O)HUClXCIkg?&A&5J(GZ+ zi5Xbj@pBht<#z^tuex2)RqIsm&-C8i46-)Hv&SKRbZZy)3Lh^ND*pDx#W}vcXVQi3 ziLl3Pmz4C|z<#T6S#d+M{WU-)}bLw*(8@gJ?~^PKZrRvOrtE*jvPf0Zb6eS1P zjOjDUmD6!2cL8h>``qunqmgWsR5pDT-z~b@WXX9WBFMpZ#$s>GVqCmEDBvF@{$wBb zw<=)C%-hPd6p@u>X03CvHc2R&)*aH;4x+g%$5sYKLQk*0ZnsX(Ue+(k^IE5U^+WCI z@zrJkD!C99T6txS#(J#tCIb&mQM{k~K3`>>qS zyLbz`_gbWJ#KPbg-iyG&~xyQ3lV@NSProm<1u4v)C>n zd-IGYOnPHzLX|Rgsr_?^A%CUh%3c#AU!CgFac>D#bhwBrl8MaO23=vfY+zJ_;fGLVl{_lZsCD z_B}&h&ZoRa|82_cA|)p!`i2mADXCk-fd8tUx5j^-a`u6z9D3V?sfS@@Vfa6u@)0yA z+*qoYyfl$w!go!1!^sXOmbeXj#&wsJo@Rq1qypFtI0zE5{S; zNy3t>+DG7;OC)Z16nG9KcmyP*a%SvoehBoETJhLE=R5yDF-?6LzhzH2o9Z%*z2C1X zW`NDx=DUVz7zM*57#BXA7cqO^L-@hz^VV_e43}^Z*XMZ3BJ3g5F<+V|H_f|ycun$# zNkGoT3=kV~-n^{jPC)Kyrz1M*I@aeyeRkH3tiX7BKf)hf*~J@$H|I(#{_OnGDZaX8 z(z)%6sLO1Rl=NHAeyO8seo)Gu*XMpL)8d`wr{)^B)MsI@@YlSE5S9sxQ|8N1#N9AZ zzCSrUJNonxV%BWVMmhb#X*;6@VZuRR9A}Z{R5lWmSc03MJo@6r!&U}mzOSfS10*Gf zuGWu>aO5%6+ z@Yh=zuw*7}z~1=CoY50u%{7zWBJ+JUV$&J0p+XcI)Cx z_u~G==a0{>jc zKmGCN-Cq~K{CNKJq4B~{H=)jZaIaaL+udhklEf?uzvJCx5^>u@b{|M6g>rDj;sB+5 z%42lNS&||)5J++ROb$7G_YyG~3?=G|L=MqpEMg>wVTyba%P{7#L=44zhGM2VO_6;b zPI-XB7)4=^C-PF;fWn79WHdlhfCi4ni5QDC)XlhTJTwA77W_I4(eMFv|<8RF`tQEPTYXT zi)x)N>@l_XDOotwUAWrZB<0Dql6u213jx)SzwzeeGmTXdz-qR$C|my+5j2K TP6*vs0{_q8zooas`r!W!s64qn literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_64.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_64.csv new file mode 100644 index 0000000000000000000000000000000000000000..9050f01683ebf4f2e70e34fa4c5eeed7760abcc3 GIT binary patch literal 261 zcmZQ%1OtYrKNVA2On#;KRN<*3`&Dsd@QGa5Q76h-PdaT)3u85o)98W`)UTW}e{IR=*c T5J!Pk!yO1T7XpChI>rG2cUC$Z literal 0 HcmV?d00001 diff --git 
a/data/csv/afl/20250211_csv_fuzz_crash/case_65.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_65.csv new file mode 100644 index 000000000000..a98344d21bb3 --- /dev/null +++ b/data/csv/afl/20250211_csv_fuzz_crash/case_65.csv @@ -0,0 +1,27 @@ +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +11" +199;1000;"a "" +;a ""y^^z012 0^^^^^'0000 +,10C,1 +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" +199;1000;"a "" +;a "" +10" diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_66.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_66.csv new file mode 100644 index 0000000000000000000000000000000000000000..b7408c92520aa15dce7c3190e16ca05b96e749d2 GIT binary patch literal 268 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMphP07j)Mr*tQD>BAk!E0IU=U+sG+?na;4(DO zF*I-w2~uZhc*|%g%4le4U>ZxQz%oW_%z2A+&K BL3jWF literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_68.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_68.csv new file mode 100644 index 0000000000000000000000000000000000000000..37a9ce79fbb141f91d187765647d099bb249da84 GIT binary patch literal 252 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMphQvxR{%7=MWz=U@Pf=%-XOw1OWME)X0D}Ko zmb{iVuCBo$=1hzREOyn5>WS(p4CSc*J>b?pJaglLx`EelN z4kmrTWSn}LTaLPYtrC|KM5UEN?RyYg-Rgfm7Z)cN5b%IiWM0;T#5e(1!&^oG%S$`y literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_69.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_69.csv new file mode 100644 index 0000000000000000000000000000000000000000..bee400e35ba1bc4ed9609a9ecf2ac4c450e19af0 GIT binary patch literal 233 zcmXpsGUhS_;}48jfT5uQx~M+0dWt%uJfrky#(IVZ28Q~5V4!ZLpzi5c?`l=8UY3{x zGvD0|9q1=>sO?)XUs*)a`4PxRk0H4ZU=nxD0iS42(>44UBaSEVzt-oM51> zh6YBeT!scZhGqs_FaR<%4rnL@01b7Fi}PWH@qx4e0|P^dt9!^@b%utwjE15hr^5^a E07D%zw*UYD literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_7.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_7.csv new file mode 100644 index 
0000000000000000000000000000000000000000..e6b78bdd5fe06abf9b9d2552b2037835db408356 GIT binary patch literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztngNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!=XCoX%(fvkOF{*$39eWoV#eXkY-O P32*7ISfDv1%l434p1y6N#F?Fc&T98)W y3I!z1>Xn9t2&2?1)e(|8Ame}{VATxuAiW?iLV>!KIz$rUCUvM357>nY5IF#8Wj`1I literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_71.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_71.csv new file mode 100644 index 0000000000000000000000000000000000000000..1f44de30dc3ca3c073e951ef0787b4d958261889 GIT binary patch literal 251 zcmXpsGUhTgGOktPQmR%jOUzLZQbz-f7(l&NfYGp8DLFrfkwvar9Z0KNDX0gB1cf?> zXaLPn2y+Z__HYc+RIh~^YE^--z>0wZ%3%QMgUZwbt+wK=P(V_rUTKKJQ%6YVfNiV< jnh$jLk5Z6c1@%fK1?pDn5J|9~fEBf0P(W7Tt!||bk%U;U4%N#8RSK2^0OW}{CjbBd literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_73.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_73.csv new file mode 100644 index 0000000000000000000000000000000000000000..6e8fbb5507c2d718375126f1a053c1692f402353 GIT binary patch literal 810 zcmb_aO-tNh5PpIeeMRUW5JFILFrk}Wzi%$0Qg=OcPgPX1Uw*|z_QQj+^mp~xqko|H z9;B6}P$-ce#Q4m_^{NL4-ppj?d1s!Pd4pof1bY5jZxwg&!3wUOMiL!{&T7F^t4?cR z_Tr>xhxTbViTBM5BoK&aE3V;KAIIG0!He{Bi^bi`FmhMzf#V0Rm06Y934hYJteJ7Q z-)sB9qMJR&2yu=UJnAFrDHwc0f-df0<2xc;GSXq>ikK1#im2j&k%-<7y+tgjcY}W+ zV*KL$6K1fn{#r5L|KX?U1Z9pVP19kTPQNPy^l?ExF?wV=|IDg(dwfjx=H1LPoMg{*^cQmzSMciOgrKSrq z-wXv5;}{dMa#JesZk4u7JS2CSc}Z_?;TC7gPqLpV>pD>#vXLF+HOlZ+sB-FZ>oV#ytEZ?l%J)k%*y?g(fCdJbPy<6* zVvc%{fGqX$D(ePBg&7*}wo7QZGx)Q4dm3 zh>MJi%Z~#AcQEM#Cgar0+;Y_IYn8Z^s=+F)6rk+J9Ca&&|CT5KYzRo-{|0131Q-|? 
fLR{TLtiXU#mes(}z)+sc&_Kt~z(7e!i3<(@`O7VJ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_76.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_76.csv new file mode 100644 index 0000000000000000000000000000000000000000..db289c458b9c336b9a248463a9c19135c02de26b GIT binary patch literal 232 zcmY+5F$;oF6ov0i8@e>KH}r7mEJbdiL$#O$*&r@04~&Kep-qTK*q;qr`VHk1i#o%1 z50~>*+=lL-#(@oy^Z0)F2p;i}n8+pe0e4{p`8ZVA?5Ze^QcQ5b+@Q=0gi2|Canj|P zgCDcQe(f=GTK#*WnOZNYy}j0FZ4)=#HqSk#3FSa1@$rmxP$>1Bpc+!Cn{MP>L?}W>nJcN XFt7+z8yGO+0Fw0v`H4kkAS=`Xx~vdA literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_8.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_8.csv new file mode 100644 index 0000000000000000000000000000000000000000..2ee586557b168beb83a0b72dc5c2dd9524fee86b GIT binary patch literal 175 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztngNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!>(s2dRLlhVUV51?)Px3=NbF4Ge&k PfdGR6BakxCfm#3n>K`lu literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_80.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_80.csv new file mode 100644 index 0000000000000000000000000000000000000000..ca8c67c89c03d488aab455b4f0c004ac5d89b3ce GIT binary patch literal 85 zcmdOrVE_R^Wd(a)E(cyNM|GY0vc#g~jKnH+9R>9=0|-dY&#^=Tx|RwI3Jfd))gal# MqB5Wf2o{4n0M7Ce0ssI2 literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_81.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_81.csv new file mode 100644 index 0000000000000000000000000000000000000000..7651212a6534a6397efc9b23f35795754ce6a0c2 GIT binary patch literal 101 zcmdO500Ti~1^c614!m3`KyF!LQF2CNk-Cn8dYJ))GysF-{G9kiB`ZT6rQCcS1qKBM U7Ig7ssJK8iNG7qU3}m`G02k5}>Hq)$ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_82.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_82.csv new file mode 100644 index 0000000000000000000000000000000000000000..9b792cfb3b325c840c4ef2387a1242f9fd6acf6f GIT binary patch literal 90 
zcmdO500Ti~1$$mD2VO2mb)EXMKm(pA(;`WM!zMl$@`lz@Wgu nB2aB$U}&JkWoT(>Z3t9kt(2&sq{L+nCczR028l&wAk)05lLN85g%54FDB` zfqI!+j=FuV5|>i7dYNmpy48J%aX?9k3b2dg;t=W-z@7pd3Q+>oY*np}2n;k9C~P1; zMG8Ddkn7@n@PasSsQlr|06Vk>M8<-drKP3rAwlX44R09@MHvkhjSLOA3=It#7#iw< L6cBK6#l-;tSX67Y literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_85.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_85.csv new file mode 100644 index 0000000000000000000000000000000000000000..c6030efb1e9a477b11fbc9f696eead2dcdb95796 GIT binary patch literal 449 zcmZQ%!~+MT|Y zwMtw{)#_!5IqFsl$R>l#R7%dziBD9rGSo3NG*VSc7RZTDQnLDA&!nCRRITpmSMTa( z2?ro&<;TU@#l^YD@$%}z#lZ}yB+yWIkjZgz>Sb;@>h@6EU7OXd?gQCizzrt#nblL& z8RZ$J8SbMv4`>4rfDHj^u&P!^1QLR!24W(@3@K0daFekcmbKOlkb*cS=m z5lydOZ+ifLQ1>!9=~Toj#Ujz?4d(<~W&mB_fl{r;H(LLSN)5P(tyj{(5WPMwmdV_o zlw=Y|<|%W)mNhKn#>tbiL-5%HWB-7&i>>Vw#`7zBH}PwXnz%#gL08?RAUuemIYJbQ zA=2QTusT@0cI{Q&U;7})wqZ6xPrWz8&c+%OMNzO`p-LY>ZJ@!JTunqur3sPkIF9W) Jt^`PH<~OUsNIU=l literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_87.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_87.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ab38f0bf7bc3bff1f0711631fcaf5476681efc4 GIT binary patch literal 397 zcmZvYu?oUK42JKtivdSn9IRb3Nj*0!=%hsu2XS!;6dfD{r*=_k!3XW+DEI(^=3=db zenS#2cmJQ9)_*x-GPz)opFIGuHwMiwiOTTv%B3|Ggd1DTZTNJ{%qnjJt&~&wLH7`y zXIjAs0~cqDbTS>3Sc?PEh$y6by<473Q8d{!w9Z7BO!qkE!M(S;x=IGP*JK30id)oe z-BLMk8ST+`WeIj*MC9xZ|4@FwcmG0L4D2*S&y-CbuoJP9LK Nz!;-cmU3bMh+ZVgO`!k) literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_88.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_88.csv new file mode 100644 index 0000000000000000000000000000000000000000..df6772439a27db84637baff573059fb93c79eb8a GIT binary patch literal 412 zcmZQ%!~+nOfj71f&nW=1_l-;48UlhuFmqm zo=H7X-AX~-)34su%@PhkHs;60*~P`V$MN#&g2k1%l&aOs5_5pckzC{sv|GK*El1tH z7OWu7wOQTjK9G$B^qJLD)EVU&r5Wy{n2;a05~v$wLL5+oRkb?8|7a{ka3F;jBgk2C 
zabS!7aAkmuiK_vVV4}3N)IB6fouT0^qoF9Hp`wwY0hghnAp=80J&*zdF0QyZ0AIXL AWB>pF literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_89.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_89.csv new file mode 100644 index 0000000000000000000000000000000000000000..1d9e2996dc5bb6a504e06eb73aefc34b2cd6325a GIT binary patch literal 393 zcmZQ%!~+di7_hx literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_90.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_90.csv new file mode 100644 index 0000000000000000000000000000000000000000..09ac5761f6569eee79063051e37ebd8982cd64b5 GIT binary patch literal 421 zcmZQ%!~+cG^oQgD8xm^L4kWRh~WbwV6t$U3kIwhaT%)4 z^1q%*JyG3CLEY1@-qp<#4nS7q$Hm#j#kt4v^6J9H!3?M*&{B7h&2e$+Wo|j@_O(h} zO4aIRuFXmREiHNBTC|O8TwQ}h%z*;h#`l2=5_5nSf;|DW0SLf`05w}xt0Mveja3Hs zKRnHXJ^AA@B*svNf2_{NQOWi|)fFbvm(NL7pP|?WHfXmR(kb$A09!LQJ I7gt;y0EBQ(Gynhq literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_91.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_91.csv new file mode 100644 index 0000000000000000000000000000000000000000..c56c566235daff6282e7edaf8460a7a91e20b024 GIT binary patch literal 446 zcmZ{gu?oU45I}S46zr&rgWBaLv2klbowNwzATBNkMF)q1Q@g0N;J5i5PLBS8pqJLz zqWFf~kxMQw2k?(NfVh2%ZnGd3DVjou*7U~TaB{#n2+`|g`+K6fE}FxiC=C}7qE`6< z9K_l)sbIjZaJEP$Q}>N`KsI?6Hci(ZvID<0BHJg+>Y1Z?)Ph`O{A?wRry-6{wzOGx zaz^jUd5L}%w+L-$iR<@oOcTD4dz3UvX)&PE1VQT%We+{%E^|5Zsx9=$5ObwmlA9aIBUcc2`ploBFMI51KYy#UdDRX6|u literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_92.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_92.csv new file mode 100644 index 0000000000000000000000000000000000000000..5627769cf6a13456eeb7299ca9274a7dceda4f4c GIT binary patch literal 397 zcmZQ%L|TGbDe* zn23;JcnbF+Bgp!=IIuteaAkmuX<%Sr0MQ^&T3YHJ5~R-1@Rrd~l+jSp$k2ex(9n>9 Op`ji~0Rb0RTpR!wTuKrE literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_93.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_93.csv new file mode 100644 
index 0000000000000000000000000000000000000000..c343ed6dcf8cf074a4667dadc9ffe705853a92c9 GIT binary patch literal 442 zcmaFOhzBq{g-cqBAwU^JgE~w?AuckG9N?Y|(&YmpU`D}dE-qm%FtB1|U|?aw0E`Ce z>MZ~3nbZ^2trXNf{pwxaEa3p;p8U8tySO;_I9^^|xHy;rl>}Ovm;=;XtHh;LtzHIZ zBYDIdWt%uJfk$jeH4X28-M_)FfI;gh*h;ZG<4K(0!Bup&}0O;25QS6 wt_+YdaW!BPOq7$qG!$htR5UU);4(BcWMF8h2U0-5#T6F^0Mb}o761SM literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_94.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_94.csv new file mode 100644 index 0000000000000000000000000000000000000000..fca0d7564746f6f3f2f7f86ec759c38b867ed89b GIT binary patch literal 444 zcmZQ%!~+ ztF!#CXHrj8w^C5|^s9GuvxEbX75Q;-c5!j;alE{`mX>;8)0DWl7?PB%jC2%~bOqsx zfefg6pgY__j){v?FLTRLx35*=QmR%jb8S|)x({RnK?BrP`poJn>Ws3C(hT>33KDaG zZUFlWXekhY4DkkPu&P!^gn<~U7f{`gqzuS}1Q#eIfZ>T@KF|q&xH7;N)qqJbQCeE+ j9ulO^(D0VgP?XV7(a6w%%h1q}fuW%uNC5#CS6mzba+Ol9 literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_95.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_95.csv new file mode 100644 index 0000000000000000000000000000000000000000..f651e896146e2d1865c3cd1cd92aed63bfc2a7c9 GIT binary patch literal 264 zcmZQ%!~+6IK@o3Cm4Ja1sVLASTA$W%4gf@Cp=+CcdHX z+SlH@S0VlYo`aaqMr2T-7A!hakLaw5y;`(7u+;DcfHC=ja1f2AoM4Q;g}Xx;gx<#M zp)69OEm1_KxzxuUpE{9EBjrqlS(r2pfV2zDf(QEb-PZ1kS5v&8Sco~)d?5xNs13mm zjJ63s@CzIRe^q1tj}H=R9HQ3gC9iF>TaMgH%@HRmSB{q6uU$vFt_x6=ObjWB-cgrG AAOHXW literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_97.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_97.csv new file mode 100644 index 0000000000000000000000000000000000000000..777ca34bdcff0af731fa9384ca0b517fe5401b8c GIT binary patch literal 373 zcmZQ%!~+WuP?(hT<@_E;%^-32rX z2;4yg&;XztU_QX05nh4^0vjVE$W3u^Knu9I{%~c0jESoOlVDN zaSRL$|9!yhIQ25O9Cd|SB`&3EbtIJ_NuVycI7n9Yf+^5LW?7pspf@WQMm4i3~_KiZTiq8W Date: Wed, 12 Feb 2025 09:02:36 -0300 Subject: [PATCH 132/142] Add hangs --- .../afl/20250211_csv_fuzz_hangs/case_0.csv | Bin 0 -> 42 bytes 
.../afl/20250211_csv_fuzz_hangs/case_1.csv | Bin 0 -> 42 bytes .../afl/20250211_csv_fuzz_hangs/case_10.csv | Bin 0 -> 4922 bytes .../afl/20250211_csv_fuzz_hangs/case_11.csv | Bin 0 -> 4952 bytes .../afl/20250211_csv_fuzz_hangs/case_12.csv | Bin 0 -> 4922 bytes .../afl/20250211_csv_fuzz_hangs/case_13.csv | Bin 0 -> 4915 bytes .../afl/20250211_csv_fuzz_hangs/case_14.csv | Bin 0 -> 4918 bytes .../afl/20250211_csv_fuzz_hangs/case_15.csv | Bin 0 -> 4933 bytes .../afl/20250211_csv_fuzz_hangs/case_16.csv | Bin 0 -> 4911 bytes .../afl/20250211_csv_fuzz_hangs/case_2.csv | Bin 0 -> 42 bytes .../afl/20250211_csv_fuzz_hangs/case_3.csv | Bin 0 -> 48 bytes .../afl/20250211_csv_fuzz_hangs/case_4.csv | Bin 0 -> 58 bytes .../afl/20250211_csv_fuzz_hangs/case_5.csv | Bin 0 -> 42 bytes .../afl/20250211_csv_fuzz_hangs/case_6.csv | Bin 0 -> 63 bytes .../afl/20250211_csv_fuzz_hangs/case_7.csv | Bin 0 -> 4909 bytes .../afl/20250211_csv_fuzz_hangs/case_8.csv | Bin 0 -> 4909 bytes .../afl/20250211_csv_fuzz_hangs/case_9.csv | Bin 0 -> 4920 bytes .../sql/copy/csv/afl/fuzz_20250211_hangs.test | 82 ++++++++++++++++++ 18 files changed, 82 insertions(+) create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_0.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_11.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_15.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_3.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_4.csv create mode 100644 
data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_7.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv create mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv create mode 100644 test/sql/copy/csv/afl/fuzz_20250211_hangs.test diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_0.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..65d6c1a8136e83309dc31c555a4293b1ee0b1b8b GIT binary patch literal 42 xcmb2|=HPgEi9dlQm4TO4fPpEuK<~dmR+3J#E|;N>k&ZDhmywRHjtMW<7XafL3K0MR literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..f685be9af1f97706e3dd0225bdce240b3aa935c4 GIT binary patch literal 42 wcmb2|=HPgEi9dlQm4TO4ASFMyK<|H2R+3JoE|;N>k&ZDh7m&~~;pO@Q00LACPyhe` literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv new file mode 100644 index 0000000000000000000000000000000000000000..fbd977e626f66e1b4358e2d45ba0db4ec129386e GIT binary patch literal 4922 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#zz7ZjM-b4lG&MH< z|DRzrGmM77Xb6mkz-R~z;}9?##(o>Y75Qkaj)uT!2n^v6h>J_k7%jg>Ltr!n=oSL} KbhC-JM<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86Yv{m)1S6fHrW`02Y^mPyhe` literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv new file mode 100644 index 0000000000000000000000000000000000000000..80bfdbe5ee3434edd31feccc610c2b9b75f64a70 GIT binary patch literal 4922 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#z-S1JhQMeDjE2By z2#kinXb6mkz-S1JhQMeDjE2B~h5)a!0h1tyYmmB*y0!6WH+3`wMnhmU1h9ty086WS A4FCWD literal 0 HcmV?d00001 diff --git 
a/data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv new file mode 100644 index 0000000000000000000000000000000000000000..de4bb8a6bff7dabefc48c6d5a96f206c04ceeb11 GIT binary patch literal 4915 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZOu?1QK)9trQeSLvu6) zMneE!2z;eVZo_B8sN84>jE2By2#kinXb6mk05Al?M?+yW1V%$(Gz3ONU^E2i5CWNW JumjqQ1OTRodV~M~ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv new file mode 100644 index 0000000000000000000000000000000000000000..77660389cae4170b67750ccbc52c48b93feedb41 GIT binary patch literal 4918 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86Yw*I*LF~4{8)}rg%+SDq z3ms5u=BWPB5Eu=C(GVC7fzc2c4S~@R7!3jXgaD(Vp@E?Rm;Go%dNc$^Ltr!nMnhoO Sg#fsKH#E?pPlYlFZ2AB z6$|lviK&7%spoAqGjHB5KUlvN;v{S}l9{^9Y^a$!@R_p7l)YLG6i~H(&7PN>Cdn`< zbe0T`4IbZ=T~^jcHq!ln<>`4%J#^kX_iAOqOvdM-T{pAu91hn=5wz!ew(GOHVAY1k zK86_K6&{|RGfX+T<>U?lRuPqsLwq6QNjVA7#ah|jW#y3Lh5MIG@UVjkZdm0Q)7Ptf z+ql99y*V9Qh~qfNQ5iMjQeh5gtS&N(eSFeeSIXBqIJKmrftDi#ni42J$6soiCwoFb b2nYcoAOwVf5D)@FKnMr{As_^VK%Kx3Qon$J literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv new file mode 100644 index 0000000000000000000000000000000000000000..8c2175321d6744c5d2956ef1a31c28737719997f GIT binary patch literal 4911 zcmeH@Jxc>Y6h-e8ikT2Q`ym*n*@c}LEdnkmk|JOVlSYeyDOOR|iR7zL1pk_UB`JbM zO2tAnFPp7|4QT!DRB!K@H_NG&LL7&se??DRs+*!Gi;n>E)oWNOdueUou5x927aA`M zVfKVEk%?rtFiLJq%5(L-`Xu&u^mud~nj`)5?cs2b1VLr1Vd^!SZfL4NqK!U=c!R?0 zEX9PH0X2^ZaDXTy&hdqmPC`w9Iu5h1Et(uW=5+s-HYl87j0c)LM#MF8?i6?UAl4Ic zf;f&nI@zirU0T5bf%!wa+{Guczmw44IykW_qGE^KumLu}1~wU(PB*pOu>m%~2G{@_ TU;}J`4X^<=zy{dBUkxk)`c#8$ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv new file mode 100644 index 
0000000000000000000000000000000000000000..3b6e26bb6ace46abb7a240b441c76e3ec9b4d698 GIT binary patch literal 42 xcmb2|=HPgEi9dlQm4TO4ASFMyK<|H2R)bE9E|-9gk&ZDhmjHvYjtMWk9z+ZxEUQ literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..2709f85fe7d427752072bd85572b411066693fe8 GIT binary patch literal 42 wcmb2|=HTE@czB5=g@Knk&ZDh7m&~~;pO@Q0MCpGCjbBd literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv new file mode 100644 index 0000000000000000000000000000000000000000..95986aca82e796b51a0dd27b2717e2e6a47f5b30 GIT binary patch literal 63 zcmb2|=HPh9z`zjV>K?+&`mymXqoF9Hp`n4H0hghHj-i3M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#z-S1JhQMeDjE2By z2n?bS;2uP78EqxdCj>Z0<9sv(MnhmU1V%$(Gz3ONfNCM2MKzP?(+(I`wiM99Hvk-* BcuxQT literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv new file mode 100644 index 0000000000000000000000000000000000000000..768f41dcd7bb32a6e808b795f8f361087bbaab3b GIT binary patch literal 4909 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86iw;nV4t7w_9}M->Gi%hO z(GVC7fzc2c4S~@R7!85Z5Eu=C(GVC70dhj1fq@)dqk2X|U^E0qLx3tF5K9$PhSXTd H;Po*8A3S(( literal 0 HcmV?d00001 diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv new file mode 100644 index 0000000000000000000000000000000000000000..2897497b6cfff1bc9d4a7d5fb1055c2c4ec01fe7 GIT binary patch literal 4920 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#z-S1JhQMeDjE2BS l3W3pzZ!`o(Ltr!nMnhmU1V%$(Gz3ONU^E0qLtrR|0037gdGi1O literal 0 HcmV?d00001 diff --git a/test/sql/copy/csv/afl/fuzz_20250211_hangs.test b/test/sql/copy/csv/afl/fuzz_20250211_hangs.test new file mode 100644 index 000000000000..77e165ebe972 --- /dev/null +++ 
b/test/sql/copy/csv/afl/fuzz_20250211_hangs.test @@ -0,0 +1,82 @@ +# name: test/sql/copy/csv/afl/fuzz_20250211_hangs.test +# description: fuzzer generated csv files - should not raise internal exception (by failed assertion). +# group: [csv] + +# This test takes a very long time to run, ~ 6 minutes on a Mac M1 Max +mode skip + +statement ok +PRAGMA enable_verification + +query I +select count(file) from glob('./data/csv/afl/20250211_csv_fuzz_hangs/*'); +---- +17 + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_0.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_3.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_4.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv', compression='gzip'); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_7.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_11.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv', 
rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_15.csv', rejects_table=L); +---- + +statement maybe +FROM read_csv('data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv', rejects_table=L); +---- From 0110b2467ba4bab35c3e47dd8d464c9003a2e95f Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 12 Feb 2025 09:27:23 -0300 Subject: [PATCH 133/142] This needs to be slightly bigger for windows --- test/sql/copy/csv/parallel/csv_parallel_buffer_size.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test index e3b0531499a6..04f78983c3cb 100644 --- a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test +++ b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test @@ -75,6 +75,6 @@ SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT 111 query I -SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80) +SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), quote ='"', escape ='"', comment = '', auto_detect='true', delim = '|', buffer_size=100, new_line = '\r\n') ---- 111 From 28fd731a9c5de9ea8468fb2d2a7ab24d6f8c463e Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 12 Feb 2025 10:27:20 -0300 Subject: [PATCH 134/142] Format --- test/sql/copy/csv/afl/fuzz_20250211_hangs.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/copy/csv/afl/fuzz_20250211_hangs.test b/test/sql/copy/csv/afl/fuzz_20250211_hangs.test index 77e165ebe972..456d8966edc0 100644 --- a/test/sql/copy/csv/afl/fuzz_20250211_hangs.test +++ 
b/test/sql/copy/csv/afl/fuzz_20250211_hangs.test @@ -1,6 +1,6 @@ # name: test/sql/copy/csv/afl/fuzz_20250211_hangs.test # description: fuzzer generated csv files - should not raise internal exception (by failed assertion). -# group: [csv] +# group: [afl] # This test takes a very long time to run, ~ 6 minutes on a Mac M1 Max mode skip From a04d9065fdc697b4eda3b5f86efc0f8ef6ec5fe8 Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 12 Feb 2025 17:34:52 -0300 Subject: [PATCH 135/142] Verify that the table names are valid --- .../operator/csv_scanner/util/csv_reader_options.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp index 5c91a5523eef..4c26d4f02e51 100644 --- a/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +++ b/src/execution/operator/csv_scanner/util/csv_reader_options.cpp @@ -308,6 +308,9 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, if (table_name.empty()) { throw BinderException("REJECTS_TABLE option cannot be empty"); } + if (KeywordHelper::RequiresQuotes(table_name)) { + throw BinderException("rejects_scan option: %s requires quotes to be used as an identifier", table_name); + } rejects_table_name.Set(table_name); } else if (loption == "rejects_scan") { // skip, handled in SetRejectsOptions @@ -315,6 +318,9 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value, if (table_name.empty()) { throw BinderException("rejects_scan option cannot be empty"); } + if (KeywordHelper::RequiresQuotes(table_name)) { + throw BinderException("rejects_scan option: %s requires quotes to be used as an identifier", table_name); + } rejects_scan_name.Set(table_name); } else if (loption == "rejects_limit") { auto limit = ParseInteger(value, loption); From 36538e672aa818519c3cb4c5bf093b1362352406 Mon Sep 17 00:00:00 2001 From: pdet Date: Mon, 17 Feb 2025 15:47:56 -0300 
Subject: [PATCH 136/142] Drastrically minimize the number of tests --- .../afl/20250211_csv_fuzz_crash/case_1.csv | Bin 171 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_10.csv | Bin 209 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_100.csv | 3 - .../afl/20250211_csv_fuzz_crash/case_101.csv | Bin 204 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_102.csv | Bin 136 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_103.csv | Bin 86 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_104.csv | Bin 86 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_105.csv | Bin 132 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_106.csv | Bin 86 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_107.csv | Bin 137 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_108.csv | Bin 137 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_109.csv | Bin 122 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_11.csv | Bin 241 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_110.csv | Bin 116 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_111.csv | Bin 98 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_112.csv | Bin 134 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_113.csv | Bin 128 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_114.csv | Bin 98 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_115.csv | Bin 113 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_116.csv | Bin 159 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_117.csv | Bin 405 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_118.csv | Bin 134 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_119.csv | Bin 93 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_12.csv | Bin 241 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_120.csv | Bin 148 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_121.csv | Bin 228 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_122.csv | Bin 220 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_123.csv | Bin 264 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_124.csv | Bin 415 -> 0 bytes 
.../afl/20250211_csv_fuzz_crash/case_125.csv | Bin 418 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_126.csv | 30 - .../afl/20250211_csv_fuzz_crash/case_127.csv | 28 - .../afl/20250211_csv_fuzz_crash/case_128.csv | Bin 279 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_129.csv | 30 - .../afl/20250211_csv_fuzz_crash/case_13.csv | Bin 186 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_130.csv | Bin 759 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_131.csv | Bin 199 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_132.csv | Bin 291 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_14.csv | Bin 175 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_15.csv | 20 - .../afl/20250211_csv_fuzz_crash/case_16.csv | Bin 177 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_17.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_18.csv | 8 - .../afl/20250211_csv_fuzz_crash/case_19.csv | 5 - .../afl/20250211_csv_fuzz_crash/case_2.csv | Bin 171 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_20.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_21.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_22.csv | Bin 176 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_23.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_24.csv | 7 - .../afl/20250211_csv_fuzz_crash/case_25.csv | 7 - .../afl/20250211_csv_fuzz_crash/case_26.csv | 7 - .../afl/20250211_csv_fuzz_crash/case_27.csv | 8 - .../afl/20250211_csv_fuzz_crash/case_28.csv | Bin 175 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_29.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_3.csv | Bin 160 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_30.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_31.csv | Bin 189 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_32.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_33.csv | Bin 186 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_34.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_35.csv | 27 - .../afl/20250211_csv_fuzz_crash/case_36.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_37.csv | 10 - 
.../afl/20250211_csv_fuzz_crash/case_38.csv | 6 - .../afl/20250211_csv_fuzz_crash/case_39.csv | Bin 181 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_4.csv | Bin 171 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_40.csv | 4 - .../afl/20250211_csv_fuzz_crash/case_41.csv | 3 - .../afl/20250211_csv_fuzz_crash/case_42.csv | 4 - .../afl/20250211_csv_fuzz_crash/case_43.csv | 4 - .../afl/20250211_csv_fuzz_crash/case_44.csv | Bin 209 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_45.csv | 4 - .../afl/20250211_csv_fuzz_crash/case_46.csv | 5 - .../afl/20250211_csv_fuzz_crash/case_47.csv | Bin 204 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_48.csv | Bin 241 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_49.csv | 3 - .../afl/20250211_csv_fuzz_crash/case_5.csv | Bin 171 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_50.csv | Bin 157 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_51.csv | Bin 201 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_52.csv | 5 - .../afl/20250211_csv_fuzz_crash/case_54.csv | Bin 234 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_55.csv | Bin 232 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_56.csv | Bin 288 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_57.csv | Bin 288 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_58.csv | Bin 319 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_59.csv | Bin 239 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_6.csv | Bin 171 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_60.csv | Bin 236 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_61.csv | Bin 236 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_62.csv | Bin 2371 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_63.csv | Bin 2399 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_64.csv | Bin 261 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_65.csv | 27 - .../afl/20250211_csv_fuzz_crash/case_66.csv | Bin 268 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_67.csv | Bin 291 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_68.csv | Bin 252 -> 0 bytes 
.../afl/20250211_csv_fuzz_crash/case_69.csv | Bin 233 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_7.csv | Bin 171 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_70.csv | Bin 272 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_71.csv | Bin 251 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_72.csv | Bin 251 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_73.csv | Bin 810 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_74.csv | Bin 236 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_75.csv | Bin 231 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_76.csv | Bin 232 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_77.csv | Bin 87 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_78.csv | Bin 92 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_79.csv | Bin 106 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_8.csv | Bin 175 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_80.csv | Bin 85 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_81.csv | Bin 101 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_82.csv | Bin 90 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_83.csv | Bin 101 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_84.csv | Bin 455 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_85.csv | Bin 449 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_86.csv | Bin 405 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_87.csv | Bin 397 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_88.csv | Bin 412 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_89.csv | Bin 393 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_9.csv | Bin 200 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_90.csv | Bin 421 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_91.csv | Bin 446 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_92.csv | Bin 397 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_93.csv | Bin 442 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_94.csv | Bin 444 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_95.csv | Bin 264 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_96.csv | Bin 397 -> 
0 bytes .../afl/20250211_csv_fuzz_crash/case_97.csv | Bin 373 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_98.csv | Bin 270 -> 0 bytes .../afl/20250211_csv_fuzz_crash/case_99.csv | Bin 322 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_0.csv | Bin 42 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_1.csv | Bin 42 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_10.csv | Bin 4922 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_11.csv | Bin 4952 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_12.csv | Bin 4922 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_13.csv | Bin 4915 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_14.csv | Bin 4918 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_15.csv | Bin 4933 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_16.csv | Bin 4911 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_2.csv | Bin 42 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_3.csv | Bin 48 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_4.csv | Bin 58 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_5.csv | Bin 42 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_6.csv | Bin 63 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_7.csv | Bin 4909 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_8.csv | Bin 4909 -> 0 bytes .../afl/20250211_csv_fuzz_hangs/case_9.csv | Bin 4920 -> 0 bytes data/csv/afl/4172/case_1.csv | Bin 2398 -> 0 bytes data/csv/afl/4172/case_2.csv | Bin 229 -> 0 bytes data/csv/afl/4172/case_3.csv | Bin 257 -> 0 bytes data/csv/afl/4172/case_5.csv | Bin 240 -> 0 bytes .../sql/copy/csv/afl/fuzz_20250211_crash.test | 529 ------------------ .../sql/copy/csv/afl/fuzz_20250211_hangs.test | 82 --- test/sql/copy/csv/afl/test_fuzz_4172.test | 22 - 155 files changed, 942 deletions(-) delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_1.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_10.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_100.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_101.csv delete mode 
100644 data/csv/afl/20250211_csv_fuzz_crash/case_102.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_103.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_104.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_105.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_106.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_107.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_108.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_109.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_11.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_110.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_111.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_112.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_113.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_114.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_115.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_116.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_117.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_118.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_119.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_12.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_120.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_121.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_122.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_123.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_124.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_125.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_126.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_127.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_128.csv delete mode 
100644 data/csv/afl/20250211_csv_fuzz_crash/case_129.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_13.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_130.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_131.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_132.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_14.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_15.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_16.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_17.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_18.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_19.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_2.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_20.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_21.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_22.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_23.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_24.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_25.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_26.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_27.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_28.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_29.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_3.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_30.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_31.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_32.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_33.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_34.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_35.csv delete mode 100644 
data/csv/afl/20250211_csv_fuzz_crash/case_36.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_37.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_38.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_39.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_4.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_40.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_41.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_42.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_43.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_44.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_45.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_46.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_47.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_48.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_49.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_5.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_50.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_51.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_52.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_54.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_55.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_56.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_57.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_58.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_59.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_6.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_60.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_61.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_62.csv delete mode 100644 
data/csv/afl/20250211_csv_fuzz_crash/case_63.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_64.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_65.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_66.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_67.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_68.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_69.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_7.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_70.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_71.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_72.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_73.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_74.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_75.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_76.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_77.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_78.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_79.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_8.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_80.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_81.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_82.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_83.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_84.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_85.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_86.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_87.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_88.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_89.csv delete mode 100644 
data/csv/afl/20250211_csv_fuzz_crash/case_9.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_90.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_91.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_92.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_93.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_94.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_95.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_96.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_97.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_98.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_crash/case_99.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_0.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_11.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_15.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_3.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_4.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_7.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv delete mode 100644 data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv delete mode 100644 data/csv/afl/4172/case_1.csv delete mode 100644 data/csv/afl/4172/case_2.csv delete mode 100644 
data/csv/afl/4172/case_3.csv delete mode 100644 data/csv/afl/4172/case_5.csv delete mode 100644 test/sql/copy/csv/afl/fuzz_20250211_hangs.test diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_1.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_1.csv deleted file mode 100644 index 2dff96bfb5f9b182d598d7cfb9ed20a53a95b5be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQQldagNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zS!VA==5Q!jJNQMa#E;!=XCL}n`>bh#tx;xaT)GBhv%QU(GH M28=+;KnH3#07#T7&;S4c diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_10.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_10.csv deleted file mode 100644 index 1e43526015ac6450d9ea8a7f25505492a6628370..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 209 zcmeZK<<#ZYWz=U@Pf=%-XOw2J)#U^O1_nk32n&T&FH6i(w^C4ui;RoQj{^aBFzEv( zmAI6up(?>_2F3=suDG}eu&z>M0MZRMA~6SIVmyQ!w-913m=s`OcopL6 Y9%2OsjJ&J{hCs+=XrNdE;z>Q)Ns!66u?1cf?> zXp|-9D1gq;08fT;u8* e2hyWn3DnP(&BBo7;h~od(+w0=0IOF984Li$;)NM g%f-vfrJxWO83zUKVA==9)d3lg!YN7yS)>l70Hy3CLI3~& diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_103.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_103.csv deleted file mode 100644 index 627236618725faeee0f2a66fdc8dfaf2ebff8b19..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 86 zcmY#Za8yXnNGwt}Qcy2TOq1u(;^bstV5sMWgG41OLmj2${2V40o-Fk^D9`~ZFG^;J L^FbE@>Q)B;4d)YR diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_104.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_104.csv deleted file mode 100644 index 3ffe8f761076d228091c82edc09de307cc15f5ff..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 86 zcmY#Za8yXnNGwt}Qcy2TOq1u(Vt{}+AkYDcl`HUaS@LpO@pAF%M#P%IkXrcKpCRGD79RH rm&=lu%ZitaSBXnOAuciw3fv(ygz_nbit2#ODN6Q%seSP=?B(FP9ZB7q1eRfM5wfq*-h^Z}D`>Sb;@>h`rtTuRkYm0&glBO?P$S6o~KSXU`B0O{1ObReCyb5u353vFRMqX9}Lm=cbG|)L`V88_h05se=hyVZp diff 
--git a/data/csv/afl/20250211_csv_fuzz_crash/case_110.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_110.csv deleted file mode 100644 index 48f32730da7f3dc915d56e92b8784e748387b441..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 116 zcmZ=%NX|$sQr9uGG`D1Q;FRSA10x0Xvcxob4lM>SP*!jRD@!d`;N{Tb<+9@C;#J~O aP>73+g93Lj?E~ZLfb=tBfTCoOaq0jy9u1bwdLn;L-t81_lbr8Hq*eMhfa>iD~j2TD(eJ3JP(NV3k0^9nA27adkk- N7%@OmG6N8(0|1OQ6t@5X diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_112.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_112.csv deleted file mode 100644 index 73296b846c3bb9a5983b32a841e35a29f892f0c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 134 zcmY#Za8yXnNGwt}Qcy2TOq1s@Ff>v%umS-~FbO4iS%DJTR#sXJV4w`umRhdB%Vo*S jWyQD diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_113.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_113.csv deleted file mode 100644 index 91f981ac0cc27ecc6756b472a295f0772c608e51..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 128 zcmZQ%1cDcg43?Ib%-?_n8sO#CRrzmeSp@`^mfBFsYW1?j9Ca&&S|u(eOG{p`s2-X| aOG_(-irN$gC79w0uyQC*y%MAxWHQ)N1N?b~YhK5`U3UQHf Saj|i6>Xw#z2t7zFkahqQA{E#G diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_115.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_115.csv deleted file mode 100644 index e58f1e9f64aa92c00ad58df71c909366c8275960..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 113 zcmZQ%WMnXiQ!ldw0+s)kmQ_^(RhE{#Ag-kzm_#Oc83Z7Ts(^~Lp)zhc>h`rtTuSaC fLFx<*Zy6XE1U^HgK*qR2jQL;Bz+lPCtE&P4BGDVB diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_116.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_116.csv deleted file mode 100644 index f30120d3aa53c5e68cd1bbc85b14f504650971eb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 159 zcmXriDJd#V$mdNe83IL-=FfIT9 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_117.csv 
b/data/csv/afl/20250211_csv_fuzz_crash/case_117.csv deleted file mode 100644 index aa7e242fad58bbc5cfb7c8d9f456b8c407383cd9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 405 zcmZQ%!~+NBgSs58nlN;BB%a&mE^gE%1Y0TIZCFfcGQFml0w6(a)!3lj!lG*DM( z`CreZo~RzKpzi5c@9Jg=2XT>cartp^c5!j;alE{`P;o6@US2R$$I>z`4rmJl$mX~> z^)j~{b^BT+E~RRXGS_BxtNTDU5&-+b2IPnP)#_!5IqFsl3Q!w>y4^uW05w=utBYX; z%3@+=3=QfKA2K{e_!Q)pxHuV*O@FvDK*rR72rxjf;y{K90dd*?1~g?7jE0Ivh6Y@Q RhK9f(s|Qj*z{M382LNjHN|OKp diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_118.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_118.csv deleted file mode 100644 index 12c1eefed8aeda7b8b899c4fcc22e88442b2ba70..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 134 zcmY#Za8yXnNGwt}Qc#a$U|@)gi}Q(7h6<*ZEAVnz@^V@6a`7s0DJaC{s9PzhSE?)2 nDsd@QtCuAL`Rb8CEnwgdCVim1I2{F`0{oyVu_zg6CxbcwCtW4~ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_119.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_119.csv deleted file mode 100644 index 79fea4b0758b2a85f0077dc5ba92526b45a2a78b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 93 zcmY#Za8yXnNGwt}Qcy2TOq1u(VgLeOB`yVpxX3sl0P>W9YCz05pSU_gE|0|!V>WS diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_12.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_12.csv deleted file mode 100644 index 4c1a8352beb22246e53a5cd2b5c696744fca8535..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 241 zcmXpsGUhUb;{3c+AUhb&H)QhiL#)7nk(brL5D2*p4Rj0*47i{G02tahzyJUM diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_120.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_120.csv deleted file mode 100644 index dc91aa419dda8f81202f056122a3368eb813209c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 148 zcmY#Za8yXnNGwt}Qcy2TOq1u(Vqi!tN{tHvaf`}X1z5p=*T4b7P~hdVCcu pN{!P2DpM>fM~EtMDJaB6#zBEQjDd^xiDQIFfecD4N@f58bpVW-Bq{&^ diff 
--git a/data/csv/afl/20250211_csv_fuzz_crash/case_121.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_121.csv deleted file mode 100644 index 416d4de5e3f6fa6a8f82606e27787d3778dc14a1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 228 zcmX|)y$ixH5XJ8>OQJ(}2TMS13O1V*#6h%6$2zzih#d=VS*qwCE@&>1{@~tyJa~tg z`C_})Ztie1qn>buOV~NBUpk|-MgT}l?4XMA?@+i7RF@@RnC~n!BDYvZc(6UB!9Z_5 zMm*+t6cDU6cBJs0B9eVh5!|LFOA()T$EK?jpa6X3E8`bJ&YzS#GlkPY+x=7C`~a1B BD$M`@ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_122.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_122.csv deleted file mode 100644 index 29da7018bbd835f6eb234d0ea9d5faf4807dded3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 220 zcmZQ%gn<8yKB}C$+`5eV%<3uXjPi`q47R$Qob{X#zyhLlpd1LLUY3}nZl$0Q7a13q z9|r>NU=m1hfq)P;P!p#F16)eg>SYZuBkF+KV&g&~M)<%C0@4Bu3=AQz?jcrSz{tz$ eZ)jj>z-7n-0$_^}gt}o|oO+pCj=Fs<$QA&eh9_PC diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_123.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_123.csv deleted file mode 100644 index 3eebaa16b6aa47c88857f1169f10054a423d13d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 264 zcmZQ%1Oo;zc*DS82?9(kJk{!n>Q?Gr!Tx^gwE~QW)k^jGIq``~Rt7ps$v{>TkX6eS z?Fj=eKnA)POb(UK%P&z#RLCmM&x_?s&Ig$VG}RDps!=UhHP{A_C?ktpwK`CVx|M=` za7a+7bBIP+Vva(XW013l<62GiT7_zLpn$qntrC|KlnJ3w?S%5I6e^$!85)2_Rwy88 F0|361M_vE` diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_124.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_124.csv deleted file mode 100644 index 955e7021fe1e00e1fab21209acb1864d3fc2f49c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 415 zcmZQ%1cU!T0AYbihR+b{DO}7_3<@A3mM{s0xX3sta8Hhli;c4Y@nEuWnhOT37#SE? 
zm@ojNfx0@&|9U3%M0G0#bx*%~S2s&IfLdV}7v~hi1&KL83&Ebq#{^*W zz{WzY4GI<$({c9j^l{Yz1&0|lOvK6<7#IY=o@GD;86(I8adBK+adDLpxA}k#1iH{Q i7w$r!0~ifO84VSU3=Oyp4GkF>8tQ=*5O8tbjROE@LR3Bg diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_125.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_125.csv deleted file mode 100644 index 9c24c33b7bc7f8f769d417ddff855a26ad2b6d99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 418 zcmZQ%#0D5XgGCvh!X+)mpa3Fb36oHWi;RN;_hb;m2SmVR;WQTvSTQm%urOf&Mgw(q zmjCrk>WS)B3hJJI^{#G~Z~(Q!E-ubJj+a*#DDLVS9AXZnwT)}Q%=jvxrS2e`_LVx%D!iGb$1O?*HKSWx2jezOU#LjtLB6Q^|btakew-T5gORmAE-p@()AgCvQ`8yd8KoKSSF0-k%>imtU|gnOtzMFuo2qUFrix1va|=qU)OCO+ zrWPj`WfqiV=H~$=%My!{GZKr`Ye9;2kmR_G42%qP4Ge)O#K6D`jC67{Q}Qy?GfEV4 F@&VzKR_y=) diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_126.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_126.csv deleted file mode 100644 index b0e23e1f3f15..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_126.csv +++ /dev/null @@ -1,30 +0,0 @@ -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -1 "" -10" -199;1 -;a "" -10" -199;1000;"a "" -;a99;1000;"a ""000;"a "" -;a "" -10 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_127.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_127.csv deleted file mode 100644 index 7c1e2505553f..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_127.csv +++ /dev/null @@ -1,28 +0,0 @@ -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -1 ""#10" -199;1 -;a "" -10" -199;1000;"a "" -;a99;1000;"a ""000;"a ""a "" -;a99;1 \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_128.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_128.csv deleted 
file mode 100644 index ae9fd5dd6f8f0d3ff80db4a464ad756378f2ef53..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 279 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMphJ#ZA46$ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_129.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_129.csv deleted file mode 100644 index 475ffa66fb2c..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_129.csv +++ /dev/null @@ -1,30 +0,0 @@ -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -1 "" -10" -199;1 -;a "" -10" -199;1000;"a "" -;a99;1000;"a ""000;"a "" -;{{{{{{{{{{{{{a "" -10" diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_13.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_13.csv deleted file mode 100644 index 738932afa8f621cb8c6703b3a12350cb65c74fe5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 186 zcmXpsGUhS_;}49eK%ZGXMV-M92pAa{locEm(o)NTD*mIYLguO03NRX0D<$XW#3w3Q z8R&pnNkCRDS9D%}i9(`6R&jn_EEimfAxMdi0$5$L5+lgQI5;SUu)r3?sT&#q0hf-B OdXjofwR%}%jyeFSPbPf; diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_130.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_130.csv deleted file mode 100644 index d476d82e98e5d29ceaf19bb50b4dfd79419e5127..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 759 zcmcJK*-jKu5Qb&BAS4=Jz@%x^$U`{chj;P>i_G z=gvo|)n{hs8cFAy3yT*nUTR%lx^nf}^2+raH*eiuy|Z?=eQ*8##)F5C9(OjMY&|tE zx4XTaXU|`}?7w1fM7cz{M7cz{M7cz{M7fkYRr=1C za*1+j87P;!MNuh&A}E3)D1x$$vW;4mSYDa0%va|B{dew}uPm?3SC&_N!#7#24Bv<| z;*2;W&WJPOj5wp8;Tyi;8@}N?zLW3xj_>%6@A!`I_>S*e_V3Su!2?7m392Xbo4#aVBK43DAsh)wwu9{0d zQQb;G-P5n$)!j8nT}M4h-KrXBVh&JLAubZ46iugknOly!eXSCg5=Y*WQJ+~o oMV(QeQJRB^mw^RrlmO6KA+GKrLFx<*Zy60m84V2$3=Oyp0sLDqbpQYW diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_132.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_132.csv 
deleted file mode 100644 index 2bfd1d477eab24c69f8fd82aa60f0548a9100d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 291 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMph7?IK>NBgSs58nlN;4}8R;#Dw=c`*OsHfzY zCgr56C$KZB>!?d}FhBuNmdnsc$CS&^Ku^cOfXh+O2n1Y!n2Cj_T0K$SO5H1%(LkO< z3#x-f2O{d}7vk#f8l7C_}wgfYGp8DLFqU zK2gcaKnKi90h2n( zuA`o$ZdI*bmY4$+6^M(Bg93Lj?E~Sdm$~Jr+t(^_DZx}CvlX!G;xaT)GBhv%QU(GH M28=+;KnH3#03I_chX4Qo diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_20.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_20.csv deleted file mode 100644 index ac53beaa9544..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_20.csv +++ /dev/null @@ -1,6 +0,0 @@ -123 -123 -12ð'}1{"col_a":0,"col_b":0} -[h`r', 'yarchar', 'varchanot aþjson] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_21.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_21.csv deleted file mode 100644 index 7a4be8dbbd6f..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_21.csv +++ /dev/null @@ -1,6 +0,0 @@ -123 -129*99999999999999 - nda999-93 -12ð'}1{"col_a":0,"col_b":0} -[not a json] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_22.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_22.csv deleted file mode 100644 index 2ddf7b2c3297a4eb76023ad4664a9f57e0ac9df7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 176 zcmXryP%kY>(={|xm+$7_xL{<=WeCO}7>NbywE~QW)k?|vIq``~Rt7p?RuYg^%N3oM zU!stxkX4+Y7s~}#VhB>AqX1Tyti-6#tez4F2c-}e*z!1aLjxe-($P^*Vu-0$FH6i( F2LJ)5B@_Sv diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_23.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_23.csv deleted file mode 100644 index 9e40b5220838..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_23.csv +++ /dev/null @@ -1,6 +0,0 @@ -123 -123 
-12ð'}1{"col_a":0,"col_b":0} -[not a json] -{"col_a":1,co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_24.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_24.csv deleted file mode 100644 index 5a0c0c194278..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_24.csv +++ /dev/null @@ -1,7 +0,0 @@ -123 -123 -12ð'}1{"coolÿÿÿ2} -;STRUCTl_a":0,"col_b":0} -[not a json] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_25.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_25.csv deleted file mode 100644 index 99e0a7375746..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_25.csv +++ /dev/null @@ -1,7 +0,0 @@ -123 -123 -12ðn] -{"col_a":1,"co, "co'}1{"col_a":0,"col_b":0} -[not a json] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_26.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_26.csv deleted file mode 100644 index 48473ccb3069..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_26.csv +++ /dev/null @@ -1,7 +0,0 @@ -123 -123 -12ð'}1{"col_a":0,"col_b":0} -[not a json] -{"col_a":1,"co, "col_son] -{"col_a":1,"co, "col_cc"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_27.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_27.csv deleted file mode 100644 index fde479ec47d4..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_27.csv +++ /dev/null @@ -1,8 +0,0 @@ -123 -123 -12ð'}1{"col_a":0,"col_b":0} -[not a json] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'1~000 -,,'b'\{':0,"col_b":0} -[not a json] -{"cval' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_28.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_28.csv deleted file mode 100644 index 
99c23520e19a116ab8352b598cb944bd7e684589..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 175 zcmXpsGUhS_;}49)0`*z}M#E~QPhM`)eH;_0_p(0zaYi{ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_29.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_29.csv deleted file mode 100644 index a4787dbb0d68..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_29.csv +++ /dev/null @@ -1,6 +0,0 @@ -123 -  0123 -12ð'}1{"col_a":0,"col_b":0} -[not a json] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_3.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_3.csv deleted file mode 100644 index f7dbe5492a56cc7bb3e271f4c85514b3a005f2f8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 160 zcmXpsGUhS_VtpXa&r1ce8TpyjQ`8yd8LPR}6VfbDFYp-;Q;i# BC}RKs diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_30.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_30.csv deleted file mode 100644 index 733a0aa58558..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_30.csv +++ /dev/null @@ -1,6 +0,0 @@ -123 -123 -12ð'}1{"col_a":0,"col_b":0} -[not a json] -{"0,"cocol_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_31.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_31.csv deleted file mode 100644 index ac1dfd8e50a13afb2c9069b16da0c24e46325fce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 189 zcmZQ#G?3@uW`F<|fokNGwpV6<{>1R!Yt{00J%@LjwaH z!}=s8D}!3D=)C+Ag+zs{;{3c=E|8L(_(UZuLmePXM*+-DR$|m=R!@n8gHi|!Y`ErT+w;?C0sg&1_nBY z^#%r9hCtc?1{4w%vWoNbV!7a24MAFU6u|nEl^FGz)l=f&pcKLaTN7{EiGhKIiP3EHMWtst^|$2LOW@FP;1=M&Cq*;K0fg!}z ZJtRn-q2Vo~p(vxFp@E?Rm!Sbr4gl0pE!6-3 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_4.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_4.csv deleted file mode 
100644 index ccc7d2986d19d813a245c83b24f4e6518e7d6fdd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztogNcEGg^AIC#jcu5JyG3CL0!wQ-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!=XBR8&B)trQfn>*6vrP%<r(Wik!)srw#HCbS3giO88kkZB0R{#TTa1x0k%5JQK`A*uCq7BZ z%BYE}niIr{OH{Hl)B&<|6u@%HN{sr<>M1}&b<~s8t#Tmd#l=AlhM5befd&JO&QS*d DACep} diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_48.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_48.csv deleted file mode 100644 index 947747bb1c464de190e13f0f291157e8c0099300..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 241 zcmZQ%LL6|k1Bhf~lwnk_ z6<{>1R!Yv#iBD9rGSL1HVkH4twOrA8`6czP?ygDdI_gR4R@Lfdi8<<=K)}GrPy?b7 z04&15ARuX_prDwjkX4+Y7s~}V)ldg$zK#Oel4K=DeP;FAxVY>%2q=Y6VCTSyJOv&e HeUNPcA@evD diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_49.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_49.csv deleted file mode 100644 index 199641f8f80d..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_49.csv +++ /dev/null @@ -1,3 +0,0 @@ -999+9899999999 not a json] -{"colËËËËËËËËËËËËËËËËËËËËËË 01_a":1,"co+ "col_c"/'d^^^^^^^^99@9999999^^u^^^^0苹æž0 -,,'b'\{00^^^'1'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_5.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_5.csv deleted file mode 100644 index c754534b4502b986fb0e49b43982f99450eb5dd0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 171 zcmXpozyj1$)EVU&r8$@w7+9DX4Or}|xzrQYtrXNf{pwxaU4zth)RWY$s@2O9bAX}( zaglLQ;LacbWcfgN>Sb;@>h`rtTuNY-|Ev_?Y@jX$BwcwR4G>*ih6YN81_nUNK!Cx3 M5l9*6K#Yq608SGt7ytkO diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_50.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_50.csv deleted file mode 100644 index 0cb4a24515a27da97c355caf071ae0780e6f5dd7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 157 
zcmZQ%!~_1lL6>A;VKl5(O3u%TPgJrp(Ebl%B>`ErT+w;?B?^fOS;hHzv1_q_WneKb iB~CEl0+Xe2U;wrxPTkS~2)J}~)RWX>s@2O9bJPLp{wLo6 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_51.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_51.csv deleted file mode 100644 index 144ac1887d73bb137a8569634ff53718ceaba3ef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 201 zcmZQ%WMp{3NDQ#FL{_B==DubCi>cQNFd9}XCFkeFe+Gg?B`ZUnIm!7tV5Wige~?6y zl9fR%S9D%}yc+~4Br0ST=jX+8K{SFj0gcd60GpGn#Hi1#o)QNKr4SawPoRx)>V^hD Sz@?+3o}?aAtzMRxqYeNM0WIPH diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_52.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_52.csv deleted file mode 100644 index d702dcc8f36e..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_52.csv +++ /dev/null @@ -1,5 +0,0 @@ -a json] -{"col_a":1,"c'}1{"col_a":0+ÿcol_b":0} -[not a json] -{"col_a":1,"co, "col_c"/'d^^^^^^^^^^u^^^^^^^'10000 -,,'b'\{'val' \ No newline at end of file diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_54.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_54.csv deleted file mode 100644 index b57920716e6cb5c013518bbe15cf08a20b51a6ba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 234 zcmZQ%1OtYr5E@8YLVzKV;^o!l%}Ytt3(thhLKzI`0*val0*r>$O3C>-@rg=S2HO9V zfUG1StClM|FTX?~Q6Z~1KQERGs6>bHKckN-r!KcHqdv2GiaMiwzchobE+-hgV202j zvVj54RxeA;Q4dm3h>MJit5xDs%8vs9cQEM#CM_-9-3{GqGF6RKT}mu1K*g ztF}Vra;YbR6ngsAySlpusq3gGsqX@+NX&s~h>OdQ0|ED`Akrr;2t=uux#g(a*D7%- z0adyJRo;iVNe4tRI6ycrLGI(_wX_5xowy)%hK9F{hN6syhDL@4T!zSiiz_Y;0Qhz@ A9RL6T diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_56.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_56.csv deleted file mode 100644 index fd74276fc29f8a24dbe37c29362abbbfad62c68b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 288 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dbzrJwR%}%PF!4^dIJLk1CWk$2N7{TaX}zTz055~-M&_d3kZQKU31i}?gM#N 
zKsG~zx|M=L9J8WewR&29K9HlHl3$vXld4|J%?vbKn(;rQk1D4ww=SbTvpU!{KOfub zav}qTxX8G;2_TCYB{_gDWdKpi3|qHug*pT14lqEt1L&lDun^3h2>HYub!!EXTLHIn BLOuWh diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_57.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_57.csv deleted file mode 100644 index 22a39d8efc129a9083c90c825b443414e65c3e7c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 288 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dXl<%wR%}%j=GhCLR@4V7`TH;pSU0ptzPDqqi$cT#07*vm99XQ_klbsAe*5< z-6}yLj#*K#T0Jd4A83SnN`7flPO5q>H#5*|X~zGIKB}C$+`5eV%<5p*{CsSy%ZUuY wmga*jVwB|g$H2ep^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dXl<%wR%}%j=GhCLR@4V7`TH;pSU0p4U@oV#ytAkzh^RcZiCo%wA3O1XNA0#<| z9${bvQpyZlw{DGtxH8TM?5;TVGPfLcd!WOVfJ}&6Vcv*40`dk(9_USL1&|s5DBnh? diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_59.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_59.csv deleted file mode 100644 index c04e3fedf310e26e11fc8f149a2ff7e12a69e416..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 239 zcmZQ%WMpIjqNhL_0xUs*fs6SCOn{LwMV(QeQJR5~fq{jA(Li0D#jcu5JyFd{LEY1@ z-qqbTNL@$0MBThvy(}?D-AX|rj`2UEFDs)yvpSMaCPo9GKA;&4>WM%_NLrKBt*UiF zT77|9Bje)o<3PY2O!|PyIQ25O9CiCzB`zhHB@i}Hr#crGCl?U#fK_Hdb=8CDN)RCc Pv@yihJtRn-q2Vn6i}W)# diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_6.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_6.csv deleted file mode 100644 index bcb439094c5584d2b9141561c8c54a8ad18e8ae3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztngNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!=XCgs^MCx)iYM;xaT)GBhv%QU(GH M28=+;KnH3#03_ckl>h($ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_60.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_60.csv deleted file mode 100644 index 3d726a869239ab47a0376d4c77df4eff5ab3b781..0000000000000000000000000000000000000000 GIT binary patch 
literal 0 HcmV?d00001 literal 236 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpbU%jil zYmmB*dXl<%wR%}%j=GhCLLB3NMqgG&eP(qeolJ}dKz)qri9i)7`mCyTLHc}w`Xb}v z^5a0j9ZdRw$vE{gw;Xl*S|u(em<YqJmtr)~)}rK$#3FSa1q`MD0|P^dt9wY0 IIzz)-0CDLv!2kdN diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_61.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_61.csv deleted file mode 100644 index 05ba66ba7e8034bb3e169315903736ccd32a3013..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 236 zcmZQ%1OtYr5E@8Yf&c>p^9#5LV~RSXJfk!NBLf2q1EYbuI*VO3mwKX_m4dpfU%jil zYmmB*dXl<%wR%}%j=GhCLLB3NMqgG&eP(qeolJ}dEOuFp>WM%V>Ygxt_5anas&zs7 ze1ZBRpX4*p diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_62.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_62.csv deleted file mode 100644 index b08e116c19e39d67f543610e3390bf8063c85a78..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2371 zcmd5--EZ4A5SJ1Uh>Qhj-v;bqtR6-Tusu??V_U#lWSN_GXp%McigXxk(-Li~kVQ|T z>}5{S{U!VF1`OEi9tU(sO8$sztk_F2!l0!4_}%Z0ceH8hW&EB!VQ;Br82f*$B&Lte z&&*E^(=ZB#NiZ%vTof_;+(r25`J47>`x2M%05=wR#zO2O)HUClXCIkg?&A&5J(GZ+ zi5Xbj@pBht<#z^tuex2)RqIsm&-C8i46-)Hv&SKRbZZy)3Lh^ND*pDx#W}vcXVQi3 ziLl3Pmz4C|z<#T6S#d+M{WU-)}bLw*(8@gJ?~^PKZrRvOrtE*jvPf0Zb6eS1P zjOjDUmD6!2cL8h>``qunqmgWsR5pDT-z~b@WXX9WBFMpZ#$s>GVqCmEDBvF@{$wBb zw<=)C%-hPd6p@u>X03CvHc2R&)*aH;4x+g%$5sYKLQk*0ZnsX(Ue+(k^IE5U^+WCI z@zrJkD!C99T6txS#(J#tCIb&mQM{k~K3`>>qS zyLbz`_gbWJ#KPbg-iyG&~xyQ3lV@NSProm<1u4v)C>n zd-IGYOnPHzLX|Rgsr_?^A%CUh%3c#AU!CgFac>D#bhwBrl8MaO23=vfY+zJ_;fGLVl{_lZsCD z_B}&h&ZoRa|82_cA|)p!`i2mADXCk-fd8tUx5j^-a`u6z9D3V?sfS@@Vfa6u@)0yA z+*qoYyfl$w!go!1!^sXOmbeXj#&wsJo@Rq1qypFtI0zE5{S; zNy3t>+DG7;OC)Z16nG9KcmyP*a%SvoehBoETJhLE=R5yDF-?6LzhzH2o9Z%*z2C1X zW`NDx=DUVz7zM*57#BXA7cqO^L-@hz^VV_e43}^Z*XMZ3BJ3g5F<+V|H_f|ycun$# zNkGoT3=kV~-n^{jPC)Kyrz1M*I@aeyeRkH3tiX7BKf)hf*~J@$H|I(#{_OnGDZaX8 z(z)%6sLO1Rl=NHAeyO8seo)Gu*XMpL)8d`wr{)^B)MsI@@YlSE5S9sxQ|8N1#N9AZ 
zzCSrUJNonxV%BWVMmhb#X*;6@VZuRR9A}Z{R5lWmSc03MJo@6r!&U}mzOSfS10*Gf zuGWu>aO5%6+ z@Yh=zuw*7}z~1=CoY50u%{7zWBJ+JUV$&J0p+XcI)Cx z_u~G==a0{>jc zKmGCN-Cq~K{CNKJq4B~{H=)jZaIaaL+udhklEf?uzvJCx5^>u@b{|M6g>rDj;sB+5 z%42lNS&||)5J++ROb$7G_YyG~3?=G|L=MqpEMg>wVTyba%P{7#L=44zhGM2VO_6;b zPI-XB7)4=^C-PF;fWn79WHdlhfCi4ni5QDC)XlhTJTwA77W_I4(eMFv|<8RF`tQEPTYXT zi)x)N>@l_XDOotwUAWrZB<0Dql6u213jx)SzwzeeGmTXdz-qR$C|my+5j2K TP6*vs0{_q8zooas`r!W!s64qn diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_64.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_64.csv deleted file mode 100644 index 9050f01683ebf4f2e70e34fa4c5eeed7760abcc3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 261 zcmZQ%1OtYrKNVA2On#;KRN<*3`&Dsd@QGa5Q76h-PdaT)3u85o)98W`)UTW}e{IR=*c T5J!Pk!yO1T7XpChI>rG2cUC$Z diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_65.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_65.csv deleted file mode 100644 index a98344d21bb3..000000000000 --- a/data/csv/afl/20250211_csv_fuzz_crash/case_65.csv +++ /dev/null @@ -1,27 +0,0 @@ -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -11" -199;1000;"a "" -;a ""y^^z012 0^^^^^'0000 -,10C,1 -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" -199;1000;"a "" -;a "" -10" diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_66.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_66.csv deleted file mode 100644 index b7408c92520aa15dce7c3190e16ca05b96e749d2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 268 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMphP07j)Mr*tQD>BAk!E0IU=U+sG+?na;4(DO zF*I-w2~uZhc*|%g%4le4U>ZxQz%oW_%z2A+&K BL3jWF diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_68.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_68.csv deleted file mode 100644 index 37a9ce79fbb141f91d187765647d099bb249da84..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 
literal 252 zcmXrCw6r!fFfg!IN>or%;<5&lh6YMphQvxR{%7=MWz=U@Pf=%-XOw1OWME)X0D}Ko zmb{iVuCBo$=1hzREOyn5>WS(p4CSc*J>b?pJaglLx`EelN z4kmrTWSn}LTaLPYtrC|KM5UEN?RyYg-Rgfm7Z)cN5b%IiWM0;T#5e(1!&^oG%S$`y diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_69.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_69.csv deleted file mode 100644 index bee400e35ba1bc4ed9609a9ecf2ac4c450e19af0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 233 zcmXpsGUhS_;}48jfT5uQx~M+0dWt%uJfrky#(IVZ28Q~5V4!ZLpzi5c?`l=8UY3{x zGvD0|9q1=>sO?)XUs*)a`4PxRk0H4ZU=nxD0iS42(>44UBaSEVzt-oM51> zh6YBeT!scZhGqs_FaR<%4rnL@01b7Fi}PWH@qx4e0|P^dt9!^@b%utwjE15hr^5^a E07D%zw*UYD diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_7.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_7.csv deleted file mode 100644 index e6b78bdd5fe06abf9b9d2552b2037835db408356..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 171 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztngNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!=XCoX%(fvkOF{*$39eWoV#eXkY-O P32*7ISfDv1%l434p1y6N#F?Fc&T98)W y3I!z1>Xn9t2&2?1)e(|8Ame}{VATxuAiW?iLV>!KIz$rUCUvM357>nY5IF#8Wj`1I diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_71.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_71.csv deleted file mode 100644 index 1f44de30dc3ca3c073e951ef0787b4d958261889..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 251 zcmXpsGUhTgGOktPQmR%jOUzLZQbz-f7(l&NfYGp8DLFrfkwvar9Z0KNDX0gB1cf?> zXaLPn2y+Z__HYc+RIh~^YE^--z>0wZ%3%QMgUZwbt+wK=P(V_rUTKKJQ%6YVfNiV< jnh$jLk5Z6c1@%fK1?pDn5J|9~fEBf0P(W7Tt!||bk%U;U4%N#8RSK2^0OW}{CjbBd diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_73.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_73.csv deleted file mode 100644 index 6e8fbb5507c2d718375126f1a053c1692f402353..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 810 
zcmb_aO-tNh5PpIeeMRUW5JFILFrk}Wzi%$0Qg=OcPgPX1Uw*|z_QQj+^mp~xqko|H z9;B6}P$-ce#Q4m_^{NL4-ppj?d1s!Pd4pof1bY5jZxwg&!3wUOMiL!{&T7F^t4?cR z_Tr>xhxTbViTBM5BoK&aE3V;KAIIG0!He{Bi^bi`FmhMzf#V0Rm06Y934hYJteJ7Q z-)sB9qMJR&2yu=UJnAFrDHwc0f-df0<2xc;GSXq>ikK1#im2j&k%-<7y+tgjcY}W+ zV*KL$6K1fn{#r5L|KX?U1Z9pVP19kTPQNPy^l?ExF?wV=|IDg(dwfjx=H1LPoMg{*^cQmzSMciOgrKSrq z-wXv5;}{dMa#JesZk4u7JS2CSc}Z_?;TC7gPqLpV>pD>#vXLF+HOlZ+sB-FZ>oV#ytEZ?l%J)k%*y?g(fCdJbPy<6* zVvc%{fGqX$D(ePBg&7*}wo7QZGx)Q4dm3 zh>MJi%Z~#AcQEM#Cgar0+;Y_IYn8Z^s=+F)6rk+J9Ca&&|CT5KYzRo-{|0131Q-|? fLR{TLtiXU#mes(}z)+sc&_Kt~z(7e!i3<(@`O7VJ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_76.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_76.csv deleted file mode 100644 index db289c458b9c336b9a248463a9c19135c02de26b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 232 zcmY+5F$;oF6ov0i8@e>KH}r7mEJbdiL$#O$*&r@04~&Kep-qTK*q;qr`VHk1i#o%1 z50~>*+=lL-#(@oy^Z0)F2p;i}n8+pe0e4{p`8ZVA?5Ze^QcQ5b+@Q=0gi2|Canj|P zgCDcQe(f=GTK#*WnOZNYy}j0FZ4)=#HqSk#3FSa1@$rmxP$>1Bpc+!Cn{MP>L?}W>nJcN XFt7+z8yGO+0Fw0v`H4kkAS=`Xx~vdA diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_8.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_8.csv deleted file mode 100644 index 2ee586557b168beb83a0b72dc5c2dd9524fee86b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 175 zcmXpsGUhUb;{3c+Ae)h&Sv^IaQJztngNcEGg^AIC#jcu5JyG3CLEY1@-qqbTNL@!g zN!_Yiy(}>YC@K&a83zUKVA==5Q!jJNQMa#E;!>(s2dRLlhVUV51?)Px3=NbF4Ge&k PfdGR6BakxCfm#3n>K`lu diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_80.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_80.csv deleted file mode 100644 index ca8c67c89c03d488aab455b4f0c004ac5d89b3ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 85 zcmdOrVE_R^Wd(a)E(cyNM|GY0vc#g~jKnH+9R>9=0|-dY&#^=Tx|RwI3Jfd))gal# MqB5Wf2o{4n0M7Ce0ssI2 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_81.csv 
b/data/csv/afl/20250211_csv_fuzz_crash/case_81.csv deleted file mode 100644 index 7651212a6534a6397efc9b23f35795754ce6a0c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 101 zcmdO500Ti~1^c614!m3`KyF!LQF2CNk-Cn8dYJ))GysF-{G9kiB`ZT6rQCcS1qKBM U7Ig7ssJK8iNG7qU3}m`G02k5}>Hq)$ diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_82.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_82.csv deleted file mode 100644 index 9b792cfb3b325c840c4ef2387a1242f9fd6acf6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 90 zcmdO500Ti~1$$mD2VO2mb)EXMKm(pA(;`WM!zMl$@`lz@Wgu nB2aB$U}&JkWoT(>Z3t9kt(2&sq{L+nCczR028l&wAk)05lLN85g%54FDB` zfqI!+j=FuV5|>i7dYNmpy48J%aX?9k3b2dg;t=W-z@7pd3Q+>oY*np}2n;k9C~P1; zMG8Ddkn7@n@PasSsQlr|06Vk>M8<-drKP3rAwlX44R09@MHvkhjSLOA3=It#7#iw< L6cBK6#l-;tSX67Y diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_85.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_85.csv deleted file mode 100644 index c6030efb1e9a477b11fbc9f696eead2dcdb95796..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 449 zcmZQ%!~+MT|Y zwMtw{)#_!5IqFsl$R>l#R7%dziBD9rGSo3NG*VSc7RZTDQnLDA&!nCRRITpmSMTa( z2?ro&<;TU@#l^YD@$%}z#lZ}yB+yWIkjZgz>Sb;@>h@6EU7OXd?gQCizzrt#nblL& z8RZ$J8SbMv4`>4rfDHj^u&P!^1QLR!24W(@3@K0daFekcmbKOlkb*cS=m z5lydOZ+ifLQ1>!9=~Toj#Ujz?4d(<~W&mB_fl{r;H(LLSN)5P(tyj{(5WPMwmdV_o zlw=Y|<|%W)mNhKn#>tbiL-5%HWB-7&i>>Vw#`7zBH}PwXnz%#gL08?RAUuemIYJbQ zA=2QTusT@0cI{Q&U;7})wqZ6xPrWz8&c+%OMNzO`p-LY>ZJ@!JTunqur3sPkIF9W) Jt^`PH<~OUsNIU=l diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_87.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_87.csv deleted file mode 100644 index 7ab38f0bf7bc3bff1f0711631fcaf5476681efc4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 397 zcmZvYu?oUK42JKtivdSn9IRb3Nj*0!=%hsu2XS!;6dfD{r*=_k!3XW+DEI(^=3=db zenS#2cmJQ9)_*x-GPz)opFIGuHwMiwiOTTv%B3|Ggd1DTZTNJ{%qnjJt&~&wLH7`y 
zXIjAs0~cqDbTS>3Sc?PEh$y6by<473Q8d{!w9Z7BO!qkE!M(S;x=IGP*JK30id)oe z-BLMk8ST+`WeIj*MC9xZ|4@FwcmG0L4D2*S&y-CbuoJP9LK Nz!;-cmU3bMh+ZVgO`!k) diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_88.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_88.csv deleted file mode 100644 index df6772439a27db84637baff573059fb93c79eb8a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 412 zcmZQ%!~+nOfj71f&nW=1_l-;48UlhuFmqm zo=H7X-AX~-)34su%@PhkHs;60*~P`V$MN#&g2k1%l&aOs5_5pckzC{sv|GK*El1tH z7OWu7wOQTjK9G$B^qJLD)EVU&r5Wy{n2;a05~v$wLL5+oRkb?8|7a{ka3F;jBgk2C zabS!7aAkmuiK_vVV4}3N)IB6fouT0^qoF9Hp`wwY0hghnAp=80J&*zdF0QyZ0AIXL AWB>pF diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_89.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_89.csv deleted file mode 100644 index 1d9e2996dc5bb6a504e06eb73aefc34b2cd6325a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 393 zcmZQ%!~+di7_hx diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_90.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_90.csv deleted file mode 100644 index 09ac5761f6569eee79063051e37ebd8982cd64b5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 421 zcmZQ%!~+cG^oQgD8xm^L4kWRh~WbwV6t$U3kIwhaT%)4 z^1q%*JyG3CLEY1@-qp<#4nS7q$Hm#j#kt4v^6J9H!3?M*&{B7h&2e$+Wo|j@_O(h} zO4aIRuFXmREiHNBTC|O8TwQ}h%z*;h#`l2=5_5nSf;|DW0SLf`05w}xt0Mveja3Hs zKRnHXJ^AA@B*svNf2_{NQOWi|)fFbvm(NL7pP|?WHfXmR(kb$A09!LQJ I7gt;y0EBQ(Gynhq diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_91.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_91.csv deleted file mode 100644 index c56c566235daff6282e7edaf8460a7a91e20b024..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 446 zcmZ{gu?oU45I}S46zr&rgWBaLv2klbowNwzATBNkMF)q1Q@g0N;J5i5PLBS8pqJLz zqWFf~kxMQw2k?(NfVh2%ZnGd3DVjou*7U~TaB{#n2+`|g`+K6fE}FxiC=C}7qE`6< z9K_l)sbIjZaJEP$Q}>N`KsI?6Hci(ZvID<0BHJg+>Y1Z?)Ph`O{A?wRry-6{wzOGx 
zaz^jUd5L}%w+L-$iR<@oOcTD4dz3UvX)&PE1VQT%We+{%E^|5Zsx9=$5ObwmlA9aIBUcc2`ploBFMI51KYy#UdDRX6|u diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_92.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_92.csv deleted file mode 100644 index 5627769cf6a13456eeb7299ca9274a7dceda4f4c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 397 zcmZQ%L|TGbDe* zn23;JcnbF+Bgp!=IIuteaAkmuX<%Sr0MQ^&T3YHJ5~R-1@Rrd~l+jSp$k2ex(9n>9 Op`ji~0Rb0RTpR!wTuKrE diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_93.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_93.csv deleted file mode 100644 index c343ed6dcf8cf074a4667dadc9ffe705853a92c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 442 zcmaFOhzBq{g-cqBAwU^JgE~w?AuckG9N?Y|(&YmpU`D}dE-qm%FtB1|U|?aw0E`Ce z>MZ~3nbZ^2trXNf{pwxaEa3p;p8U8tySO;_I9^^|xHy;rl>}Ovm;=;XtHh;LtzHIZ zBYDIdWt%uJfk$jeH4X28-M_)FfI;gh*h;ZG<4K(0!Bup&}0O;25QS6 wt_+YdaW!BPOq7$qG!$htR5UU);4(BcWMF8h2U0-5#T6F^0Mb}o761SM diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_94.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_94.csv deleted file mode 100644 index fca0d7564746f6f3f2f7f86ec759c38b867ed89b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 444 zcmZQ%!~+ ztF!#CXHrj8w^C5|^s9GuvxEbX75Q;-c5!j;alE{`mX>;8)0DWl7?PB%jC2%~bOqsx zfefg6pgY__j){v?FLTRLx35*=QmR%jb8S|)x({RnK?BrP`poJn>Ws3C(hT>33KDaG zZUFlWXekhY4DkkPu&P!^gn<~U7f{`gqzuS}1Q#eIfZ>T@KF|q&xH7;N)qqJbQCeE+ j9ulO^(D0VgP?XV7(a6w%%h1q}fuW%uNC5#CS6mzba+Ol9 diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_95.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_95.csv deleted file mode 100644 index f651e896146e2d1865c3cd1cd92aed63bfc2a7c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 264 zcmZQ%!~+6IK@o3Cm4Ja1sVLASTA$W%4gf@Cp=+CcdHX z+SlH@S0VlYo`aaqMr2T-7A!hakLaw5y;`(7u+;DcfHC=ja1f2AoM4Q;g}Xx;gx<#M zp)69OEm1_KxzxuUpE{9EBjrqlS(r2pfV2zDf(QEb-PZ1kS5v&8Sco~)d?5xNs13mm 
zjJ63s@CzIRe^q1tj}H=R9HQ3gC9iF>TaMgH%@HRmSB{q6uU$vFt_x6=ObjWB-cgrG AAOHXW diff --git a/data/csv/afl/20250211_csv_fuzz_crash/case_97.csv b/data/csv/afl/20250211_csv_fuzz_crash/case_97.csv deleted file mode 100644 index 777ca34bdcff0af731fa9384ca0b517fe5401b8c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 373 zcmZQ%!~+WuP?(hT<@_E;%^-32rX z2;4yg&;XztU_QX05nh4^0vjVE$W3u^Knu9I{%~c0jESoOlVDN zaSRL$|9!yhIQ25O9Cd|SB`&3EbtIJ_NuVycI7n9Yf+^5LW?7pspf@WQMm4i3~_KiZTiq8Wk&ZDhmywRHjtMW<7XafL3K0MR diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_1.csv deleted file mode 100644 index f685be9af1f97706e3dd0225bdce240b3aa935c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42 wcmb2|=HPgEi9dlQm4TO4ASFMyK<|H2R+3JoE|;N>k&ZDh7m&~~;pO@Q00LACPyhe` diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_10.csv deleted file mode 100644 index fbd977e626f66e1b4358e2d45ba0db4ec129386e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4922 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#zz7ZjM-b4lG&MH< z|DRzrGmM77Xb6mkz-R~z;}9?##(o>Y75Qkaj)uT!2n^v6h>J_k7%jg>Ltr!n=oSL} KbhC-JM<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86Yv{m)1S6fHrW`02Y^mPyhe` diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_12.csv deleted file mode 100644 index 80bfdbe5ee3434edd31feccc610c2b9b75f64a70..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4922 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#z-S1JhQMeDjE2By z2#kinXb6mkz-S1JhQMeDjE2B~h5)a!0h1tyYmmB*y0!6WH+3`wMnhmU1h9ty086WS A4FCWD diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_13.csv deleted file mode 100644 index de4bb8a6bff7dabefc48c6d5a96f206c04ceeb11..0000000000000000000000000000000000000000 
GIT binary patch literal 0 HcmV?d00001 literal 4915 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZOu?1QK)9trQeSLvu6) zMneE!2z;eVZo_B8sN84>jE2By2#kinXb6mk05Al?M?+yW1V%$(Gz3ONU^E2i5CWNW JumjqQ1OTRodV~M~ diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_14.csv deleted file mode 100644 index 77660389cae4170b67750ccbc52c48b93feedb41..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4918 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86Yw*I*LF~4{8)}rg%+SDq z3ms5u=BWPB5Eu=C(GVC7fzc2c4S~@R7!3jXgaD(Vp@E?Rm;Go%dNc$^Ltr!nMnhoO Sg#fsKH#E?pPlYlFZ2AB z6$|lviK&7%spoAqGjHB5KUlvN;v{S}l9{^9Y^a$!@R_p7l)YLG6i~H(&7PN>Cdn`< zbe0T`4IbZ=T~^jcHq!ln<>`4%J#^kX_iAOqOvdM-T{pAu91hn=5wz!ew(GOHVAY1k zK86_K6&{|RGfX+T<>U?lRuPqsLwq6QNjVA7#ah|jW#y3Lh5MIG@UVjkZdm0Q)7Ptf z+ql99y*V9Qh~qfNQ5iMjQeh5gtS&N(eSFeeSIXBqIJKmrftDi#ni42J$6soiCwoFb b2nYcoAOwVf5D)@FKnMr{As_^VK%Kx3Qon$J diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_16.csv deleted file mode 100644 index 8c2175321d6744c5d2956ef1a31c28737719997f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4911 zcmeH@Jxc>Y6h-e8ikT2Q`ym*n*@c}LEdnkmk|JOVlSYeyDOOR|iR7zL1pk_UB`JbM zO2tAnFPp7|4QT!DRB!K@H_NG&LL7&se??DRs+*!Gi;n>E)oWNOdueUou5x927aA`M zVfKVEk%?rtFiLJq%5(L-`Xu&u^mud~nj`)5?cs2b1VLr1Vd^!SZfL4NqK!U=c!R?0 zEX9PH0X2^ZaDXTy&hdqmPC`w9Iu5h1Et(uW=5+s-HYl87j0c)LM#MF8?i6?UAl4Ic zf;f&nI@zirU0T5bf%!wa+{Guczmw44IykW_qGE^KumLu}1~wU(PB*pOu>m%~2G{@_ TU;}J`4X^<=zy{dBUkxk)`c#8$ diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_2.csv deleted file mode 100644 index 3b6e26bb6ace46abb7a240b441c76e3ec9b4d698..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42 xcmb2|=HPgEi9dlQm4TO4ASFMyK<|H2R)bE9E|-9gk&ZDhmjHvYjtMWk9z+ZxEUQ diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv 
b/data/csv/afl/20250211_csv_fuzz_hangs/case_5.csv deleted file mode 100644 index 2709f85fe7d427752072bd85572b411066693fe8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42 wcmb2|=HTE@czB5=g@Knk&ZDh7m&~~;pO@Q0MCpGCjbBd diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_6.csv deleted file mode 100644 index 95986aca82e796b51a0dd27b2717e2e6a47f5b30..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 63 zcmb2|=HPh9z`zjV>K?+&`mymXqoF9Hp`n4H0hghHj-i3M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#z-S1JhQMeDjE2By z2n?bS;2uP78EqxdCj>Z0<9sv(MnhmU1V%$(Gz3ONfNCM2MKzP?(+(I`wiM99Hvk-* BcuxQT diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_8.csv deleted file mode 100644 index 768f41dcd7bb32a6e808b795f8f361087bbaab3b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4909 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86iw;nV4t7w_9}M->Gi%hO z(GVC7fzc2c4S~@R7!85Z5Eu=C(GVC70dhj1fq@)dqk2X|U^E0qLx3tF5K9$PhSXTd H;Po*8A3S(( diff --git a/data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv b/data/csv/afl/20250211_csv_fuzz_hangs/case_9.csv deleted file mode 100644 index 2897497b6cfff1bc9d4a7d5fb1055c2c4ec01fe7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4920 zcmY%8)Mc>M<)kKHU}Ru`nx|ft$oQYpNA-m+H&ld?kI^86YZQ)#z-S1JhQMeDjE2BS l3W3pzZ!`o(Ltr!nMnhmU1V%$(Gz3ONU^E0qLtrR|0037gdGi1O diff --git a/data/csv/afl/4172/case_1.csv b/data/csv/afl/4172/case_1.csv deleted file mode 100644 index 47200b6b4d2ec0e76ba53d1168e9d63a90edfebc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2398 zcmeHI-H+Nv5Z_dZbVkZ^+qV&Y@C%?_^KsxlUDE4^)TGUYM%Sv+X|NaAlkw8^`sn2p z+Jt;)lgjk!KN5?JqPK7B;@%5ewmZ<4q#_^Qh1X$RXFz^m*GUlp zS=^vWmT1D(tCRcTpZ2H}fvr*xcIgIHYC)4supfnsGdPGkct@l)R&zUr78gHUp4_8K 
z{_~bG@om%^GWqCy^7zS%;RSy-n4Vw$F}VEx^4DhL&V-iGPMNwCXZWI9* zG3T-1LBykemk+vp1Y#8P(>NLhj=$zR;<{^JUF*{1n5mcY%9NRG$IzY=1O-)J0_G_c z@N4u2tJ~O&pEG^8nu}dh|ItRN{1tdDRDwnwhgj{YQM>k*&PNce# zDX1JWHIv5PdLPthl~Dasu$mYotc_mlLU}8@MB>RZSWz5EQ6)xtUHMmuSr2l(fepn< z5}7Y>QPCqToEjwRMrE);dI@u-mX3N9H|L}QUTI3u~fO0$1$Pb zop!qakws$E+Lg+YIwd2ss+i4kZ^Lkzjz!;T?G6j}Hz1_DD}eTd1&aLfJZ;BaKn5`+$p4dt;Q9T|J-g zO+yqe-6Mq4$9{v@bMo3 zxBRprM@Qg8Ek1k6I;_t)d&pYs346mb`buU9jo6TF$L{DldCFSy<>KCM9zLSw_ZOI$$nWh?PQSUWtx^dWI3ug0jS-WQYN*|NpaQBo=`z llKP{jqn=@?lVQYVW}s8fz{J47rEjQHo)0w0%*3xe9{^6dIeh>C diff --git a/data/csv/afl/4172/case_3.csv b/data/csv/afl/4172/case_3.csv deleted file mode 100644 index 691f0c0e161588d33fb6b4be9499800244f0176c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 257 zcmY+8JqyA>42JKvl0k6Q<>=ro0nMG(a+`>#i$f{yR~0H$3&jsa{89craq#*fXdv*0 zym{XE$MfN~Yu$Z858nuSN+aYtvq+FYlG(o|PJZQwAOCbQi6jb{ycl~eSMh8v6_ek!>Li(Iy*kN@>7qgxHAv@#Ofaa<5%%v8e&0 pv6!T`D1$oh9?Fz3RS*pDnwO|PH6l=rP#Rrw-{?SpSHsmc{{U@iK(+t? 
diff --git a/data/csv/afl/4172/case_5.csv b/data/csv/afl/4172/case_5.csv deleted file mode 100644 index 708aee93cd765054732ddca101ee10bb800d8376..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 240 zcmXrE&dJP6HI6s3W|TJKGE7WDB_MnbMg|CAWnwJKN#QcH;4-t+DaQvAi;|M8ITKrr zb<{Hqbux^&%nWqO^TC=8VGJW(sE9F4#017L Date: Sat, 1 Feb 2025 14:18:37 -0500 Subject: [PATCH 137/142] Better comment in optimizer.cpp --- src/optimizer/optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index dc1ddfa59224..8c16e83a6c62 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -118,7 +118,7 @@ void Optimizer::RunBuiltInOptimizers() { // this does not change the logical plan structure, but only simplifies the expression trees RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); - // transform ORDER BY + LIMIT to TopN + // Rewrites SUM(x + C) into SUM(x) + C * COUNT(x) RunOptimizer(OptimizerType::SUM_REWRITER, [&]() { SumRewriterOptimizer optimizer(*this); optimizer.Optimize(plan); From aadd5438b4bf7897ca9ee8e1e02de06b5728a6b9 Mon Sep 17 00:00:00 2001 From: Richard Wesley <13156216+hawkfish@users.noreply.github.com> Date: Tue, 18 Feb 2025 16:46:09 +1300 Subject: [PATCH 138/142] Issue #16250: Window Range Performance * Check hints for equality and skip search. 
fixes: duckdb/duckdb#16250 fixes: duckdblabs/duckdb-internal#4229 --- .../window/window_boundaries_state.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/function/window/window_boundaries_state.cpp b/src/function/window/window_boundaries_state.cpp index 6ee3c105234d..a4b034441d38 100644 --- a/src/function/window/window_boundaries_state.cpp +++ b/src/function/window/window_boundaries_state.cpp @@ -211,15 +211,23 @@ static idx_t FindTypedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, if (prev.start < prev.end) { if (order_begin < prev.start && prev.start < order_end) { const auto first = range_lo.GetCell(0, prev.start); - if (!comp(val, first)) { - // prev.first <= val, so we can start further forward + if (FROM && !comp(val, first)) { + // If prev.start == val and we are looking for a lower bound, then we are done + if (!comp(first, val)) { + return prev.start; + } + // prev.start <= val, so we can start further forward begin += UnsafeNumericCast(prev.start - order_begin); } } if (order_begin < prev.end && prev.end < order_end) { const auto second = range_hi.GetCell(0, prev.end - 1); if (!comp(second, val)) { - // val <= prev.second, so we can end further back + // If val == prev.end and we are looking for an upper bound, then we are done + if (!FROM && !comp(val, second)) { + return prev.end; + } + // val <= prev.end, so we can end further back // (prev.second is the largest peer) end -= UnsafeNumericCast(order_end - prev.end - 1); } @@ -943,6 +951,11 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx } else { const auto valid_start = valid_begin_data[chunk_idx]; prev.start = valid_start; + const auto cur_partition = partition_begin_data[chunk_idx]; + if (cur_partition != prev_partition) { + prev.end = valid_end; + prev_partition = cur_partition; + } window_end = FindOrderedRangeBound(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1, end_boundary, boundary_end, 
chunk_idx, prev); prev.end = window_end; From aa82eb90336db14590c68eebb3b43fbdf3576953 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Tue, 18 Feb 2025 10:13:16 +0100 Subject: [PATCH 139/142] change string hash function again, now inlined strings are hashed branchlessly --- src/common/types/hash.cpp | 45 ++++++++++++++++++------ test/sql/function/generic/hash_func.test | 44 +++++++++++------------ 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp index 9a9fd5daf9e8..de0233b0f224 100644 --- a/src/common/types/hash.cpp +++ b/src/common/types/hash.cpp @@ -78,6 +78,35 @@ hash_t Hash(const char *str) { template <> hash_t Hash(string_t val) { + // If the string is inlined, we can do a branchless hash + if (val.IsInlined()) { + // This seed slightly improves bit distribution, taken from here: + // https://github.com/martinus/robin-hood-hashing/blob/3.11.5/LICENSE + // MIT License Copyright (c) 2018-2021 Martin Ankerl + hash_t h = 0xe17a1465U ^ (val.GetSize() * 0xc6a4a7935bd1e995U); + + // Hash/combine the first 8-byte block + h ^= Load(const_data_ptr_cast(val.GetPrefix())); + h *= 0xd6e8feb86659fd93U; + + // Load remaining 4 bytes + hash_t hr = 0; + memcpy(&hr, const_data_ptr_cast(val.GetPrefix()) + sizeof(hash_t), 4U); + + // Process the remainder the same an 8-byte block + // This operation is a NOP if the string is <= 8 bytes + const bool not_a_nop = val.GetSize() > sizeof(hash_t); + h ^= hr; + h *= 0xd6e8feb86659fd93U * not_a_nop + (1 - not_a_nop); + + // This is just an optimization. 
It should not change the result + // This property is important for verification (e.g., DUCKDB_DEBUG_NO_INLINE) + // We achieved this with the NOP trick above (and in HashBytes) + h = Hash(h); + D_ASSERT(h == Hash(val.GetData(), val.GetSize())); + + return h; + } return Hash(val.GetData(), val.GetSize()); } @@ -98,22 +127,18 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { h *= 0xd6e8feb86659fd93U; } - // Load and process remaining (<8) bytes + // Load remaining (<8) bytes hash_t hr = 0; memcpy(&hr, ptr, len & 7U); - hr *= 0xd6e8feb86659fd93U; - hr ^= h >> 32; - // XOR with hash + // Process the remainder same as an 8-byte block + // This operation is a NOP if the number of remaining bytes is 0 + const bool not_a_nop = len & 7U; h ^= hr; + h *= 0xd6e8feb86659fd93U * not_a_nop + (1 - not_a_nop); // Finalize - h *= 0xd6e8feb86659fd93U; - h ^= h >> 32; - - return h; - - // return Hash(h); + return Hash(h); } hash_t Hash(const char *val, size_t size) { diff --git a/test/sql/function/generic/hash_func.test b/test/sql/function/generic/hash_func.test index 44ca5113eb87..0427e0d0ee6c 100644 --- a/test/sql/function/generic/hash_func.test +++ b/test/sql/function/generic/hash_func.test @@ -44,9 +44,9 @@ CREATE TABLE structs AS query II SELECT s, HASH(s) FROM structs ---- -{'i': 5, 's': string} 5041354121594313779 +{'i': 5, 's': string} 312378390946197788 {'i': -2, 's': NULL} 13311620765177879553 -{'i': NULL, 's': not null} 17669771151474316850 +{'i': NULL, 's': not null} 12187543307399756733 {'i': NULL, 's': NULL} 18212156630472451589 NULL 18212156630472451589 @@ -76,11 +76,11 @@ NULL 13787848793156543929 query II SELECT lg, HASH(lg) FROM lists ---- -[TGTA] 17595328716338797054 -[CGGT] 10306172129632853293 -[CCTC] 13297701768986389650 -[TCTA] 12532519228232631318 -[AGGG] 18327401687889337414 +[TGTA] 2473061308111828075 +[CGGT] 17252230290449032892 +[CCTC] 12469451733100292545 +[TCTA] 16441147910138644840 +[AGGG] 6734708784738468094 NULL 
13787848793156543929 # Maps @@ -98,11 +98,11 @@ CREATE TABLE maps AS query II SELECT m, HASH(m) FROM maps ---- -{1=TGTA} 12831981919938534237 -{1=CGGT, 2=CCTC} 13475482557019497469 +{1=TGTA} 7235425910004250312 +{1=CGGT, 2=CCTC} 1011047862598495049 {} 13787848793156543929 -{1=TCTA, 2=NULL, 3=CGGT} 6801514312074335687 -{1=TGTA, 2=CGGT, 3=CCTC, 4=TCTA, 5=AGGG} 1967491966533763128 +{1=TCTA, 2=NULL, 3=CGGT} 6001596667924474868 +{1=TGTA, 2=CGGT, 3=CCTC, 4=TCTA, 5=AGGG} 16287978232011168685 NULL 13787848793156543929 statement ok @@ -189,17 +189,17 @@ SELECT r, HASH() FROM enums; query II SELECT r, HASH(r, 'capacitor') FROM enums; ---- -black 10215506564763180114 -brown 14699666407584440049 -red 10435339440036763924 -orange 7449326894723801922 -yellow 7545557152300511399 -green 13515514493392674532 -blue 16730185616673645170 -violet 6167961171085770869 -grey 10019148715359395841 -white 8224352891729695362 -NULL 14853453776375799790 +black 16797622758688705282 +brown 12620868779234625953 +red 17584344400128560708 +orange 268160620305560594 +yellow 895888387990267895 +green 16089427619650030004 +blue 10156864916169405730 +violet 3549084991787980581 +grey 17281098274178594641 +white 1655957553588749778 +NULL 12320705626460735678 query II SELECT r, HASH('2022-02-12'::DATE, r) FROM enums; From 12e96f6ce5ef0f0c334cfa624f4d6854b38644b5 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Tue, 18 Feb 2025 10:52:12 +0100 Subject: [PATCH 140/142] some more fast paths --- .../writer/templated_column_writer.hpp | 81 +++++++++++-------- .../types/column/column_data_collection.cpp | 19 +++-- 2 files changed, 61 insertions(+), 39 deletions(-) diff --git a/extension/parquet/include/writer/templated_column_writer.hpp b/extension/parquet/include/writer/templated_column_writer.hpp index 027af57fe6c5..1ace63726af5 100644 --- a/extension/parquet/include/writer/templated_column_writer.hpp +++ b/extension/parquet/include/writer/templated_column_writer.hpp @@ -263,6 +263,49 @@ class 
StandardColumnWriter : public PrimitiveColumnWriter { void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state_p, Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { + const auto &mask = FlatVector::Validity(input_column); + if (mask.AllValid()) { + WriteVectorInternal(temp_writer, stats, page_state_p, input_column, chunk_start, chunk_end); + } else { + WriteVectorInternal(temp_writer, stats, page_state_p, input_column, chunk_start, chunk_end); + } + } + + void FlushDictionary(PrimitiveColumnWriterState &state_p, ColumnWriterStatistics *stats) override { + auto &state = state_p.Cast>(); + D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY); + + state.bloom_filter = + make_uniq(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio()); + + state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) { + // update the statistics + OP::template HandleStats(stats, tgt_value); + // update the bloom filter + auto hash = OP::template XXHash64(tgt_value); + state.bloom_filter->FilterInsert(hash); + }); + + // flush the dictionary page and add it to the to-be-written pages + WriteDictionary(state, state.dictionary.GetTargetMemoryStream(), state.dictionary.GetSize()); + // bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up + } + + idx_t GetRowSize(const Vector &vector, const idx_t index, + const PrimitiveColumnWriterState &state_p) const override { + auto &state = state_p.Cast>(); + if (state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY) { + return (state.key_bit_width + 7) / 8; + } else { + return OP::template GetRowSize(vector, index); + } + } + +private: + template + void WriteVectorInternal(WriteStream &temp_writer, ColumnWriterStatistics *stats, + ColumnWriterPageState *page_state_p, Vector &input_column, idx_t chunk_start, + idx_t chunk_end) { auto &page_state = page_state_p->Cast>(); const auto &mask = 
FlatVector::Validity(input_column); @@ -286,7 +329,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } for (; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { + if (!ALL_VALID && !mask.RowIsValid(r)) { continue; } const auto &src_value = data_ptr[r]; @@ -313,7 +356,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } for (; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { + if (!ALL_VALID && !mask.RowIsValid(r)) { continue; } const TGT target_value = OP::template Operation(data_ptr[r]); @@ -340,7 +383,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } for (; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { + if (!ALL_VALID && !mask.RowIsValid(r)) { continue; } const TGT target_value = OP::template Operation(data_ptr[r]); @@ -351,7 +394,7 @@ class StandardColumnWriter : public PrimitiveColumnWriter { } case duckdb_parquet::Encoding::BYTE_STREAM_SPLIT: { for (idx_t r = chunk_start; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { + if (!ALL_VALID && !mask.RowIsValid(r)) { continue; } const TGT target_value = OP::template Operation(data_ptr[r]); @@ -374,36 +417,6 @@ class StandardColumnWriter : public PrimitiveColumnWriter { throw InternalException("Unknown encoding"); } } - - void FlushDictionary(PrimitiveColumnWriterState &state_p, ColumnWriterStatistics *stats) override { - auto &state = state_p.Cast>(); - D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY); - - state.bloom_filter = - make_uniq(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio()); - - state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) { - // update the statistics - OP::template HandleStats(stats, tgt_value); - // update the bloom filter - auto hash = OP::template XXHash64(tgt_value); - state.bloom_filter->FilterInsert(hash); - }); - - // flush the dictionary page and add it to the to-be-written pages - WriteDictionary(state, state.dictionary.GetTargetMemoryStream(), 
state.dictionary.GetSize()); - // bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up - } - - idx_t GetRowSize(const Vector &vector, const idx_t index, - const PrimitiveColumnWriterState &state_p) const override { - auto &state = state_p.Cast>(); - if (state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY) { - return (state.key_bit_width + 7) / 8; - } else { - return OP::template GetRowSize(vector, index); - } - } }; } // namespace duckdb diff --git a/src/common/types/column/column_data_collection.cpp b/src/common/types/column/column_data_collection.cpp index a2480f44d624..e45228b8c219 100644 --- a/src/common/types/column/column_data_collection.cpp +++ b/src/common/types/column/column_data_collection.cpp @@ -417,12 +417,21 @@ static void TemplatedColumnDataCopy(ColumnDataMetaData &meta_data, const Unified // initialize the validity mask to set all to valid result_validity.SetAllValid(STANDARD_VECTOR_SIZE); } - for (idx_t i = 0; i < append_count; i++) { - auto source_idx = source_data.sel->get_index(offset + i); - if (source_data.validity.RowIsValid(source_idx)) { + if (source_data.validity.AllValid()) { + // Fast path: all valid + for (idx_t i = 0; i < append_count; i++) { + auto source_idx = source_data.sel->get_index(offset + i); OP::template Assign(meta_data, base_ptr, source_data.data, current_segment.count + i, source_idx); - } else { - result_validity.SetInvalid(current_segment.count + i); + } + } else { + for (idx_t i = 0; i < append_count; i++) { + auto source_idx = source_data.sel->get_index(offset + i); + if (source_data.validity.RowIsValid(source_idx)) { + OP::template Assign(meta_data, base_ptr, source_data.data, current_segment.count + i, + source_idx); + } else { + result_validity.SetInvalid(current_segment.count + i); + } } } current_segment.count += append_count; From b2bf61759c3da8d48eda777f8640b90e24435655 Mon Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Tue, 18 Feb 2025 12:34:28 +0100 Subject: [PATCH 
141/142] fix hash function for empty strings and fix test output now that hash function is changed again --- src/common/types/hash.cpp | 7 +++++-- test/api/adbc/test_adbc.cpp | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp index de0233b0f224..604a6249f8b3 100644 --- a/src/common/types/hash.cpp +++ b/src/common/types/hash.cpp @@ -86,8 +86,9 @@ hash_t Hash(string_t val) { hash_t h = 0xe17a1465U ^ (val.GetSize() * 0xc6a4a7935bd1e995U); // Hash/combine the first 8-byte block + const bool not_an_empty_string = !val.Empty(); h ^= Load(const_data_ptr_cast(val.GetPrefix())); - h *= 0xd6e8feb86659fd93U; + h *= 0xd6e8feb86659fd93U * not_an_empty_string + (1 - not_an_empty_string); // Load remaining 4 bytes hash_t hr = 0; @@ -99,10 +100,12 @@ hash_t Hash(string_t val) { h ^= hr; h *= 0xd6e8feb86659fd93U * not_a_nop + (1 - not_a_nop); + // Finalize + h = Hash(h); + // This is just an optimization. It should not change the result // This property is important for verification (e.g., DUCKDB_DEBUG_NO_INLINE) // We achieved this with the NOP trick above (and in HashBytes) - h = Hash(h); D_ASSERT(h == Hash(val.GetData(), val.GetSize())); return h; diff --git a/test/api/adbc/test_adbc.cpp b/test/api/adbc/test_adbc.cpp index a624a66a7857..91a86b93b6dc 100644 --- a/test/api/adbc/test_adbc.cpp +++ b/test/api/adbc/test_adbc.cpp @@ -1364,8 +1364,8 @@ TEST_CASE("Test AdbcConnectionGetObjects", "[adbc]") { REQUIRE((res->ColumnCount() == 2)); REQUIRE((res->RowCount() == 3)); REQUIRE((res->GetValue(1, 0).ToString() == - "[{'db_schema_name': main, 'db_schema_tables': NULL}, {'db_schema_name': pg_catalog, " - "'db_schema_tables': NULL}, {'db_schema_name': information_schema, 'db_schema_tables': NULL}]")); + "[{'db_schema_name': pg_catalog, 'db_schema_tables': NULL}, {'db_schema_name': information_schema, " + "'db_schema_tables': NULL}, {'db_schema_name': main, 'db_schema_tables': NULL}]")); db.Query("Drop table 
result;"); AdbcConnectionGetObjects(&db.adbc_connection, ADBC_OBJECT_DEPTH_COLUMNS, nullptr, nullptr, nullptr, nullptr, From 6682a5272655e68a66be66303b3c847d8729b2bc Mon Sep 17 00:00:00 2001 From: xuke-hat Date: Wed, 19 Feb 2025 00:57:17 +0800 Subject: [PATCH 142/142] make ValidityMask::RowIsValidUnsafe really unsafe --- src/include/duckdb/common/types/validity_mask.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/duckdb/common/types/validity_mask.hpp b/src/include/duckdb/common/types/validity_mask.hpp index 05583cddd82f..89c39750d326 100644 --- a/src/include/duckdb/common/types/validity_mask.hpp +++ b/src/include/duckdb/common/types/validity_mask.hpp @@ -188,7 +188,7 @@ struct TemplatedValidityMask { D_ASSERT(validity_mask); idx_t entry_idx, idx_in_entry; GetEntryIndex(row_idx, entry_idx, idx_in_entry); - auto entry = GetValidityEntry(entry_idx); + auto entry = GetValidityEntryUnsafe(entry_idx); return RowIsValid(entry, idx_in_entry); }