diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 4cb6bf7..c1e52a0 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -48,9 +48,7 @@ jobs: libcds-dev \ llvm clang lldb - # Install GCC 13 (supports C++23) - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get update + # Install GCC 13 (available in Ubuntu 24.04 default repos, no PPA needed) sudo apt-get install -y gcc-13 g++-13 sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 13 --slave /usr/bin/g++ g++ /usr/bin/g++-13 diff --git a/include/node.hpp b/include/node.hpp index 1e824ef..db660c7 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -206,10 +206,10 @@ class NodeManager { } } - auto node = - std::make_shared(id, schema_name, EMPTY_DATA, - std::make_unique(node_handle), - node_arena_, schema_, layout_); + auto node = std::make_shared( + id, schema_name, EMPTY_DATA, + std::make_unique(std::move(node_handle)), node_arena_, + schema_, layout_); nodes[id] = node; return node; } else { diff --git a/include/node_arena.hpp b/include/node_arena.hpp index 78d799f..7b54022 100644 --- a/include/node_arena.hpp +++ b/include/node_arena.hpp @@ -1,7 +1,12 @@ #ifndef NODE_ARENA_HPP #define NODE_ARENA_HPP +#include + +#include #include +#include +#include #include #include @@ -14,25 +19,210 @@ namespace tundradb { +struct NodeHandle; + +/** + * Temporal version metadata with field-level copy-on-write. + * + * Stores only changed fields; forms a linked list via prev pointer. + * All versions share the same base node data. 
+ */ +struct VersionInfo { + // Temporal validity interval: [valid_from, valid_to) + uint64_t version_id = 0; + uint64_t valid_from = 0; + uint64_t valid_to = std::numeric_limits::max(); + + // Linked list to previous version + VersionInfo* prev = nullptr; + + // Changed fields: field_idx -> value pointer (nullptr = explicit NULL) + llvm::SmallDenseMap updated_fields; + + // Lazy-populated cache: field_idx -> effective value pointer + mutable llvm::SmallDenseMap field_cache_; + mutable uint64_t cache_bitset_ = 0; + + VersionInfo() = default; + + VersionInfo(uint64_t vid, uint64_t ts_from, VersionInfo* prev_ver = nullptr) + : version_id(vid), valid_from(ts_from), prev(prev_ver) {} + + bool is_valid_at(uint64_t ts) const { + return valid_from <= ts && ts < valid_to; + } + + // O(N) + const VersionInfo* find_version_at_time(uint64_t ts) const { + const VersionInfo* current = this; + while (current != nullptr) { + if (current->is_valid_at(ts)) return current; + current = current->prev; + } + return nullptr; + } + + size_t count_versions() const { + size_t count = 1; + const VersionInfo* current = prev; + while (current != nullptr) { + count++; + current = current->prev; + } + return count; + } + + bool is_field_cached(uint16_t field_idx) const { + if (field_idx >= 64) return field_cache_.count(field_idx) > 0; + return (cache_bitset_ & (1ULL << field_idx)) != 0; + } + + void mark_field_cached(uint16_t field_idx) const { + if (field_idx < 64) cache_bitset_ |= (1ULL << field_idx); + } + + void clear_cache() const { + field_cache_.clear(); + cache_bitset_ = 0; + } +}; + /** - * Handle to a node stored in the arena - * Lightweight reference that can be passed around efficiently - * Includes schema version for evolution support + * Handle to a node stored in the arena. + * Supports optional temporal versioning (version_info_ == nullptr when + * disabled). 
*/ struct NodeHandle { - void* ptr; // Direct pointer to node data - size_t size; // Size of the node data - std::string schema_name; // Schema name for proper string cleanup - uint32_t schema_version; // Schema version for evolution support + void* ptr; // pointer to base node data + size_t size; + std::string schema_name; + uint32_t schema_version; + + // optional versioning (nullptr = disabled, owned by version_arena_) + VersionInfo* version_info_; + + // ======================================================================== + // CONSTRUCTORS + // ======================================================================== + + /** + * Default constructor (non-versioned, null handle). + */ + NodeHandle() + : ptr(nullptr), + size(0), + schema_name(""), + schema_version(0), + version_info_(nullptr) {} + + /** + * Standard constructor (non-versioned). + * This is the backward-compatible constructor - existing code works + * unchanged. + */ + NodeHandle(void* p, const size_t s, std::string schema, + const uint32_t version = 1) + : ptr(p), + size(s), + schema_name(std::move(schema)), + schema_version(version), + version_info_(nullptr) {} // Non-versioned by default - NodeHandle() : ptr(nullptr), size(0), schema_name(""), schema_version(0) {} - NodeHandle(void* p, size_t s, std::string schema, uint32_t version = 1) + /** + * Versioned constructor (for arena allocation). + * Note: version_info_ will be set later by the arena after allocation. + * + * @param p Pointer to node data + * @param s Size of node data + * @param schema Schema name + * @param version Schema version + * @param version_info Pointer to VersionInfo allocated in arena + */ + NodeHandle(void* p, size_t s, std::string schema, uint32_t version, + VersionInfo* version_info) : ptr(p), size(s), schema_name(std::move(schema)), - schema_version(version) {} + schema_version(version), + version_info_(version_info) {} + + /** + * Destructor - does NOT delete version_info_ (owned by arena). 
+ */ + ~NodeHandle() { + // version_info_ is owned by version_arena_, don't delete it here + version_info_ = nullptr; + } bool is_null() const { return ptr == nullptr; } + bool is_versioned() const { return version_info_ != nullptr; } + void set_version_info(VersionInfo* version_info) { + version_info_ = version_info; + } + + bool is_valid_at(uint64_t ts) const { + if (!is_versioned()) return true; + return version_info_->is_valid_at(ts); + } + + uint64_t get_version_id() const { + return is_versioned() ? version_info_->version_id : 0; + } + + uint64_t get_valid_from() const { + return is_versioned() ? version_info_->valid_from : 0; + } + + uint64_t get_valid_to() const { + return is_versioned() ? version_info_->valid_to + : std::numeric_limits::max(); + } + + VersionInfo* get_version_info() const { return version_info_; } + + size_t count_versions() const { + if (!is_versioned()) return 1; + return version_info_->count_versions(); + } + + const VersionInfo* find_version_at_time(uint64_t ts) const { + if (!is_versioned()) return nullptr; + return version_info_->find_version_at_time(ts); + } + + const VersionInfo* get_prev_version() const { + if (!is_versioned()) return nullptr; + return version_info_->prev; + } + + NodeHandle(NodeHandle&& other) noexcept + : ptr(other.ptr), + size(other.size), + schema_name(std::move(other.schema_name)), + schema_version(other.schema_version), + version_info_(other.version_info_) { + other.ptr = nullptr; + other.size = 0; + other.version_info_ = nullptr; + } + + NodeHandle& operator=(NodeHandle&& other) noexcept { + if (this != &other) { + ptr = other.ptr; + size = other.size; + schema_name = std::move(other.schema_name); + schema_version = other.schema_version; + version_info_ = other.version_info_; + + other.ptr = nullptr; + other.size = 0; + other.version_info_ = nullptr; + } + return *this; + } + + NodeHandle(const NodeHandle& other) = default; + NodeHandle& operator=(const NodeHandle& other) = default; bool operator==(const 
NodeHandle& other) const { return ptr == other.ptr && size == other.size && @@ -44,31 +234,79 @@ struct NodeHandle { }; /** - * Simplified node arena that manages both node layout and string content - * Uses TWO arenas: one for fixed-size node data, one for variable-size strings + * Simplified node arena that manages both node layout and string content. + * + * Architecture: + * - mem_arena_: Fixed-size node data (base nodes) + * - string_arena_: Variable-size string content + * - version_arena_: (OPTIONAL) Version metadata and field updates + * + * Versioning Support: + * When versioning is DISABLED (default): + * - version_arena_ is nullptr + * - Zero overhead + * + * When versioning is ENABLED: + * - version_arena_ stores VersionInfo and updated field data + * - Supports time-travel queries + * - Field-level copy-on-write for efficient updates + * + * Memory Layout: + * ┌──────────────────────────────────────────────────────────────┐ + * │ mem_arena_ (Base Nodes - Immutable) │ + * │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ + * │ │ Node 1 │ │ Node 2 │ │ Node 3 │ ... │ + * │ └──────────┘ └──────────┘ └──────────┘ │ + * └──────────────────────────────────────────────────────────────┘ + * + * ┌──────────────────────────────────────────────────────────────┐ + * │ version_arena_ (Version Metadata - Only if enabled) │ + * │ ┌────────────┐ ┌────────────┐ ┌────────────┐ │ + * │ │ N1_v1 │ │ N1_v2 │ │ N2_v1 │ ... │ + * │ │ VersionInfo│ │ VersionInfo│ │ VersionInfo│ │ + * │ │ + field │ │ + field │ │ + field │ │ + * │ └────────────┘ └────────────┘ └────────────┘ │ + * └──────────────────────────────────────────────────────────────┘ + * + * ┌──────────────────────────────────────────────────────────────┐ + * │ string_arena_ (String Content - Shared) │ + * │ ┌────────┐ ┌────────┐ ┌────────┐ │ + * │ │ "Alice"│ │ "NYC" │ │ "Bob" │ ... 
│ + * │ └────────┘ └────────┘ └────────┘ │ + * └──────────────────────────────────────────────────────────────┘ */ class NodeArena { public: /** - * Constructor takes any MemArena implementation + StringArena for strings + * Constructor takes any MemArena implementation + StringArena for strings. + * * @param mem_arena Underlying memory arena for node layouts (MemoryArena or * FreeListArena) * @param layout_registry Registry containing schema layouts * @param string_arena Arena for managing string content (optional, creates * own if null) + * @param enable_versioning Whether to enable temporal versioning support + * (default: false) */ NodeArena(std::unique_ptr mem_arena, std::shared_ptr layout_registry, - std::unique_ptr string_arena = nullptr) + std::unique_ptr string_arena = nullptr, + bool enable_versioning = false) : mem_arena_(std::move(mem_arena)), layout_registry_(std::move(layout_registry)), string_arena_(string_arena ? std::move(string_arena) - : std::make_unique()) {} + : std::make_unique()), + versioning_enabled_(enable_versioning), + version_counter_(0) { + // Only allocate version arena if versioning is enabled + if (versioning_enabled_) { + // Use FreeListArena for versions (supports individual deallocation) + // Default 4MB - expect more versions than base nodes + version_arena_ = std::make_unique(4 * 1024 * 1024); + } + } - /** - * Allocate space for a new node of the given schema type - * Returns a handle to the allocated node memory - */ + /** Allocate new node (versioned if enabled). */ NodeHandle allocate_node(const std::string& schema_name) { const std::shared_ptr layout = layout_registry_->get_layout(schema_name); @@ -78,6 +316,8 @@ class NodeArena { return allocate_node(layout); } + + /** Allocate new node with given layout. 
*/ NodeHandle allocate_node(const std::shared_ptr& layout) { size_t node_size = layout->get_total_size_with_bitset(); size_t alignment = layout->get_alignment(); @@ -89,14 +329,31 @@ class NodeArena { // Initialize the node data with default values layout->initialize_node_data(static_cast(node_data)); - return NodeHandle(node_data, node_size, layout->get_schema_name()); + + // Create versioned or non-versioned handle based on configuration + if (versioning_enabled_) { + // Allocate VersionInfo (v0) in version_arena_ + void* version_info_memory = + version_arena_->allocate(sizeof(VersionInfo), alignof(VersionInfo)); + if (!version_info_memory) { + return NodeHandle{}; // Allocation failed + } + + // Construct base version (v0) + uint64_t now = get_current_timestamp_ns(); + auto* version_info = new (version_info_memory) VersionInfo(); + version_info->version_id = 0; + version_info->valid_from = now; + version_info->valid_to = std::numeric_limits::max(); + version_info->prev = nullptr; + + return {node_data, node_size, layout->get_schema_name(), 1, version_info}; + } + return {node_data, node_size, layout->get_schema_name()}; } - /** - * Deallocate a node and all its string references - * Uses schema_name from the NodeHandle for proper cleanup - */ - void deallocate_node(const NodeHandle& handle) { + /** Deallocate node and its strings. 
*/ + void deallocate_node(const NodeHandle& handle) const { if (handle.is_null()) { return; } @@ -112,8 +369,7 @@ class NodeArena { if (is_string_type(field.type)) { // Read the StringRef from the node memory (after data offset) const char* field_ptr = data_start + field.offset; - const StringRef* str_ref = - reinterpret_cast(field_ptr); + const auto* str_ref = reinterpret_cast(field_ptr); // Deallocate the string if it's not null if (!str_ref->is_null()) { @@ -128,12 +384,10 @@ class NodeArena { mem_arena_->deallocate(handle.ptr); } - /** - * Get field value from a node using its handle - */ - const char* get_field_value_ptr(const NodeHandle& handle, - const std::shared_ptr& layout, - const std::shared_ptr& field) const { + /** Get field value pointer. */ + static const char* get_field_value_ptr( + const NodeHandle& handle, const std::shared_ptr& layout, + const std::shared_ptr& field) { // Logger::get_instance().debug("get_field_value: {}.{}", schema_name, // field_name); if (handle.is_null()) { @@ -145,50 +399,214 @@ class NodeArena { field); } - Value get_field_value(const NodeHandle& handle, - const std::shared_ptr& layout, - const std::shared_ptr& field) const { - // Logger::get_instance().debug("get_field_value: {}.{}", schema_name, - // field_name); + static Value get_field_value(const NodeHandle& handle, + const std::shared_ptr& layout, + const std::shared_ptr& field) { if (handle.is_null()) { - // Logger::get_instance().error("null value for invalid handle"); - return nullptr; // null value for invalid handle + return Value{}; // null value for invalid handle } + // For versioned nodes, check version chain + if (handle.is_versioned()) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return Value{}; // Invalid field + } + + uint16_t field_idx = field_layout->index; + + // Traverse version chain to find the field + const VersionInfo* current = handle.version_info_; + while (current != nullptr) { + auto it = 
current->updated_fields.find(field_idx); + if (it != current->updated_fields.end()) { + // Found in version chain + // Check if it's nullptr (explicit NULL sentinel) + if (it->second == nullptr) { + return Value{}; // Explicitly set to NULL + } + // Read actual value from version_arena_ + return Value::read_value_from_memory(it->second, field_layout->type); + } + current = current->prev; + } + + // Not found in version chain, read from base node + return layout->get_field_value(static_cast(handle.ptr), + field); + } + + // Non-versioned: direct read from base node return layout->get_field_value(static_cast(handle.ptr), field); } + /** Update multiple fields atomically (creates one version). */ + arrow::Result update_fields( + NodeHandle& current_handle, const std::shared_ptr& layout, + const std::vector, Value>>& + field_updates) { + // Convert Field pointers to indices + std::vector> indexed_updates; + indexed_updates.reserve(field_updates.size()); + + for (const auto& [field, value] : field_updates) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return arrow::Status::Invalid("Invalid field in update_fields"); + } + indexed_updates.emplace_back(field_layout->index, value); + } + + return update_fields_by_index(current_handle, layout, indexed_updates); + } + /** - * Set field value in a node using its handle - * Automatically stores strings in the string arena and creates StringRef + * Create new version by updating a single field. + * For multiple fields, use update_fields() instead. 
*/ - bool set_field_value(const NodeHandle& handle, + arrow::Result create_new_version( + NodeHandle& current_handle, const std::shared_ptr& layout, + uint16_t field_idx, const Value& new_value) { + if (field_idx >= layout->get_fields().size()) { + return arrow::Status::IndexError("Field index out of bounds"); + } + const std::vector> updates = { + {field_idx, new_value}}; + return update_fields_by_index(current_handle, layout, updates); + } + + /** Update multiple fields by index (internal, more efficient). */ + arrow::Result update_fields_by_index( + NodeHandle& current_handle, const std::shared_ptr& layout, + const std::vector>& field_updates) { + if (field_updates.empty()) return true; + + // Non-versioned: update each field directly + if (!versioning_enabled_ || !current_handle.is_versioned()) { + for (const auto& [field_idx, value] : field_updates) { + if (field_idx >= layout->get_fields().size()) { + return arrow::Status::IndexError("Field index out of bounds"); + } + const FieldLayout& field_layout = layout->get_fields()[field_idx]; + if (!set_field_value_internal(current_handle.ptr, layout, &field_layout, + value)) { + return arrow::Status::Invalid("Failed to set field value"); + } + } + return true; + } + + const uint64_t now = get_current_timestamp_ns(); + + // Allocate new VersionInfo + void* version_info_memory = + version_arena_->allocate(sizeof(VersionInfo), alignof(VersionInfo)); + if (!version_info_memory) { + return arrow::Status::OutOfMemory("Failed to allocate VersionInfo"); + } + + uint64_t new_version_id = + version_counter_.fetch_add(1, std::memory_order_relaxed) + 1; + VersionInfo* old_version_info = current_handle.version_info_; + VersionInfo* new_version_info = new (version_info_memory) + VersionInfo(new_version_id, now, old_version_info); + + // Process each field update + for (const auto& [field_idx, new_value] : field_updates) { + if (field_idx >= layout->get_fields().size()) { + return arrow::Status::IndexError("Field index out of 
bounds"); + } + + const FieldLayout& field_layout = layout->get_fields()[field_idx]; + + // Handle NULL: use nullptr sentinel + if (new_value.is_null()) { + new_version_info->updated_fields[field_idx] = nullptr; + continue; + } + + // Prepare value (convert strings to StringRef) + Value storage_value = new_value; + if (new_value.type() == ValueType::STRING) { + const StringRef str_ref = + string_arena_->store_string_auto(new_value.as_string()); + storage_value = Value{str_ref, field_layout.type}; + } + + // Allocate and write field value + char* field_storage = static_cast( + version_arena_->allocate(field_layout.size, field_layout.alignment)); + if (!field_storage) { + return arrow::Status::OutOfMemory("Failed to allocate field storage"); + } + + if (!write_value_to_memory(field_storage, field_layout.type, + storage_value)) { + return arrow::Status::TypeError("Type mismatch writing field value"); + } + + new_version_info->updated_fields[field_idx] = field_storage; + } + + old_version_info->valid_to = now; + current_handle.version_info_ = new_version_info; + + return true; + } + + /** + * Set field in v0 (initial population). + * Writes to base node without creating versions. + */ + bool set_field_value_v0(NodeHandle& handle, + const std::shared_ptr& layout, + const std::shared_ptr& field, + const Value& value) { + assert(!handle.is_null()); + + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return false; + } + + // Write directly to base node + return set_field_value_internal(handle.ptr, layout, field_layout, value); + } + + /** + * Set field value. + * Creates new version if versioning enabled, direct write otherwise. 
+ */ + bool set_field_value(NodeHandle& handle, const std::shared_ptr& layout, const std::shared_ptr& field, const Value& value) { - // Logger::get_instance().debug("set_field_value: {}.{} = {}", schema_name, - // field_name, value.to_string()); assert(!handle.is_null()); - // Handle string deallocation for any field that might contain strings - // Value storage_value = value; const FieldLayout* field_layout = layout->get_field_layout(field); if (!field_layout) { - // log_error("set_field_value: field_layout is null for field '{}'", - // field ? field->name() : "null"); - return false; + return false; // Invalid field } + // VERSIONED PATH: Create a new version + if (versioning_enabled_ && handle.is_versioned()) { + auto result = + create_new_version(handle, layout, field_layout->index, value); + return result.ok() && result.ValueOrDie(); + } + + // ======================================================================== + // NON-VERSIONED PATH: Direct write (in-place update) + // ======================================================================== + // If the field currently contains a string, deallocate it first if (is_string_type(field_layout->type) && is_field_set(static_cast(handle.ptr), field_layout->index)) { - Value old_value = layout->get_field_value(static_cast(handle.ptr), - *field_layout); + const Value old_value = layout->get_field_value( + static_cast(handle.ptr), *field_layout); if (!old_value.is_null() && old_value.type() != ValueType::NA) { try { - StringRef old_str_ref = old_value.as_string_ref(); - // Logger::get_instance().debug("deallocate old string: {}", - // old_str_ref.to_string()); + const StringRef& old_str_ref = old_value.as_string_ref(); if (!old_str_ref.is_null()) { string_arena_->mark_for_deletion(old_str_ref); } @@ -197,15 +615,11 @@ class NodeArena { } } } - // Value storage_value; + + // Handle string storage if (value.type() == ValueType::STRING) { - // Check if it's a temporary std::string that needs to be stored in - // 
arena const std::string& str_content = value.as_string(); - StringRef str_ref = string_arena_->store_string_auto(str_content); - // Logger::get_instance().debug("store string: {}", - // str_ref.to_string()); - + const StringRef str_ref = string_arena_->store_string_auto(str_content); return layout->set_field_value(static_cast(handle.ptr), *field_layout, Value{str_ref, field_layout->type}); @@ -213,77 +627,165 @@ class NodeArena { return layout->set_field_value(static_cast(handle.ptr), *field_layout, value); } - - // Logger::get_instance().debug("storage_value: {}", - // storage_value.to_string()); } - /** - * Reset the arena - keeps allocated chunks but resets usage - */ + /** Reset arenas (keeps chunks). */ void reset() { mem_arena_->reset(); string_arena_->reset(); } - /** - * Clear all allocated memory - */ + /** Clear all memory. */ void clear() { mem_arena_->clear(); string_arena_->clear(); } - /** - * Get the string arena for direct string management - */ + /** Get string arena. */ StringArena* get_string_arena() const { return string_arena_.get(); } - // Statistics + // Statistics and getters size_t get_total_allocated() const { return mem_arena_->get_total_allocated(); } - size_t get_chunk_count() const { return mem_arena_->get_chunk_count(); } - - // Get the underlying arena (for advanced usage) MemArena* get_mem_arena() const { return mem_arena_.get(); } + bool is_versioning_enabled() const { return versioning_enabled_; } + uint64_t get_version_counter() const { + return version_counter_.load(std::memory_order_relaxed); + } private: - std::unique_ptr mem_arena_; // For fixed-size node layouts + static uint64_t get_current_timestamp_ns() { + auto now = std::chrono::system_clock::now(); + auto duration = now.time_since_epoch(); + return std::chrono::duration_cast(duration) + .count(); + } + + /** Write field directly to node memory (handles strings). 
*/ + bool set_field_value_internal(void* node_ptr, + const std::shared_ptr& layout, + const FieldLayout* field_layout, + const Value& value) const { + // If the field currently contains a string, deallocate it first + if (is_string_type(field_layout->type) && + is_field_set(static_cast(node_ptr), field_layout->index)) { + Value old_value = + layout->get_field_value(static_cast(node_ptr), *field_layout); + if (!old_value.is_null() && old_value.type() != ValueType::NA) { + try { + const StringRef& old_str_ref = old_value.as_string_ref(); + if (!old_str_ref.is_null()) { + string_arena_->mark_for_deletion(old_str_ref); + } + } catch (...) { + // Old value wasn't a StringRef, ignore + } + } + } + + // Handle string storage + if (value.type() == ValueType::STRING) { + const std::string& str_content = value.as_string(); + const StringRef str_ref = string_arena_->store_string_auto(str_content); + return layout->set_field_value(static_cast(node_ptr), + *field_layout, + Value{str_ref, field_layout->type}); + } + return layout->set_field_value(static_cast(node_ptr), *field_layout, + value); + } + + /** Traverse the version chain to find field pointer. */ + static const char* get_field_ptr_from_version_chain( + const VersionInfo* version_info, uint16_t field_idx, + const SchemaLayout* layout) { + const VersionInfo* current = version_info; + while (current != nullptr) { + // Check if this version has an override for this field + if (auto it = current->updated_fields.find(field_idx); + it != current->updated_fields.end()) { + return it->second; + } + current = current->prev; + } + + // Not found in any version, would need to read from base node + // (caller should handle this case) + return nullptr; + } + + /** Write value to memory (type-safe). 
*/ + static bool write_value_to_memory(char* ptr, ValueType type, + const Value& value) { + switch (type) { + case ValueType::INT64: + if (value.type() != ValueType::INT64) return false; + *reinterpret_cast(ptr) = value.as_int64(); + return true; + + case ValueType::INT32: + if (value.type() != ValueType::INT32) return false; + *reinterpret_cast(ptr) = value.as_int32(); + return true; + + case ValueType::DOUBLE: + if (value.type() != ValueType::DOUBLE) return false; + *reinterpret_cast(ptr) = value.as_double(); + return true; + + case ValueType::BOOL: + if (value.type() != ValueType::BOOL) return false; + *reinterpret_cast(ptr) = value.as_bool(); + return true; + + case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + if (!is_string_type(value.type())) return false; + *reinterpret_cast(ptr) = value.as_string_ref(); + return true; + + default: + return false; + } + } + + std::unique_ptr mem_arena_; std::shared_ptr layout_registry_; - std::unique_ptr - string_arena_; // For variable-size string content + std::unique_ptr string_arena_; + + // Versioning (optional) + bool versioning_enabled_; + std::unique_ptr version_arena_; + std::atomic version_counter_; }; -/** - * Factory functions for creating NodeArenas with different underlying - * implementations - */ +/** Factory functions for creating NodeArenas. */ namespace node_arena_factory { -/** - * Create a NodeArena using MemoryArena (fast allocation, reset/clear only) - * Creates its own StringArena for string management - */ +/** Create NodeArena with MemoryArena (fast, no individual deallocation). 
*/ inline std::unique_ptr create_simple_arena( const std::shared_ptr& layout_registry, - size_t initial_size = 2 * 1024 * 1024) { // 2MB default + size_t initial_size = 2 * 1024 * 1024, // 2MB default + bool enable_versioning = false) { auto mem_arena = std::make_unique(initial_size); - return std::make_unique(std::move(mem_arena), layout_registry); + return std::make_unique(std::move(mem_arena), layout_registry, + nullptr, enable_versioning); } -/** - * Create a NodeArena using FreeListArena (individual deallocation supported) - * Creates its own StringArena for string management - */ +/** Create NodeArena with FreeListArena (supports individual deallocation). */ inline std::unique_ptr create_free_list_arena( const std::shared_ptr& layout_registry, size_t initial_size = 2 * 1024 * 1024, // 2MB default - size_t min_fragment_size = 64) { // 64 bytes minimum fragment + size_t min_fragment_size = 64, // 64 bytes minimum fragment + bool enable_versioning = false) { auto mem_arena = std::make_unique(initial_size, min_fragment_size); - return std::make_unique(std::move(mem_arena), layout_registry); + return std::make_unique(std::move(mem_arena), layout_registry, + nullptr, enable_versioning); } } // namespace node_arena_factory diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bf31680..b9cc6d2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -134,6 +134,10 @@ add_executable(node_test add_executable(string_ref_concurrent_test string_ref_concurrent_test.cpp) +# Node versioning test +add_executable(node_version_test + node_version_test.cpp) + # Link against Arrow and GTest target_link_libraries(sharding_test PRIVATE @@ -326,6 +330,15 @@ target_link_libraries(string_ref_concurrent_test LLVMSupport LLVMCore ) +target_link_libraries(node_version_test + PRIVATE + core + GTest::GTest + GTest::Main + spdlog::spdlog + LLVMSupport LLVMCore +) + # Apply sanitizer flags to all test targets if enabled if(ENABLE_SANITIZERS) 
target_compile_options(sharding_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) @@ -357,6 +370,9 @@ if(ENABLE_SANITIZERS) target_compile_options(free_list_arena_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) target_link_options(free_list_arena_test PRIVATE ${SANITIZER_LINK_FLAGS}) + + target_compile_options(node_version_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) + target_link_options(node_version_test PRIVATE ${SANITIZER_LINK_FLAGS}) endif() # Simple test registration @@ -375,6 +391,7 @@ add_test(NAME FreeListArenaTest COMMAND free_list_arena_test) add_test(NAME NodeArenaTest COMMAND node_arena_test) add_test(NAME NodeTest COMMAND node_test) add_test(NAME StringRefConcurrentTest COMMAND string_ref_concurrent_test) +add_test(NAME NodeVersionTest COMMAND node_version_test) # Set TSan options for tests after they've been registered if(ENABLE_SANITIZERS AND SANITIZER_TYPE STREQUAL "thread" AND EXISTS ${TSAN_SUPPRESSIONS_FILE}) diff --git a/tests/node_arena_test.cpp b/tests/node_arena_test.cpp index 229d6d5..72a60a4 100644 --- a/tests/node_arena_test.cpp +++ b/tests/node_arena_test.cpp @@ -13,27 +13,23 @@ // // class NodeArenaTest : public ::testing::Test { // protected: -// void SetUp() override { -// registry_ = std::make_unique(); -// -// // Create a comprehensive test schema with all ValueTypes -// auto layout = std::make_unique("TestNode"); -// layout->add_field("id", ValueType::INT64); -// layout->add_field("count", ValueType::INT32); -// layout->add_field("score", ValueType::DOUBLE); -// layout->add_field("active", ValueType::BOOL); -// layout->add_field("description", ValueType::STRING); // Variable length -// layout->add_field("short_name", ValueType::FIXED_STRING16); // ≤16 -// chars layout->add_field("medium_name", ValueType::FIXED_STRING32); // -// ≤32 chars layout->add_field("long_name", ValueType::FIXED_STRING64); // -// ≤64 chars layout->finalize(); -// -// total_node_size_ = layout->get_total_size_with_bitset(); -// registry_->register_layout(std::move(layout)); -// 
-//         // Create NodeArena with FreeListArena for individual deallocation
-//         node_arena_ = node_arena_factory::create_free_list_arena(registry_);
-//     }
+//   void SetUp() override {
+//     registry_ = std::make_unique<LayoutRegistry>();
+//
+//     // Create a comprehensive test schema with all ValueTypes
+//     auto layout = std::make_unique<SchemaLayout>("TestNode");
+//     layout->add_field("id", ValueType::INT64);
+//     layout->add_field("count", ValueType::INT32);
+//     layout->add_field("score", ValueType::DOUBLE);
+//     layout->add_field("active", ValueType::BOOL);
+//     layout->add_field("description", ValueType::STRING);  // Variable length
+//
+//     total_node_size_ = layout->get_total_size_with_bitset();
+//     registry_->register_layout(std::move(layout));
+//
+//     // Create NodeArena with FreeListArena for individual deallocation
+//     node_arena_ = node_arena_factory::create_free_list_arena(registry_);
+//   }
 //
 //   void TearDown() override {
 //     node_arena_.reset();
diff --git a/tests/node_version_test.cpp b/tests/node_version_test.cpp
new file mode 100644
index 0000000..c171b10
--- /dev/null
+++ b/tests/node_version_test.cpp
@@ -0,0 +1,462 @@
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <thread>
+
+#include "../include/node_arena.hpp"
+#include "../include/schema.hpp"
+#include "../include/schema_layout.hpp"
+
+using namespace tundradb;
+
+// Test fixture: registers a "TestNode" layout (id, count, score, active,
+// description) and creates two arenas over the same registry, one with
+// versioning enabled and one without.
+class NodeVersionTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    layout_registry_ = std::make_shared<LayoutRegistry>();
+
+    // Create fields
+    llvm::SmallVector<std::shared_ptr<Field>, 5> fields;
+    fields.push_back(std::make_shared<Field>("id", ValueType::INT64));
+    fields.push_back(std::make_shared<Field>("count", ValueType::INT32));
+    fields.push_back(std::make_shared<Field>("score", ValueType::DOUBLE));
+    fields.push_back(std::make_shared<Field>("active", ValueType::BOOL));
+    fields.push_back(std::make_shared<Field>("description", ValueType::STRING));
+
+    // Create schema (need to move the fields vector)
+    schema_ = std::make_shared<Schema>(std::string("TestNode"), 1u,
+                                       std::move(fields));
+
+    // Get field pointers for tests
+    id_field_ = schema_->get_field("id");
+    count_field_ = schema_->get_field("count");
+    score_field_ = schema_->get_field("score");
+    active_field_ = schema_->get_field("active");
+    desc_field_ = schema_->get_field("description");
+
+    // Create layout from schema
+    auto layout = std::make_unique<SchemaLayout>(schema_);
+    total_node_size_ = layout->get_total_size_with_bitset();
+    layout_registry_->register_layout(std::move(layout));
+
+    // Get layout pointer
+    layout_ = layout_registry_->get_layout("TestNode");
+
+    // Create NodeArena WITH versioning enabled
+    node_arena_versioned_ = node_arena_factory::create_free_list_arena(
+        layout_registry_, 2 * 1024 * 1024, 64, true);
+
+    // Create NodeArena WITHOUT versioning for comparison
+    node_arena_non_versioned_ =
+        node_arena_factory::create_free_list_arena(layout_registry_);
+  }
+
+  std::shared_ptr<LayoutRegistry> layout_registry_;
+  std::shared_ptr<Schema> schema_;
+  std::shared_ptr<SchemaLayout> layout_;
+  std::unique_ptr<NodeArena> node_arena_versioned_;
+  std::unique_ptr<NodeArena> node_arena_non_versioned_;
+  size_t total_node_size_;
+
+  // Field pointers for convenience
+  std::shared_ptr<Field> id_field_;
+  std::shared_ptr<Field> count_field_;
+  std::shared_ptr<Field> score_field_;
+  std::shared_ptr<Field> active_field_;
+  std::shared_ptr<Field> desc_field_;
+};
+
+// =============================================================================
+// Basic Versioning Tests
+// =============================================================================
+
+TEST_F(NodeVersionTest, AllocateVersionedNode) {
+  // Allocate node with versioning enabled
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  ASSERT_FALSE(handle.is_null());
+  ASSERT_TRUE(handle.is_versioned());
+  ASSERT_EQ(handle.get_version_id(), 0);  // Base version
+  ASSERT_EQ(handle.count_versions(), 1);
+}
+
+TEST_F(NodeVersionTest, AllocateNonVersionedNode) {
+  // Allocate node with versioning disabled
+  NodeHandle handle = node_arena_non_versioned_->allocate_node("TestNode");
+
+  ASSERT_FALSE(handle.is_null());
+  ASSERT_FALSE(handle.is_versioned());
+  ASSERT_EQ(handle.get_version_id(), 0);
+  ASSERT_EQ(handle.count_versions(), 1);
+}
+
+TEST_F(NodeVersionTest, CreateSingleVersion) {
+  // Create base node
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+  ASSERT_TRUE(handle.is_versioned());
+  ASSERT_EQ(handle.get_version_id(), 0);
+
+  // Set initial value in v0 (use v0 method)
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(100)));
+
+  // Still at v0
+  ASSERT_EQ(handle.get_version_id(), 0);
+  ASSERT_EQ(handle.count_versions(), 1);
+
+  // Get value back
+  Value id_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_EQ(id_value.as_int64(), 100);
+
+  // Update value (creates version 1)
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                         Value(int64_t(200)));
+
+  // Verify version chain
+  ASSERT_EQ(handle.count_versions(), 2);  // v0 + v1
+  ASSERT_EQ(handle.get_version_id(), 1);  // Current version
+
+  // Verify new value
+  Value new_id_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_EQ(new_id_value.as_int64(), 200);
+}
+
+TEST_F(NodeVersionTest, CreateMultipleVersions) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  // Set initial value in v0
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(0)));
+
+  // Create 4 more versions (v1, v2, v3, v4)
+  for (int64_t i = 1; i <= 4; i++) {
+    node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                           Value(i * 100));
+    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+  }
+
+  // Verify version count (v0, v1, v2, v3, v4 = 5 versions)
+  ASSERT_EQ(handle.count_versions(), 5);
+  ASSERT_EQ(handle.get_version_id(), 4);  // Current version
+
+  // Verify final value
+  Value final_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_EQ(final_value.as_int64(), 400);
+}
+
+// =============================================================================
+// Field-Level Copy-on-Write Tests
+// =============================================================================
+
+TEST_F(NodeVersionTest, FieldLevelCopyOnWrite) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  // Set initial values for multiple fields in v0
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(100)));
+  node_arena_versioned_->set_field_value_v0(handle, layout_, count_field_,
+                                            Value(int32_t(10)));
+  node_arena_versioned_->set_field_value_v0(handle, layout_, score_field_,
+                                            Value(double(1.5)));
+
+  // Still at v0
+  ASSERT_EQ(handle.get_version_id(), 0);
+  VersionInfo* v0 = handle.get_version_info();
+  ASSERT_EQ(v0->version_id, 0);
+
+  // Update only ONE field (should only store that field in new version)
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  node_arena_versioned_->set_field_value(handle, layout_, count_field_,
+                                         Value(int32_t(20)));
+
+  // Check new version
+  ASSERT_EQ(handle.get_version_id(), 1);
+  ASSERT_EQ(handle.count_versions(), 2);  // v0 + v1
+
+  VersionInfo* v1 = handle.get_version_info();
+  ASSERT_EQ(v1->updated_fields.size(), 1);  // Only 1 field changed in v1!
+  ASSERT_EQ(v1->version_id, 1);
+  ASSERT_NE(v1->prev, nullptr);
+  ASSERT_EQ(v1->prev->version_id, 0);  // Linked to v0
+
+  // Verify values
+  Value id_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  Value count_value =
+      node_arena_versioned_->get_field_value(handle, layout_, count_field_);
+  Value score_value =
+      node_arena_versioned_->get_field_value(handle, layout_, score_field_);
+
+  ASSERT_EQ(id_value.as_int64(), 100);             // From v0 (base)
+  ASSERT_EQ(count_value.as_int32(), 20);           // From v1 (updated)
+  ASSERT_DOUBLE_EQ(score_value.as_double(), 1.5);  // From v0 (base)
+}
+
+// =============================================================================
+// Temporal Validity Tests
+// =============================================================================
+
+// Walks the version chain (v2 -> v1 -> v0) and checks that each version is
+// valid at its own valid_from but not at its successor's valid_from.
+TEST_F(NodeVersionTest, TemporalValidityIntervals) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(100)));
+
+  VersionInfo* v0 = handle.get_version_info();
+  uint64_t t0 = v0->valid_from;
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                         Value(int64_t(200)));
+
+  VersionInfo* v1 = handle.get_version_info();
+  uint64_t t1 = v1->valid_from;
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                         Value(int64_t(300)));
+
+  VersionInfo* v2 = handle.get_version_info();
+  uint64_t t2 = v2->valid_from;
+
+  ASSERT_TRUE(v2->is_valid_at(t2));
+  ASSERT_TRUE(v2->is_valid_at(t2 + 1000000));
+
+  ASSERT_NE(v2->prev, nullptr);  // check the link we are about to follow
+  v1 = v2->prev;
+  ASSERT_TRUE(v1->is_valid_at(t1));
+  ASSERT_FALSE(v1->is_valid_at(t2));
+
+  ASSERT_NE(v1->prev, nullptr);
+  v0 = v1->prev;
+  ASSERT_TRUE(v0->is_valid_at(t0));
+  ASSERT_FALSE(v0->is_valid_at(t1));
+}
+
+// Looking up each version's valid_from must return that same version.
+TEST_F(NodeVersionTest, FindVersionAtTime) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(100)));
+  VersionInfo* v0 = handle.get_version_info();
+  uint64_t t0 = v0->valid_from;
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                         Value(int64_t(200)));
+  VersionInfo* v1 = handle.get_version_info();
+  uint64_t t1 = v1->valid_from;
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                         Value(int64_t(300)));
+  VersionInfo* v2 = handle.get_version_info();
+  uint64_t t2 = v2->valid_from;
+
+  const VersionInfo* version_at_t0 = handle.find_version_at_time(t0);
+  const VersionInfo* version_at_t1 = handle.find_version_at_time(t1);
+  const VersionInfo* version_at_t2 = handle.find_version_at_time(t2);
+
+  ASSERT_NE(version_at_t0, nullptr);
+  ASSERT_NE(version_at_t1, nullptr);
+  ASSERT_NE(version_at_t2, nullptr);
+
+  ASSERT_EQ(version_at_t0->version_id, 0);
+  ASSERT_EQ(version_at_t1->version_id, 1);
+  ASSERT_EQ(version_at_t2->version_id, 2);
+}
+
+// =============================================================================
+// String Handling Tests
+// =============================================================================
+
+// STRING fields read back correctly before and after a versioned update.
+TEST_F(NodeVersionTest, StringVersioning) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  node_arena_versioned_->set_field_value_v0(handle, layout_, desc_field_,
+                                            Value("Alice"));
+
+  Value v0_value =
+      node_arena_versioned_->get_field_value(handle, layout_, desc_field_);
+  ASSERT_EQ(v0_value.as_string(), "Alice");
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  node_arena_versioned_->set_field_value(handle, layout_, desc_field_,
+                                         Value("Alicia"));
+
+  Value v1_value =
+      node_arena_versioned_->get_field_value(handle, layout_, desc_field_);
+  ASSERT_EQ(v1_value.as_string(), "Alicia");
+
+  ASSERT_EQ(handle.count_versions(), 2);
+}
+
+// =============================================================================
+// Non-Versioned Mode Tests
+// =============================================================================
+
+// With versioning disabled, updates change the value but add no version.
+TEST_F(NodeVersionTest, NonVersionedUpdateNoVersionCreated) {
+  NodeHandle handle = node_arena_non_versioned_->allocate_node("TestNode");
+
+  ASSERT_FALSE(handle.is_versioned());
+
+  node_arena_non_versioned_->set_field_value(handle, layout_, id_field_,
+                                             Value(int64_t(100)));
+
+  node_arena_non_versioned_->set_field_value(handle, layout_, id_field_,
+                                             Value(int64_t(200)));
+
+  ASSERT_EQ(handle.count_versions(), 1);
+  ASSERT_FALSE(handle.is_versioned());
+
+  Value value =
+      node_arena_non_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_EQ(value.as_int64(), 200);
+}
+
+// =============================================================================
+// Batch Update Tests
+// =============================================================================
+
+// update_fields() applies several field changes as one new version (v1).
+TEST_F(NodeVersionTest, BatchUpdateMultipleFields) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(100)));
+  node_arena_versioned_->set_field_value_v0(handle, layout_, count_field_,
+                                            Value(int32_t(10)));
+  node_arena_versioned_->set_field_value_v0(handle, layout_, score_field_,
+                                            Value(double(1.5)));
+
+  ASSERT_EQ(handle.get_version_id(), 0);
+
+  std::vector<std::pair<std::shared_ptr<Field>, Value>> updates = {
+      {id_field_, Value(int64_t(200))},
+      {count_field_, Value(int32_t(20))},
+      {score_field_, Value(double(2.5))}};
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  auto result = node_arena_versioned_->update_fields(handle, layout_, updates);
+  ASSERT_TRUE(result.ok()) << "Update failed: " << result.status().message();
+
+  ASSERT_EQ(handle.get_version_id(), 1);
+  ASSERT_EQ(handle.count_versions(), 2);
+
+  VersionInfo* v1 = handle.get_version_info();
+  ASSERT_EQ(v1->updated_fields.size(), 3);
+
+  Value id = node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  Value count =
+      node_arena_versioned_->get_field_value(handle, layout_, count_field_);
+  Value score =
+      node_arena_versioned_->get_field_value(handle, layout_, score_field_);
+
+  ASSERT_EQ(id.as_int64(), 200);
+  ASSERT_EQ(count.as_int32(), 20);
+  ASSERT_DOUBLE_EQ(score.as_double(), 2.5);
+}
+
+// =============================================================================
+// NULL Value Handling Tests
+// =============================================================================
+
+// Setting a field to Value{} creates a version that stores an explicit NULL.
+TEST_F(NodeVersionTest, NullValueHandling) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(100)));
+
+  Value v0_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_FALSE(v0_value.is_null());
+  ASSERT_EQ(v0_value.as_int64(), 100);
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_, Value{});
+
+  ASSERT_EQ(handle.get_version_id(), 1);
+  ASSERT_EQ(handle.count_versions(), 2);
+
+  Value v1_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_TRUE(v1_value.is_null());
+
+  VersionInfo* v1 = handle.get_version_info();
+  ASSERT_EQ(v1->updated_fields.size(), 1);
+
+  const FieldLayout* id_layout = layout_->get_field_layout(id_field_);
+  ASSERT_NE(id_layout, nullptr);
+  // Look the entry up with find(): operator[] would insert a default (null)
+  // entry for a missing key and let this check pass vacuously.
+  auto null_entry = v1->updated_fields.find(id_layout->index);
+  ASSERT_TRUE(null_entry != v1->updated_fields.end());
+  ASSERT_EQ(null_entry->second, nullptr);  // nullptr = NULL
+}
+
+// A field that starts NULL in v0 can be given a value by a later version.
+TEST_F(NodeVersionTest, NullToNonNullTransition) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value{});
+
+  Value v0_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_TRUE(v0_value.is_null());
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                         Value(int64_t(100)));
+
+  ASSERT_EQ(handle.get_version_id(), 1);
+  Value v1_value =
+      node_arena_versioned_->get_field_value(handle, layout_, id_field_);
+  ASSERT_FALSE(v1_value.is_null());
+  ASSERT_EQ(v1_value.as_int64(), 100);
+}
+
+// =============================================================================
+// Performance/Stats Tests
+// =============================================================================
+
+// get_version_counter() must advance once per versioned update.
+TEST_F(NodeVersionTest, VersionCounterIncreases) {
+  NodeHandle handle = node_arena_versioned_->allocate_node("TestNode");
+
+  uint64_t initial_counter = node_arena_versioned_->get_version_counter();
+  node_arena_versioned_->set_field_value_v0(handle, layout_, id_field_,
+                                            Value(int64_t(0)));
+
+  for (int i = 1; i <= 10; i++) {
+    node_arena_versioned_->set_field_value(handle, layout_, id_field_,
+                                           Value(int64_t(i)));
+    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+  }
+
+  uint64_t final_counter = node_arena_versioned_->get_version_counter();
+
+  // Counter should have increased by 10 (v1-v10)
+  ASSERT_EQ(final_counter - initial_counter, 10);
+}
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}