From b8ded67d4e9016731646632acf7e77c99da1affb Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Wed, 17 Sep 2025 21:39:03 -0400
Subject: [PATCH 1/4] optimize perf

---
 include/concurrency.hpp |  19 +++
 include/edge_store.hpp  | 102 ++++++++++++++++
 include/query.hpp       |  17 ++-
 src/arrow_utils.cpp     |   4 +-
 src/core.cpp            | 250 ++++++++++++++++++++++------------------
 src/edge_store.cpp      |  47 +++++++-
 6 files changed, 314 insertions(+), 125 deletions(-)
diff --git a/include/concurrency.hpp b/include/concurrency.hpp
index 2cfc97d..8d51c9b 100644
--- a/include/concurrency.hpp
+++ b/include/concurrency.hpp
@@ -94,6 +94,25 @@ class ConcurrentSet {
     return snapshot;
   }
 
+  class LockedView {
+   public:
+    using iterator =
+        typename tbb::concurrent_hash_map<T, std::monostate>::const_iterator;
+    // Only stores a reference to `data`: no copy is made and no lock is taken.
+    explicit LockedView(const tbb::concurrent_hash_map<T, std::monostate>& data)
+        : data_(data) {}
+
+    iterator begin() const { return data_.begin(); }
+    iterator end() const { return data_.end(); }
+
+    size_t size() const { return data_.size(); }
+
+   private:
+    const tbb::concurrent_hash_map<T, std::monostate>& data_;
+  };
+  // Unsynchronized, non-owning view; unsafe if the set is modified meanwhile.
+  LockedView get_all_unsafe() const { return LockedView(data_); }
+
   /**
    * @brief Clear all elements from the set
    *
diff --git a/include/edge_store.hpp b/include/edge_store.hpp
index 78f0dd1..6c08abd 100644
--- a/include/edge_store.hpp
+++ b/include/edge_store.hpp
@@ -14,6 +14,98 @@
 
 namespace tundradb {
 
+// Forward declaration for EdgeView
+class EdgeStore;
+
+/**
+ * @brief Lazy view over the edges named by a set of edge ids
+ *
+ * Edges are resolved through the EdgeStore one id at a time during iteration
+ * rather than collected into a vector. The id set and store are only borrowed.
+ */
+class EdgeView {
+ public:
+  class iterator {
+   public:
+    using iterator_category = std::forward_iterator_tag;
+    using value_type = std::shared_ptr<Edge>;
+    using difference_type = std::ptrdiff_t;
+    using pointer = const value_type *;
+    using reference = const value_type &;
+
+    iterator(const EdgeStore *store,
+             ConcurrentSet<int64_t>::LockedView::iterator edge_ids_it,
+             ConcurrentSet<int64_t>::LockedView::iterator edge_ids_end,
+             const std::string &type_filter)
+        : store_(store),
+          edge_ids_it_(edge_ids_it),
+          edge_ids_end_(edge_ids_end),
+          type_filter_(type_filter) {
+      advance_to_valid();
+    }
+
+    iterator &operator++() {
+      ++edge_ids_it_;
+      advance_to_valid();
+      return *this;
+    }
+
+    iterator operator++(int) {
+      iterator tmp = *this;
+      ++(*this);
+      return tmp;
+    }
+
+    reference operator*() const { return current_edge_; }
+    pointer operator->() const { return &current_edge_; }
+
+    bool operator==(const iterator &other) const {
+      return edge_ids_it_ == other.edge_ids_it_;
+    }
+
+    bool operator!=(const iterator &other) const { return !(*this == other); }
+
+   private:
+    void advance_to_valid();
+
+    const EdgeStore *store_;
+    ConcurrentSet<int64_t>::LockedView::iterator edge_ids_it_;
+    ConcurrentSet<int64_t>::LockedView::iterator edge_ids_end_;
+    std::string type_filter_;
+    std::shared_ptr<Edge> current_edge_;
+  };
+
+  EdgeView(const EdgeStore *store, const ConcurrentSet<int64_t> &edge_ids,
+           const std::string &type_filter = "")
+      : store_(store),
+        edge_ids_view_(edge_ids.get_all_unsafe()),
+        type_filter_(type_filter) {}
+
+  iterator begin() const {
+    return iterator(store_, edge_ids_view_.begin(), edge_ids_view_.end(),
+                    type_filter_);
+  }
+
+  iterator end() const {
+    return iterator(store_, edge_ids_view_.end(), edge_ids_view_.end(),
+                    type_filter_);
+  }
+
+  // Counts matching edges by walking the view once; nothing is materialized.
+  size_t count() const {
+    size_t result = 0;
+    for (auto it = begin(), last = end(); it != last; ++it) {
+      ++result;
+    }
+    return result;
+  }
+
+ private:
+  const EdgeStore *store_;
+  ConcurrentSet<int64_t>::LockedView edge_ids_view_;
+  std::string type_filter_;
+};
+
 // todo rename to EdgeManager
 class EdgeStore {
   struct TableCache;
@@ -94,6 +186,13 @@ class EdgeStore {
   arrow::Result<std::vector<std::shared_ptr<Edge>>> get_incoming_edges(
       int64_t id, const std::string &type = "") const;
 
+  // New view-based methods that avoid copying
+  arrow::Result<EdgeView> get_outgoing_edges_view(
+      int64_t id, const std::string &type = "") const;
+
+  arrow::Result<EdgeView> get_incoming_edges_view(
+      int64_t id, const std::string &type = "") const;
+
   arrow::Result<std::vector<std::shared_ptr<Edge>>> get_by_type(
       const std::string &type) const;
 
@@ -114,6 +213,9 @@ class EdgeStore {
   void set_id_seq(const int64_t v) {
     edge_id_counter_.store(v, std::memory_order_relaxed);
   }
+
+  // Friend class for EdgeView iterator access
+  friend class EdgeView::iterator;
 };
 }  // namespace tundradb
 
diff --git a/include/query.hpp b/include/query.hpp
index e9dbd2d..1c150b7 100644
--- a/include/query.hpp
+++ b/include/query.hpp
@@ -184,6 +184,7 @@ class ComparisonExpr : public Clause, public WhereExpr {
   CompareOp op_;
   Value value_;
   bool inlined_ = false;
+  std::string field_name;
 
   static arrow::Result<bool> compare_values(const Value& value, CompareOp op,
                                             const Value& where_value) {
@@ -308,7 +309,13 @@ class ComparisonExpr : public Clause, public WhereExpr {
 
  public:
   ComparisonExpr(std::string field, CompareOp op, Value value)
-      : field_(std::move(field)), op_(op), value_(std::move(value)) {}
+      : field_(std::move(field)), op_(op), value_(std::move(value)) {
+    // Cache the unqualified field name ("user.age" -> "age") once, so
+    // per-node evaluation does not have to re-parse field_ on every call.
+    const size_t dot_pos = field_.find('.');
+    field_name = dot_pos == std::string::npos ? field_
+                                               : field_.substr(dot_pos + 1);
+  }
 
   [[nodiscard]] const std::string& field() const { return field_; }
   [[nodiscard]] CompareOp op() const { return op_; }
@@ -400,14 +407,6 @@ class ComparisonExpr : public Clause, public WhereExpr {
 
     // parse field name to extract variable and field parts
     // expected format: "variable.field" (e.g., "user.age", "company.name")
-    const size_t dot_pos = field_.find('.');
-    std::string field_name;
-
-    if (dot_pos != std::string::npos) {
-      field_name = field_.substr(dot_pos + 1);
-    } else {
-      field_name = field_;
-    }
 
     ARROW_ASSIGN_OR_RAISE(auto field_value, node->get_value(field_name));
     return compare_values(field_value, op_, value_);
diff --git a/src/arrow_utils.cpp b/src/arrow_utils.cpp
index 97d92d7..46c7a77 100644
--- a/src/arrow_utils.cpp
+++ b/src/arrow_utils.cpp
@@ -35,8 +35,8 @@ bool initialize_arrow_compute() {
       }
 
       auto function_names = registry->GetFunctionNames();
-      log_info("Arrow Compute initialized with {} functions",
-               function_names.size());
+      log_debug("Arrow Compute initialized with {} functions",
+                function_names.size());
 
       // Check for essential functions
       const bool has_equal =
diff --git a/src/core.cpp b/src/core.cpp
index 3d19ae2..6073d78 100644
--- a/src/core.cpp
+++ b/src/core.cpp
@@ -629,11 +629,12 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
 }
 
 struct PathSegment {
-  std::string schema;
+  // Numeric schema tag (as produced by SchemaRef::tag()), not the schema name.
+  uint16_t schema;
   int64_t node_id;
 
   std::string toString() const {
-    return schema + ":" + std::to_string(node_id);
+    return std::to_string(schema) + ":" + std::to_string(node_id);
   }
 
   bool operator==(const PathSegment& other) const {
@@ -667,6 +668,8 @@ struct Row {
   int64_t id;
   std::unordered_map<std::string, std::shared_ptr<arrow::Scalar>> cells;
   std::vector<PathSegment> path;
+  std::unordered_map<std::string, int64_t> schema_ids;  // can we use tag
+  bool schema_ids_set = false;
 
   void set_cell(const std::string& name,
                 std::shared_ptr<arrow::Scalar> scalar) {
@@ -688,6 +691,7 @@ struct Row {
       this->set_cell(full_name, node->get_value_ptr(field->name()).ValueOrDie(),
                      field->type());
     }
+    // schema_ids[node->get_schema()->name()] = node->id;
   }
 
   // New set_cell method for Value objects
@@ -705,25 +709,30 @@ struct Row {
     cells[name] = nullptr;
   }
 
-  void set_cell(const std::string& name, std::shared_ptr<arrow::Array> array) {
-    if (array && array->length() > 0) {
-      auto scalar_result = array->GetScalar(0);
-      if (scalar_result.ok()) {
-        cells[name] = scalar_result.ValueOrDie();
-        return;
-      }
-    }
-
-    // Default to null if array is empty or conversion fails
-    cells[name] = nullptr;
-  }
+  // void set_cell(const std::string& name, std::shared_ptr<arrow::Array> array)
+  // {
+  //   if (array && array->length() > 0) {
+  //     auto scalar_result = array->GetScalar(0);
+  //     if (scalar_result.ok()) {
+  //       cells[name] = scalar_result.ValueOrDie();
+  //       return;
+  //     }
+  //   }
+  //
+  //   // Default to null if array is empty or conversion fails
+  //   cells[name] = nullptr;
+  // }
 
   bool start_with(const std::vector<PathSegment>& prefix) const {
     return is_prefix(prefix, this->path);
   }
 
-  std::unordered_map<std::string, int64_t> extract_schema_ids() const {
-    std::unordered_map<std::string, int64_t> result;
+  // todo replace
+  const std::unordered_map<std::string, int64_t>& extract_schema_ids() {
+    if (schema_ids_set) {
+      return schema_ids;
+    }
+    // std::unordered_map<std::string, int64_t> result;
     for (const auto& [field_name, value] : cells) {
       if (!value || !value->is_valid) continue;
 
@@ -735,19 +744,22 @@ struct Row {
         // Store ID for this schema if it's an ID field
         if (field_name.substr(dot_pos + 1) == "id") {
           auto id_scalar = std::static_pointer_cast<arrow::Int64Scalar>(value);
-          result[schema] = id_scalar->value;
+          schema_ids[schema] = id_scalar->value;
         }
       }
     }
-    return result;
+    schema_ids_set = true;
+    return schema_ids;
   }
 
   // returns new Row which is result of merging this row and other
-  [[nodiscard]] Row merge(const Row& other) const {
-    Row merged = *this;
-    for (const auto& [name, value] : other.cells) {
-      if (!merged.has_value(name)) {
-        merged.cells[name] = value;
+  [[nodiscard]] std::shared_ptr<Row> merge(
+      const std::shared_ptr<Row>& other) const {
+    std::shared_ptr<Row> merged = std::make_shared<Row>(*this);
+    merged->schema_ids_set = false;  // cells change below; rebuild id cache
+    for (const auto& [name, value] : other->cells) {
+      if (!merged->has_value(name)) {
+        merged->cells[name] = value;
       }
     }
     return merged;
@@ -806,6 +818,7 @@ struct Row {
 static Row create_empty_row_from_schema(
     const std::shared_ptr<arrow::Schema>& final_output_schema) {
   Row new_row;
+  new_row.id = -1;
   for (const auto& field : final_output_schema->fields()) {
     // Create a null scalar of the correct type
     auto null_scalar = arrow::MakeNullScalar(field->type());
@@ -814,8 +827,10 @@ static Row create_empty_row_from_schema(
     } else {
       // If creating a null scalar fails, use nullptr as a fallback
       new_row.cells[field->name()] = nullptr;
-      log_warn("Failed to create null scalar for field '{}' with type '{}'",
-               field->name(), field->type()->ToString());
+      if (Logger::get_instance().get_level() >= LogLevel::WARN) {
+        log_warn("Failed to create null scalar for field '{}' with type '{}'",
+                 field->name(), field->type()->ToString());
+      }
     }
   }
   return new_row;
@@ -834,31 +849,36 @@ std::vector<Row> get_child_rows(const Row& parent,
   return child;
 }
 
+struct MergeState {
+  llvm::SmallVector<Row, 4> result;
+  llvm::SmallDenseMap<uint16_t, llvm::SmallVector<Row, 4>> grouped;
+};
+
 struct RowNode {
-  std::optional<Row> row;
+  std::optional<std::shared_ptr<Row>> row;
   int depth;
   PathSegment path_segment;
   std::vector<std::unique_ptr<RowNode>> children;
 
-  RowNode() : depth(0), path_segment{"", -1} {}
+  RowNode() : depth(0), path_segment{0, -1} {}
 
-  RowNode(std::optional<Row> r, int d,
+  RowNode(std::optional<std::shared_ptr<Row>> r, int d,
           std::vector<std::unique_ptr<RowNode>> c = {})
       : row(std::move(r)),
         depth(d),
-        path_segment{"", -1},
+        path_segment{0, -1},
         children(std::move(c)) {}
 
   bool leaf() const { return row.has_value(); }
 
-  void insert_row_dfs(size_t path_idx, const Row& new_row) {
-    if (path_idx == new_row.path.size()) {
+  void insert_row_dfs(size_t path_idx, const std::shared_ptr<Row>& new_row) {
+    if (path_idx == new_row->path.size()) {
       this->row = new_row;
       return;
     }
 
     for (const auto& n : children) {
-      if (n->path_segment == new_row.path[path_idx]) {
+      if (n->path_segment == new_row->path[path_idx]) {
         n->insert_row_dfs(path_idx + 1, new_row);
         return;
       }
@@ -866,20 +886,24 @@ struct RowNode {
 
     auto new_node = std::make_unique<RowNode>();
     new_node->depth = depth + 1;
-    new_node->path_segment = new_row.path[path_idx];
+    new_node->path_segment = new_row->path[path_idx];
     new_node->insert_row_dfs(path_idx + 1, new_row);
     children.emplace_back(std::move(new_node));
   }
 
-  void insert_row(const Row& new_row) { insert_row_dfs(0, new_row); }
+  void insert_row(const std::shared_ptr<Row>& new_row) {
+    insert_row_dfs(0, new_row);
+  }
 
-  std::vector<Row> merge_rows() {
+  llvm::SmallVector<std::shared_ptr<Row>, 4> merge_rows() {
     if (this->leaf()) {
       return {this->row.value()};
     }
 
     // collect all records from child node and group them by schema
-    std::unordered_map<std::string, std::vector<Row>> grouped;
+    // std::unordered_map<std::string, std::vector<Row>> grouped;
+    llvm::SmallDenseMap<uint16_t, llvm::SmallVector<std::shared_ptr<Row>, 4>>
+        grouped;
     for (const auto& c : children) {
       auto child_rows = c->merge_rows();
       grouped[c->path_segment.schema].insert(
@@ -887,18 +911,19 @@ struct RowNode {
           child_rows.end());
     }
 
-    std::vector<std::vector<Row>> groups_for_product;
+    std::vector<llvm::SmallVector<std::shared_ptr<Row>, 4>> groups_for_product;
+    groups_for_product.reserve(grouped.size() + 1);
     // Add this->row as its own group (that is important for cartesian product)
     // if it exists and has data,
     // to represent the node itself if it should be part of the product
     // independently.
     if (this->row.has_value()) {
-      Row node_self_row = this->row.value();
+      std::shared_ptr<Row> node_self_row = this->row.value();
       // Normalize path for the node's own row to ensure it combines correctly
       // and doesn't carry a longer BFS path if it was a leaf of BFS.
       // i.e. current node path can be a:0->b:1->c:2
       // this code sets it to 'c:2'
-      node_self_row.path = {this->path_segment};
+      node_self_row->path = {this->path_segment};
       groups_for_product.push_back({node_self_row});
     }
 
@@ -915,18 +940,19 @@ struct RowNode {
     // with data), no Cartesian product is needed. Just return its rows, but
     // ensure paths are correct.
     if (groups_for_product.size() == 1) {
-      std::vector<Row> single_group_rows = groups_for_product[0];
+      // std::vector<Row> single_group_rows = groups_for_product[0];
       // Ensure path is normalized for these rows if they came from children
       // For rows that are just this->row.value(), path is already set.
       // This might be too aggressive if child rows are already fully merged
       // products. For now, let's assume rows from c->merge_rows() are final
       // products of that child branch.
-      return single_group_rows;
+      return groups_for_product[0];
     }
 
-    std::vector<Row> final_merged_rows = groups_for_product.back();
+    llvm::SmallVector<std::shared_ptr<Row>, 4> final_merged_rows =
+        groups_for_product.back();
     for (int i = static_cast<int>(groups_for_product.size()) - 2; i >= 0; --i) {
-      std::vector<Row> temp_product_accumulator;
+      llvm::SmallVector<std::shared_ptr<Row>, 4> temp_product_accumulator;
       for (const auto& r1_from_current_group : groups_for_product[i]) {
         for (const auto& r2_from_previous_product : final_merged_rows) {
           // Check for conflicts in shared variables between rows
@@ -934,9 +960,9 @@ struct RowNode {
 
           // Get variable prefixes (schema names) from cells
           std::unordered_map<std::string, int64_t> schema_ids_r1 =
-              r1_from_current_group.extract_schema_ids();
+              r1_from_current_group->extract_schema_ids();
           std::unordered_map<std::string, int64_t> schema_ids_r2 =
-              r2_from_previous_product.extract_schema_ids();
+              r2_from_previous_product->extract_schema_ids();
 
           // Check for conflicts - same schema name but different IDs
           for (const auto& [schema, id1] : schema_ids_r1) {
@@ -957,11 +983,11 @@ struct RowNode {
           // Additional cell-by-cell check for conflicts
           if (can_merge) {
             for (const auto& [field_name, value1] :
-                 r1_from_current_group.cells) {
+                 r1_from_current_group->cells) {
               if (!value1 || !value1->is_valid) continue;
 
-              auto it = r2_from_previous_product.cells.find(field_name);
-              if (it != r2_from_previous_product.cells.end() && it->second &&
+              auto it = r2_from_previous_product->cells.find(field_name);
+              if (it != r2_from_previous_product->cells.end() && it->second &&
                   it->second->is_valid) {
                 // Both rows have this field with non-null values - check if
                 // they match
@@ -979,11 +1005,11 @@ struct RowNode {
           }
 
           if (can_merge) {
-            Row merged_r =
-                r1_from_current_group.merge(r2_from_previous_product);
+            std::shared_ptr<Row> merged_r =
+                r1_from_current_group->merge(r2_from_previous_product);
             // Set the path of the newly merged row to the path of the current
             // RowNode
-            merged_r.path = {this->path_segment};
+            merged_r->path = {this->path_segment};
             temp_product_accumulator.push_back(merged_r);
           }
         }
@@ -1011,28 +1037,28 @@ struct RowNode {
     // Print Row
     if (row.has_value()) {
       ss << indent << "  Path: ";
-      if (row.value().path.empty()) {
+      if (row.value()->path.empty()) {
         ss << "(empty)";
       } else {
-        for (size_t i = 0; i < row.value().path.size(); ++i) {
+        for (size_t i = 0; i < row.value()->path.size(); ++i) {
           if (i > 0) ss << " → ";
-          ss << row.value().path[i].schema << ":"
-             << row.value().path[i].node_id;
+          ss << row.value()->path[i].schema << ":"
+             << row.value()->path[i].node_id;
         }
       }
       ss << "\n";
 
       // Print key cell values (limited to avoid overwhelming output)
       ss << indent << "  Cells: ";
-      if (row.value().cells.empty()) {
+      if (row.value()->cells.empty()) {
         ss << "(empty)";
       } else {
         size_t count = 0;
         ss << "{ ";
-        for (const auto& [key, value] : row.value().cells) {
+        for (const auto& [key, value] : row.value()->cells) {
           if (count++ > 0) ss << ", ";
           if (count > 5) {  // Limit display
-            ss << "... +" << (row.value().cells.size() - 5) << " more";
+            ss << "... +" << (row.value()->cells.size() - 5) << " more";
             break;
           }
 
@@ -1087,7 +1113,7 @@ struct QueueItem {
 
   QueueItem(int64_t id, const SchemaRef& schema, int l, std::shared_ptr<Row> r)
       : node_id(id), schema_ref(schema), level(l), row(std::move(r)) {
-    path.push_back(PathSegment{schema.value(), id});
+    path.push_back(PathSegment{schema.tag(), id});
   }
 };
 
@@ -1123,12 +1149,14 @@ void log_grouped_connections(
 }
 
 template <StringSet VisitedSet>
-arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
-    int64_t node_id, const SchemaRef& start_schema,
-    const std::shared_ptr<arrow::Schema>& output_schema,
-    const QueryState& query_state, VisitedSet& global_visited) {
-  log_debug("populate_rows_bfs::node={}:{}", start_schema.value(), node_id);
-  auto result = std::make_shared<std::vector<Row>>();
+arrow::Result<std::shared_ptr<llvm::SmallVector<std::shared_ptr<Row>, 4>>>
+populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
+                  const std::shared_ptr<arrow::Schema>& output_schema,
+                  const QueryState& query_state, VisitedSet& global_visited) {
+  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+    log_debug("populate_rows_bfs::node={}:{}", start_schema.value(), node_id);
+  }
+  auto result = std::make_shared<llvm::SmallVector<std::shared_ptr<Row>, 4>>();
   int64_t row_id_counter = 0;
   auto initial_row =
       std::make_shared<Row>(create_empty_row_from_schema(output_schema));
@@ -1146,9 +1174,6 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
       const auto& it_fq =
           query_state.fq_field_names.find(item.schema_ref.value());
       if (it_fq == query_state.fq_field_names.end()) {
-        std::cout
-            << "ERROR: Could not find fully qualified field names for schema "
-            << item.schema_ref.value() << std::endl;
         return arrow::Status::KeyError(
             "Missing precomputed fq_field_names for alias {}",
             item.schema_ref.value());
@@ -1163,8 +1188,7 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
       item.path_visited_nodes.insert(packed);
 
       // group connections by target schema (small, stack-friendly)
-      llvm::SmallDenseMap<llvm::StringRef,
-                          llvm::SmallVector<GraphConnection, 4>, 4>
+      llvm::SmallDenseMap<uint16_t, llvm::SmallVector<GraphConnection, 4>>
           grouped_connections;
 
       bool skip = false;
@@ -1179,23 +1203,23 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
           if (!item.path_visited_nodes.contains(tgt_packed)) {
             if (query_state.ids.at(conn.target.value())
                     .contains(conn.target_id)) {
-              grouped_connections[conn.target.value()].push_back(conn);
+              grouped_connections[conn.target.tag()].push_back(conn);
             } else {
               skip = true;
             }
           }
         }
       }
-      log_grouped_connections(item.node_id, grouped_connections);
+      // log_grouped_connections(item.node_id, grouped_connections);
 
       if (grouped_connections.empty()) {
         // we've done
         if (!skip) {
-          auto r = *item.row;
-          r.path = item.path;
-          r.id = row_id_counter++;
+          auto r = item.row;
+          r->path = item.path;
+          r->id = row_id_counter++;
           if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
-            log_debug("add row: {}", r.ToString());
+            log_debug("add row: {}", r->ToString());
           }
           result->push_back(r);
         }
@@ -1211,7 +1235,7 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
                           item.level + 1, item.row);
 
             next.path = item.path;
-            next.path.push_back(PathSegment{connections[0].target.value(),
+            next.path.push_back(PathSegment{connections[0].target.tag(),
                                             connections[0].target_id});
             if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
               log_debug("continue the path: {}", join_schema_path(next.path));
@@ -1224,7 +1248,7 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
                                     next_row);
               next.path = item.path;
               next.path.push_back(
-                  PathSegment{conn.target.value(), conn.target_id});
+                  PathSegment{conn.target.tag(), conn.target_id});
               if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
                 log_debug("create a new path {}, node={}",
                           join_schema_path(next.path), conn.target_id);
@@ -1237,10 +1261,10 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
     }
   }
   RowNode tree;
-  tree.path_segment = PathSegment{"root", -1};
+  tree.path_segment = PathSegment{0, -1};
   for (const auto& r : *result) {
     if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
-      log_debug("bfs result: {}", r.ToString());
+      log_debug("bfs result: {}", r->ToString());
     }
     tree.insert_row(r);
   }
@@ -1250,19 +1274,20 @@ arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows_bfs(
   auto merged = tree.merge_rows();
   if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
     for (const auto& row : merged) {
-      log_debug("merge result: {}", row.ToString());
+      log_debug("merge result: {}", row->ToString());
     }
   }
-  return std::make_shared<std::vector<Row>>(merged);
+  return std::make_shared<llvm::SmallVector<std::shared_ptr<Row>, 4>>(merged);
 }
 
 // template <NodeIds NodeIdsT>
-arrow::Result<std::shared_ptr<std::vector<Row>>> populate_batch_rows(
+arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>>
+populate_batch_rows(
     const llvm::DenseSet<int64_t>& node_ids, const SchemaRef& schema_ref,
     const std::shared_ptr<arrow::Schema>& output_schema,
     const QueryState& query_state, const TraverseType join_type,
     tbb::concurrent_unordered_set<std::string>& global_visited) {
-  auto rows = std::make_shared<std::vector<Row>>();
+  auto rows = std::make_shared<std::vector<std::shared_ptr<Row>>>();
   rows->reserve(node_ids.size());
   std::set<std::string> local_visited;
   // For INNER join: only process nodes that have connections
@@ -1323,11 +1348,11 @@ std::vector<llvm::DenseSet<int64_t>> batch_node_ids(
 // process all schemas used in traverse
 // Phase 1: Process connected nodes
 // Phase 2: Handle outer joins for unmatched nodes
-arrow::Result<std::shared_ptr<std::vector<Row>>> populate_rows(
+arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     const ExecutionConfig& execution_config, const QueryState& query_state,
     const std::vector<Traverse>& traverses,
     const std::shared_ptr<arrow::Schema>& output_schema) {
-  auto rows = std::make_shared<std::vector<Row>>();
+  auto rows = std::make_shared<std::vector<std::shared_ptr<Row>>>();
   std::mutex rows_mtx;
   tbb::concurrent_unordered_set<std::string> global_visited;
 
@@ -1499,7 +1524,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_empty_table(
 }
 
 arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
-    const std::shared_ptr<std::vector<Row>>& rows,
+    const std::shared_ptr<std::vector<std::shared_ptr<Row>>>& rows,
     const std::shared_ptr<arrow::Schema>& schema = nullptr) {
   if (!rows || rows->empty()) {
     if (schema == nullptr) {
@@ -1517,7 +1542,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
     // Get all field names from all rows to create a complete schema
     std::set<std::string> all_field_names;
     for (const auto& row : *rows) {
-      for (const auto& field_name : row.cells | std::views::keys) {
+      for (const auto& field_name : row->cells | std::views::keys) {
         all_field_names.insert(field_name);
       }
     }
@@ -1529,8 +1554,8 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
       // Find first non-null value to determine field type
       std::shared_ptr<arrow::DataType> field_type = nullptr;
       for (const auto& row : *rows) {
-        auto it = row.cells.find(field_name);
-        if (it != row.cells.end() && it->second) {
+        auto it = row->cells.find(field_name);
+        if (it != row->cells.end() && it->second) {
           if (auto array_result = arrow::MakeArrayFromScalar(*(it->second), 1);
               array_result.ok()) {
             field_type = array_result.ValueOrDie()->type();
@@ -1561,24 +1586,27 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
   for (const auto& row : *rows) {
     for (size_t i = 0; i < output_schema->num_fields(); i++) {
       const auto& field_name = output_schema->field(i)->name();
-      auto it = row.cells.find(field_name);
+      auto it = row->cells.find(field_name);
 
-      if (it != row.cells.end() && it->second) {
-        // We have a value for this field
-        auto array_result = arrow::MakeArrayFromScalar(*(it->second), 1);
-        if (array_result.ok()) {
-          auto array = array_result.ValueOrDie();
-          auto scalar_result = array->GetScalar(0);
-          if (scalar_result.ok()) {
-            ARROW_RETURN_NOT_OK(
-                builders[i]->AppendScalar(*scalar_result.ValueOrDie()));
-            continue;
-          }
+      if (it != row->cells.end() && it->second) {
+        if (auto res = builders[i]->AppendScalar(*it->second); !res.ok()) {
+          return res;
         }
+        // We have a value for this field
+        // auto array_result = arrow::MakeArrayFromScalar(*(it->second), 1);
+        // if (array_result.ok()) {
+        //   auto array = array_result.ValueOrDie();
+        //   auto scalar_result = array->GetScalar(0);
+        //   if (scalar_result.ok()) {
+        //     ARROW_RETURN_NOT_OK(
+        //         builders[i]->AppendScalar(*scalar_result.ValueOrDie()));
+        //     continue;
+        //   }
+        // }
+      } else {
+        // Fall back to NULL if we couldn't get or append the scalar
+        ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
       }
-
-      // Fall back to NULL if we couldn't get or append the scalar
-      ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
     }
   }
 
@@ -1873,12 +1901,14 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
         llvm::DenseSet<int64_t> unmatched_source_ids;
         for (auto source_id : query_state.ids[source.value()]) {
           auto outgoing_edges =
-              edge_store_->get_outgoing_edges(source_id, traverse->edge_type())
+              edge_store_
+                  ->get_outgoing_edges_view(source_id, traverse->edge_type())
                   .ValueOrDie();  // todo check result
-          if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
-            log_debug("Node {} has {} outgoing edges of type '{}'", source_id,
-                      outgoing_edges.size(), traverse->edge_type());
-          }
+          // if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          //   log_debug("Node {} has {} outgoing edges of type '{}'",
+          //   source_id,
+          //             outgoing_edges.size(), traverse->edge_type());
+          // }
 
           bool source_had_match = false;
           for (auto edge : outgoing_edges) {
diff --git a/src/edge_store.cpp b/src/edge_store.cpp
index 5858818..a8679d8 100644
--- a/src/edge_store.cpp
+++ b/src/edge_store.cpp
@@ -3,6 +3,23 @@
 #include "logger.hpp"
 namespace tundradb {
 
+// Advance to the next id whose edge exists and matches type_filter_ (empty
+// filter = any type); clears current_edge_ when the ids are exhausted.
+void EdgeView::iterator::advance_to_valid() {
+  using EdgeAccessor =
+      tbb::concurrent_hash_map<int64_t, std::shared_ptr<Edge>>::const_accessor;
+  for (; edge_ids_it_ != edge_ids_end_; ++edge_ids_it_) {
+    EdgeAccessor edge_acc;
+    if (!store_->edges.find(edge_acc, edge_ids_it_->first)) continue;
+    const auto& candidate = edge_acc->second;
+    if (type_filter_.empty() || candidate->get_type() == type_filter_) {
+      current_edge_ = candidate;
+      return;
+    }
+  }
+  current_edge_.reset();
+}
+
 arrow::Result<std::shared_ptr<Edge>> EdgeStore::create_edge(
     int64_t source_id, const std::string& type, int64_t target_id,
     std::unordered_map<std::string, std::shared_ptr<arrow::Array>> properties) {
@@ -124,13 +141,13 @@ arrow::Result<std::vector<std::shared_ptr<Edge>>> EdgeStore::get_edges_from_map(
   }
 
   std::vector<std::shared_ptr<Edge>> result;
-  const auto edge_ids = acc->second.get_all();
-  result.reserve(edge_ids->size());
+  const auto edge_ids = acc->second.get_all_unsafe();
+  result.reserve(edge_ids.size());
 
-  for (const auto& edge_id : *edge_ids) {
+  for (const auto& edge_id : edge_ids) {
     tbb::concurrent_hash_map<int64_t, std::shared_ptr<Edge>>::const_accessor
         edge_acc;
-    if (edges.find(edge_acc, edge_id)) {
+    if (edges.find(edge_acc, edge_id.first)) {
       if (auto edge = edge_acc->second;
           type.empty() || edge->get_type() == type) {
         result.push_back(edge);
@@ -151,6 +168,28 @@ arrow::Result<std::vector<std::shared_ptr<Edge>>> EdgeStore::get_incoming_edges(
   return get_edges_from_map(incoming_edges_, id, type);
 }
 
+arrow::Result<EdgeView> EdgeStore::get_outgoing_edges_view(
+    const int64_t id, const std::string& type) const {
+  // Shared, never-populated set backing the view of a node with no entry.
+  static const ConcurrentSet<int64_t> empty_set;
+  tbb::concurrent_hash_map<int64_t, ConcurrentSet<int64_t>>::const_accessor acc;
+  const bool found = outgoing_edges_.find(acc, id);
+  // NOTE(review): `acc` unlocks on return; if EdgeView keeps a reference to
+  // the set, confirm entries are never erased while a view is alive.
+  return EdgeView(this, found ? acc->second : empty_set, type);
+}
+
+arrow::Result<EdgeView> EdgeStore::get_incoming_edges_view(
+    const int64_t id, const std::string& type) const {
+  // Shared, never-populated set backing the view of a node with no entry.
+  static const ConcurrentSet<int64_t> empty_set;
+  tbb::concurrent_hash_map<int64_t, ConcurrentSet<int64_t>>::const_accessor acc;
+  const bool found = incoming_edges_.find(acc, id);
+  // NOTE(review): `acc` unlocks on return; if EdgeView keeps a reference to
+  // the set, confirm entries are never erased while a view is alive.
+  return EdgeView(this, found ? acc->second : empty_set, type);
+}
+
 arrow::Result<std::vector<std::shared_ptr<Edge>>> EdgeStore::get_by_type(
     const std::string& type) const {
   tbb::concurrent_hash_map<std::string, ConcurrentSet<int64_t>>::const_accessor

From 4752c3b3502a8b589ed991ed9448293cb2515b8c Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Fri, 19 Sep 2025 12:56:49 -0400
Subject: [PATCH 2/4] optimize perf

---
 include/core.hpp          |  47 +++++++-------
 include/node.hpp          |  65 ++++++++++++++-----
 include/node_arena.hpp    |   4 +-
 include/query.hpp         |  63 +++++++++++-------
 include/schema_layout.hpp |  14 +++-
 include/types.hpp         |  34 ++++++++++
 include/utils.hpp         |  15 +++--
 src/core.cpp              | 131 ++++++++++++++++++++------------------
 8 files changed, 239 insertions(+), 134 deletions(-)

diff --git a/include/core.hpp b/include/core.hpp
index c525032..0fbd4db 100644
--- a/include/core.hpp
+++ b/include/core.hpp
@@ -765,28 +765,31 @@ class Database {
 
   arrow::Result<std::shared_ptr<arrow::Table>> get_table(
       const std::string &schema_name, size_t chunk_size = 10000) const {
-    ARROW_ASSIGN_OR_RAISE(auto schema,
-                          schema_registry_->get_arrow(schema_name));
-
-    ARROW_ASSIGN_OR_RAISE(auto all_nodes,
-                          shard_manager_->get_nodes(schema_name));
-
-    if (all_nodes.empty()) {
-      std::vector<std::shared_ptr<arrow::ChunkedArray>> empty_columns;
-      empty_columns.reserve(schema->num_fields());
-      for (int i = 0; i < schema->num_fields(); i++) {
-        empty_columns.push_back(std::make_shared<arrow::ChunkedArray>(
-            std::vector<std::shared_ptr<arrow::Array>>{}));
-      }
-      return arrow::Table::Make(schema, empty_columns);
-    }
-
-    std::ranges::sort(all_nodes, [](const std::shared_ptr<Node> &a,
-                                    const std::shared_ptr<Node> &b) {
-      return a->id < b->id;
-    });
-
-    return create_table(schema, all_nodes, chunk_size);
+    // Propagate a failed shard lookup to the caller; ValueOrDie() would
+    // abort the whole process on error.
+    ARROW_ASSIGN_OR_RAISE(auto shard,
+                          shard_manager_->get_shard(schema_name, 0));
+    // NOTE(review): only shard 0 is read and chunk_size is unused; confirm.
+    return shard->get_table();
+
+    // ARROW_ASSIGN_OR_RAISE(auto schema,
+    //                       schema_registry_->get_arrow(schema_name));
+    // ARROW_ASSIGN_OR_RAISE(auto all_nodes,
+    //                       shard_manager_->get_nodes(schema_name));
+    // if (all_nodes.empty()) {
+    //   std::vector<std::shared_ptr<arrow::ChunkedArray>> empty_columns;
+    //   empty_columns.reserve(schema->num_fields());
+    //   for (int i = 0; i < schema->num_fields(); i++) {
+    //     empty_columns.push_back(std::make_shared<arrow::ChunkedArray>(
+    //         std::vector<std::shared_ptr<arrow::Array>>{}));
+    //   }
+    //   return arrow::Table::Make(schema, empty_columns);
+    // }
+    // std::ranges::sort(all_nodes, [](const std::shared_ptr<Node> &a,
+    //                                 const std::shared_ptr<Node> &b) {
+    //   return a->id < b->id;
+    // });
+    // return create_table(schema, all_nodes, chunk_size);
   }
 
   arrow::Result<size_t> get_shard_count(const std::string &schema_name) const {
diff --git a/include/node.hpp b/include/node.hpp
index 8fe584c..285e94e 100644
--- a/include/node.hpp
+++ b/include/node.hpp
@@ -6,7 +6,8 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
-
+#include <iostream>
+#include "llvm/ADT/DenseMap.h"
 #include "node_arena.hpp"
 #include "schema.hpp"
 #include "types.hpp"
@@ -20,7 +21,7 @@ enum UpdateType {
 
 class Node {
  private:
-  std::unordered_map<std::string, Value> data_;
+  llvm::StringMap<Value> data_;
   std::unique_ptr<NodeHandle> handle_;
   std::shared_ptr<NodeArena> arena_;
   std::shared_ptr<Schema> schema_;
@@ -31,7 +32,7 @@ class Node {
   std::string schema_name;
 
   explicit Node(const int64_t id, std::string schema_name,
-                std::unordered_map<std::string, Value> initial_data,
+                llvm::StringMap<Value> initial_data,
                 std::unique_ptr<NodeHandle> handle = nullptr,
                 std::shared_ptr<NodeArena> arena = nullptr,
                 std::shared_ptr<Schema> schema = nullptr,
@@ -56,21 +57,29 @@ class Node {
     data_[field_name] = std::move(value);
   }
 
-  arrow::Result<const char *> get_value_ptr(
-      const std::string &field_name) const {
+  const char * get_value_ptr(const std::string &field_name, ValueType* out_type) const {
     if (arena_ != nullptr) {
       // if (schema_->get_field(field_name) == nullptr) {
       //   // Logger::get_instance().debug("Field not found");
       //   return arrow::Status::KeyError("Field not found: ", field_name);
       // }
-      return arena_->get_field_value_ptr(*handle_, layout_, field_name);
+      return arena_->get_field_value_ptr(*handle_, layout_, field_name, out_type);
     }
 
+    // Map-backed node: a field missing from data_ yields nullptr with
+    // *out_type set to NA, rather than dereferencing end(); otherwise
+    // *out_type (when non-null) receives the stored value's type.
     const auto it = data_.find(field_name);
-    if (it == data_.end()) {
-      return arrow::Status::KeyError("Field not found: ", field_name);
-    }
-    return arrow::Status::NotImplemented("");
+    if (it == data_.end()) {
+      if (out_type != nullptr) {
+        *out_type = ValueType::NA;
+      }
+      return nullptr;
+    }
+    if (out_type != nullptr) {
+      *out_type = it->second.type();
+    }
+    return it->second.data_ptr();
   }
 
   arrow::Result<Value> get_value(const std::string &field_name) const {
@@ -132,10 +141,20 @@ class NodeManager {
     use_node_arena_ = use_node_arena;
     schema_registry_ = std::move(schema_registry);
     layout_registry_ = std::make_shared<LayoutRegistry>();
-    node_arena_ = node_arena_factory::create_free_list_arena(layout_registry_);
+    if (use_node_arena) {
+      node_arena_ =
+          node_arena_factory::create_free_list_arena(layout_registry_);
+    }
   }
 
-  ~NodeManager() { node_arena_->clear(); }
+  // Clears whichever arenas exist. node_arena_ is only created when the
+  // manager was constructed with use_node_arena, so it can be null here.
+  ~NodeManager() {
+    const bool has_node_arena = node_arena_ != nullptr;
+    const bool has_string_arena = string_arena_ != nullptr;
+    if (has_node_arena) node_arena_->clear();
+    if (has_string_arena) string_arena_->clear();
+  }
 
   arrow::Result<std::shared_ptr<Node>> get_node(const int64_t id) {
     return nodes[id];
@@ -216,7 +235,7 @@ class NodeManager {
       nodes[id] = node;
       return node;
     } else {
-      std::unordered_map<std::string, Value> normalized_data;
+      llvm::StringMap<Value> normalized_data;
       normalized_data["id"] = Value{id};
 
       for (const auto &field : schema_->fields()) {
@@ -225,7 +244,18 @@ class NodeManager {
           normalized_data[field->name()] = Value();
         } else {
           const auto value = data.find(field->name())->second;
-          normalized_data[field->name()] = value;
+          if (is_string_type(value.type())) {
+            auto str_ref =
+                string_arena_->store_string(value.get<std::string>());
+            normalized_data[field->name()] = Value{str_ref};
+            // Logger::get_instance().debug("string arena: {}",
+            // normalized_data[field->name()].to_string());
+            // Logger::get_instance().debug("string arena2: {}",
+            //   Value::read_value_from_memory(normalized_data[field->name()].data_ptr(),
+            //   ValueType::STRING).to_string() );
+          } else {
+            normalized_data[field->name()] = value;
+          }
         }
       }
 
@@ -248,6 +278,7 @@ class NodeManager {
   std::shared_ptr<SchemaRegistry> schema_registry_;
   std::shared_ptr<LayoutRegistry> layout_registry_;
   std::shared_ptr<NodeArena> node_arena_;
+  std::shared_ptr<StringArena> string_arena_ = std::make_shared<StringArena>();
   bool validation_enabled_;
   bool use_node_arena_;
 
@@ -258,7 +289,7 @@ class NodeManager {
   // cache layout
   std::shared_ptr<SchemaLayout> layout_;
 
-  const std::unordered_map<std::string, Value> EMPTY_DATA{};
+  const llvm::StringMap<Value> EMPTY_DATA{};
 
   // since node creation is single threaded, we can cache the layout
   // w/o synchronization
@@ -279,7 +310,9 @@ class NodeManager {
     if (schema_name_ == schema_name) return;
     schema_name_ = schema_name;
     schema_ = schema_registry_->get(schema_name).ValueOrDie();
-    layout_ = create_or_get_layout(schema_name);
+    if (use_node_arena_) {
+      layout_ = create_or_get_layout(schema_name);
+    }
   }
 };
 
diff --git a/include/node_arena.hpp b/include/node_arena.hpp
index b1a5fa0..e5fec02 100644
--- a/include/node_arena.hpp
+++ b/include/node_arena.hpp
@@ -133,7 +133,7 @@ class NodeArena {
    */
   const char* get_field_value_ptr(const NodeHandle& handle,
                                   const std::shared_ptr<SchemaLayout>& layout,
-                                  const std::string& field_name) const {
+                                  const std::string& field_name, ValueType* out_type) const {
     // Logger::get_instance().debug("get_field_value: {}.{}", schema_name,
     //                              field_name);
     if (handle.is_null()) {
@@ -142,7 +142,7 @@ class NodeArena {
     }
 
     return layout->get_field_value_ptr(static_cast<const char*>(handle.ptr),
-                                       field_name);
+                                       field_name, out_type);
   }
 
   Value get_field_value(const NodeHandle& handle,
diff --git a/include/query.hpp b/include/query.hpp
index 1c150b7..f19ce21 100644
--- a/include/query.hpp
+++ b/include/query.hpp
@@ -186,8 +186,12 @@ class ComparisonExpr : public Clause, public WhereExpr {
   bool inlined_ = false;
   std::string field_name;
 
-  static arrow::Result<bool> compare_values(const Value& value, CompareOp op,
-                                            const Value& where_value) {
+  static arrow::Result<bool> compare_values(
+  const std::string& field_name,
+  const char* value_ptr, CompareOp op,
+                                            const Value& where_value,
+                                            ValueType value_type) {
+    /*
     if (value.type() == ValueType::NA || where_value.type() == ValueType::NA) {
       switch (op) {
         case CompareOp::Eq:
@@ -229,35 +233,41 @@ class ComparisonExpr : public Clause, public WhereExpr {
       return arrow::Status::Invalid("Type mismatch: field is ", value.type(),
                                     " but WHERE value is ", where_value.type());
     }
-
-    switch (value.type()) {
+*/
+    // std::cout << "compare " << field_name << ":"<< to_string(value_type) <<  std::endl;
+    switch (value_type) {
       case ValueType::INT32: {
-        int32_t field_val = value.get<int32_t>();
+        int32_t field_val = *reinterpret_cast<const int32_t*>(value_ptr);
+        // std::cout << field_name << "~~" << "where_value.type=" << to_string(where_value.type()) << std::endl;
         int32_t where_val = where_value.get<int32_t>();
+        // std::cout << "where_val = int32_t" << where_val << std::endl;
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::INT64: {
-        int64_t field_val = value.get<int64_t>();
+        int64_t field_val = *reinterpret_cast<const int64_t*>(value_ptr);
         int64_t where_val = where_value.get<int64_t>();
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::FLOAT: {
-        float field_val = value.get<float>();
+        float field_val = *reinterpret_cast<const float*>(value_ptr);
         float where_val = where_value.get<float>();
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::DOUBLE: {
-        double field_val = value.get<double>();
+        double field_val = *reinterpret_cast<const double*>(value_ptr);
         double where_val = where_value.get<double>();
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::STRING: {
-        const std::string& field_val = value.as_string();
+        // std::cout << "compare strings: begin" << std::endl;
+        auto str_ref = *reinterpret_cast<const StringRef*>(value_ptr);
+        const std::string& field_val = std::string(str_ref.data, str_ref.length);
         const std::string& where_val = where_value.as_string();
+        // std::cout << "compare strings: end" << std::endl;
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::BOOL: {
-        bool field_val = value.get<bool>();
+        bool field_val = *reinterpret_cast<const bool*>(value_ptr);
         bool where_val = where_value.get<bool>();
         return apply_comparison(field_val, op, where_val);
       }
@@ -265,7 +275,7 @@ class ComparisonExpr : public Clause, public WhereExpr {
         return arrow::Status::Invalid("Unexpected null value in comparison");
       default:
         return arrow::Status::NotImplemented(
-            "Unsupported value type for comparison: ", value.type());
+            "Unsupported value type for comparison: ", value_type);
     }
   }
 
@@ -408,22 +418,28 @@ class ComparisonExpr : public Clause, public WhereExpr {
     // parse field name to extract variable and field parts
     // expected format: "variable.field" (e.g., "user.age", "company.name")
 
-    ARROW_ASSIGN_OR_RAISE(auto field_value, node->get_value(field_name));
-    return compare_values(field_value, op_, value_);
+    // ARROW_ASSIGN_OR_RAISE(auto field_value, node->get_value(field_name));
+    // return compare_values(field_value, op_, value_);
+    ValueType field_type;
+    const char * val_ptr = node->get_value_ptr(field_name, &field_type);
+    return compare_values(field_, val_ptr, op_, value_, field_type);
   }
 
   [[nodiscard]] arrow::compute::Expression to_arrow_expression(
       bool strip_var) const override {
-    std::string field_name = field_;
-    if (strip_var) {
-      if (const size_t dot_pos = field_.find('.');
-          dot_pos != std::string::npos) {
-        field_name = field_.substr(dot_pos + 1);
-      } else {
-        field_name = field_;
-      }
-    }
-    const auto field_expr = arrow::compute::field_ref(field_name);
+    // std::string field_name = field_;
+    // if (strip_var) {
+    //   if (const size_t dot_pos = field_.find('.');
+    //       dot_pos != std::string::npos) {
+    //     field_name = field_.substr(dot_pos + 1);
+    //   } else {
+    //     field_name = field_;
+    //   }
+    // }
+    // const auto& f = strip_var ? field_name : field_;
+
+    const auto field_expr =
+        arrow::compute::field_ref(strip_var ? field_name : field_);
     const auto value_expr = value_to_expression(value_);
 
     return apply_comparison_op(field_expr, value_expr, op_);
@@ -758,6 +774,7 @@ class Query {
         : from_(SchemaRef::parse(schema)) {}
 
     Builder& where(std::string field, CompareOp op, Value value) {
+      // std::cout << "where " <<field << ":"<< to_string(value.type()) << std::endl;
       clauses_.push_back(std::make_shared<ComparisonExpr>(std::move(field), op,
                                                           std::move(value)));
       return *this;
diff --git a/include/schema_layout.hpp b/include/schema_layout.hpp
index e40d085..2aa324e 100644
--- a/include/schema_layout.hpp
+++ b/include/schema_layout.hpp
@@ -143,9 +143,12 @@ class SchemaLayout {
   }
 
   const char* get_field_value_ptr(const char* node_data,
-                                  const std::string& field_name) const {
+                                  const std::string& field_name, ValueType* out_type) const {
     const size_t field_index = get_field_index(field_name);
     const FieldLayout& field = fields_[field_index];
+    if (out_type) {  // optional out-param; pass nullptr to skip the type
+      *out_type = field.type;
+    }  // NOTE(review): field_index is used above without a range check
     return get_field_value_ptr(node_data, field);
   }
 
@@ -237,8 +240,13 @@ class SchemaLayout {
   }
 
   size_t get_field_index(const std::string& name) const {
-    const auto it = field_index_.find(name);
-    return it != field_index_.end() ? it->second : -1;
+    // Resolve via the name -> index map; guessing from the first character
+    // aliases unrelated fields and only fits one hard-coded schema.
+    const auto it = field_index_.find(name);
+    if (it == field_index_.end()) {
+      return static_cast<size_t>(-1);  // not-found sentinel, as before
+    }
+    return it->second;
   }
 
   const FieldLayout* get_field_layout(const std::string& name) const {
diff --git a/include/types.hpp b/include/types.hpp
index e1be9b4..3f3dfa1 100644
--- a/include/types.hpp
+++ b/include/types.hpp
@@ -168,6 +168,40 @@ class Value {
 
   ValueType type() const { return type_; }
 
+  /**
+   * @brief Address of the stored payload as raw bytes, or nullptr otherwise.
+   *
+   * Numeric and bool values yield a pointer to the scalar held in data_.
+   * Every string type yields a pointer to the StringRef alternative (not
+   * to the characters), so data_ must hold a StringRef; std::get throws
+   * std::bad_variant_access otherwise. NA and unknown types give nullptr.
+   */
+  const char* data_ptr() const {
+    const auto raw = [](const auto& payload) {
+      return reinterpret_cast<const char*>(&payload);
+    };
+    switch (type_) {
+      case ValueType::INT32:
+        return raw(std::get<int32_t>(data_));
+      case ValueType::INT64:
+        return raw(std::get<int64_t>(data_));
+      case ValueType::FLOAT:
+        return raw(std::get<float>(data_));
+      case ValueType::DOUBLE:
+        return raw(std::get<double>(data_));
+      case ValueType::BOOL:
+        return raw(std::get<bool>(data_));
+      case ValueType::STRING:
+      case ValueType::FIXED_STRING16:
+      case ValueType::FIXED_STRING32:
+      case ValueType::FIXED_STRING64:
+        return raw(std::get<StringRef>(data_));
+      case ValueType::NA:
+      default:
+        return nullptr;
+    }
+  }
+
   template <typename T>
   const T& get() const {
     return std::get<T>(data_);
diff --git a/include/utils.hpp b/include/utils.hpp
index 3640a3d..8f06882 100644
--- a/include/utils.hpp
+++ b/include/utils.hpp
@@ -159,11 +159,14 @@ static arrow::Result<std::shared_ptr<arrow::Table>> create_table(
   for (const auto& node : nodes) {
     for (int i = 0; i < schema->num_fields(); i++) {
       const auto& field = schema->field(i);
-      auto field_result = node->get_value_ptr(field->name());
-      if (!field_result.ok()) {
-        ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
-      } else {
-        const auto value_ptr = field_result.ValueOrDie();
+      auto value_ptr = node->get_value_ptr(field->name(), nullptr);
+
+
+      // if (!field_result.ok()) {
+      //   ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
+      // }
+      // else {
+        // const auto value_ptr = field_result.ValueOrDie();
         if (value_ptr == nullptr) {
           ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
         } else {
@@ -210,7 +213,7 @@ static arrow::Result<std::shared_ptr<arrow::Table>> create_table(
             default:
               return arrow::Status::NotImplemented("Unsupported type: ",
                                                    field->type()->ToString());
-          }
+
         }
       }
     }
diff --git a/src/core.cpp b/src/core.cpp
index 6073d78..8af4c68 100644
--- a/src/core.cpp
+++ b/src/core.cpp
@@ -107,7 +107,12 @@ arrow::Result<std::shared_ptr<arrow::Scalar>> value_ptr_to_arrow_scalar(
       return arrow::MakeScalar(*reinterpret_cast<const double*>(ptr));
     case ValueType::STRING: {
       auto str_ref = *reinterpret_cast<const StringRef*>(ptr);
-      return arrow::MakeScalar(str_ref.to_string());
+      if (str_ref.is_null()) {
+        return arrow::MakeNullScalar(arrow::utf8());
+      }
+      // Create string directly from StringRef without intermediate to_string()
+      // call
+      return arrow::MakeScalar(std::string(str_ref.data, str_ref.length));
     }
     case ValueType::BOOL:
       return arrow::MakeScalar(*reinterpret_cast<const bool*>(ptr));
@@ -171,14 +176,15 @@ arrow::compute::Expression where_condition_to_expression(
 arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
     const std::shared_ptr<arrow::Schema>& schema,
     const std::vector<std::shared_ptr<Node>>& nodes) {
-  log_debug("Creating table from {} nodes with schema '{}'", nodes.size(),
-            schema->ToString());
+  // log_debug("Creating table from {} nodes with schema '{}'", nodes.size(),
+  //           schema->ToString());
 
   // Create builders for each field
   std::vector<std::unique_ptr<arrow::ArrayBuilder>> builders;
+  builders.reserve(schema->fields().size());
   for (const auto& field : schema->fields()) {
-    log_debug("Creating builder for field '{}' with type {}", field->name(),
-              field->type()->ToString());
+    // log_debug("Creating builder for field '{}' with type {}", field->name(),
+    //           field->type()->ToString());
     auto builder_result = arrow::MakeBuilder(field->type());
     if (!builder_result.ok()) {
       log_error("Failed to create builder for field '{}': {}", field->name(),
@@ -189,21 +195,20 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Populate builders with data from each node
-  log_debug("Adding data from {} nodes to builders", nodes.size());
+  // log_debug("Adding data from {} nodes to builders", nodes.size());
   for (const auto& node : nodes) {
     // Add each field's value to the appropriate builder
     for (int i = 0; i < schema->num_fields(); i++) {
-      auto field = schema->field(i);
+      const auto& field = schema->field(i);
       const auto& field_name = field->name();
 
       // Find the value in the node's data
-      auto res = node->get_value_ptr(field_name);
-      if (res.ok()) {
+      ValueType value_type;
+      const char* value = node->get_value_ptr(field_name, &value_type);
+
         // Convert Value to Arrow scalar and append to builder
-        auto value = res.ValueOrDie();
         if (value) {
-          auto scalar_result = value_ptr_to_arrow_scalar(
-              value, arrow_type_to_value_type(field->type()));
+          auto scalar_result = value_ptr_to_arrow_scalar(value, value_type);
           if (!scalar_result.ok()) {
             log_error("Failed to convert value to scalar for field '{}': {}",
                       field_name, scalar_result.status().ToString());
@@ -218,7 +223,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
             return status;
           }
         } else {
-          log_debug("Null value for field '{}', appending null", field_name);
+          // log_debug("Null value for field '{}', appending null", field_name);
           auto status = builders[i]->AppendNull();
           if (!status.ok()) {
             log_error("Failed to append null for field '{}': {}", field_name,
@@ -226,20 +231,22 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
             return status;
           }
         }
-      } else {
-        log_debug("Field '{}' not found in node, appending null", field_name);
-        auto status = builders[i]->AppendNull();
-        if (!status.ok()) {
-          log_error("Failed to append null for field '{}': {}", field_name,
-                    status.ToString());
-          return status;
-        }
       }
-    }
+    // else {
+    //     // log_debug("Field '{}' not found in node, appending null",
+    //     // field_name);
+    //     auto status = builders[i]->AppendNull();
+    //     if (!status.ok()) {
+    //       log_error("Failed to append null for field '{}': {}", field_name,
+    //                 status.ToString());
+    //       return status;
+    //     }
+    //   }
+    // }
   }
 
   // Finish building arrays
-  log_debug("Finalizing arrays from builders");
+  // log_debug("Finalizing arrays from builders");
   std::vector<std::shared_ptr<arrow::Array>> arrays;
   arrays.reserve(builders.size());
   for (auto& builder : builders) {
@@ -253,26 +260,26 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Create table
-  log_debug("Creating table with {} rows and {} columns",
-            arrays.empty() ? 0 : arrays[0]->length(), arrays.size());
+  // log_debug("Creating table with {} rows and {} columns",
+  //           arrays.empty() ? 0 : arrays[0]->length(), arrays.size());
   return arrow::Table::Make(schema, arrays);
 }
 
 arrow::Result<std::shared_ptr<arrow::Table>> filter(
     std::shared_ptr<arrow::Table> table, const WhereExpr& condition,
     bool strip_var) {
-  log_debug("Filtering table with WhereCondition: {}", condition.toString());
+  // log_debug("Filtering table with WhereCondition: {}", condition.toString());
 
   try {
     // Convert WhereCondition to Arrow compute expression
     auto filter_expr = where_condition_to_expression(condition, strip_var);
 
-    log_debug("Creating in-memory dataset from table with {} rows",
-              table->num_rows());
+    // log_debug("Creating in-memory dataset from table with {} rows",
+    //           table->num_rows());
     auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(table);
 
     // Create scanner builder
-    log_debug("Creating scanner builder");
+    // log_debug("Creating scanner builder");
     auto scan_builder_result = dataset->NewScan();
     if (!scan_builder_result.ok()) {
       log_error("Failed to create scanner builder: {}",
@@ -281,14 +288,14 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
     auto scan_builder = scan_builder_result.ValueOrDie();
 
-    log_debug("Applying compound filter to scanner builder");
+    // log_debug("Applying compound filter to scanner builder");
     auto filter_status = scan_builder->Filter(filter_expr);
     if (!filter_status.ok()) {
       log_error("Failed to apply filter: {}", filter_status.ToString());
       return filter_status;
     }
 
-    log_debug("Finishing scanner");
+    // log_debug("Finishing scanner");
     auto scanner_result = scan_builder->Finish();
     if (!scanner_result.ok()) {
       log_error("Failed to finish scanner: {}",
@@ -297,7 +304,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
     auto scanner = scanner_result.ValueOrDie();
 
-    log_debug("Executing scan to table");
+    // log_debug("Executing scan to table");
     auto table_result = scanner->ToTable();
     if (!table_result.ok()) {
       log_error("Failed to convert scan results to table: {}",
@@ -306,8 +313,8 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
 
     auto result_table = table_result.ValueOrDie();
-    log_debug("Filter completed: {} rows in, {} rows out", table->num_rows(),
-              result_table->num_rows());
+    // log_debug("Filter completed: {} rows in, {} rows out", table->num_rows(),
+    //           result_table->num_rows());
     return result_table;
 
   } catch (const std::exception& e) {
@@ -567,7 +574,7 @@ struct QueryState {
 
 arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
     const QueryState& query_state) {
-  log_debug("Building schema for denormalized table");
+  // log_debug("Building schema for denormalized table");
 
   std::set<std::string> processed_fields;
   std::vector<std::shared_ptr<arrow::Field>> fields;
@@ -576,7 +583,7 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
   // First add fields from the FROM schema
   std::string from_schema = query_state.from.value();
 
-  log_debug("Adding fields from FROM schema '{}'", from_schema);
+  // log_debug("Adding fields from FROM schema '{}'", from_schema);
 
   auto schema_result = query_state.schema_registry->get_arrow(
       query_state.aliases.at(from_schema));
@@ -603,7 +610,7 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
   }
 
   for (const auto& schema_ref : unique_schemas) {
-    log_debug("Adding fields from schema '{}'", schema_ref.value());
+    // log_debug("Adding fields from schema '{}'", schema_ref.value());
 
     schema_result = query_state.schema_registry->get_arrow(
         query_state.aliases.at(schema_ref.value()));
@@ -688,7 +695,7 @@ struct Row {
     for (size_t i = 0; i < n; ++i) {
       const auto& field = fields[i];
       const auto& full_name = fq_field_names[i];
-      this->set_cell(full_name, node->get_value_ptr(field->name()).ValueOrDie(),
+      this->set_cell(full_name, node->get_value_ptr(field->name(), nullptr),
                      field->type());
     }
     // schema_ids[node->get_schema()->name()] = node->id;
@@ -1016,7 +1023,7 @@ struct RowNode {
       }
       final_merged_rows = std::move(temp_product_accumulator);
       if (final_merged_rows.empty()) {
-        log_debug("product_accumulator is empty. stop merge");
+        // log_debug("product_accumulator is empty. stop merge");
         break;
       }
     }
@@ -1394,14 +1401,14 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     }
   }
 
-  log_debug("Processing {} schemas with their respective join types",
-            ordered_schemas.size());
+  // log_debug("Processing {} schemas with their respective join types",
+  //           ordered_schemas.size());
 
   // Process each schema in order
   for (const auto& schema_ref : ordered_schemas) {
     TraverseType join_type = schema_join_types[schema_ref.value()];
-    log_debug("Processing schema '{}' with join type {}", schema_ref.value(),
-              static_cast<int>(join_type));
+    // log_debug("Processing schema '{}' with join type {}", schema_ref.value(),
+    //           static_cast<int>(join_type));
 
     if (!query_state.ids.contains(schema_ref.value())) {
       log_warn("Schema '{}' not found in query state IDs", schema_ref.value());
@@ -1477,8 +1484,8 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     }
   }
 
-  log_debug("Generated {} total rows after processing all schemas",
-            rows->size());
+  // log_debug("Generated {} total rows after processing all schemas",
+  //           rows->size());
   return rows;
 }
 
@@ -1697,7 +1704,7 @@ std::vector<std::shared_ptr<WhereExpr>> get_where_to_inline(
     if (clauses[i]->type() == Clause::Type::WHERE) {
       auto where_expr = std::dynamic_pointer_cast<WhereExpr>(clauses[i]);
       if (where_expr->can_inline(target_var)) {
-        log_debug("inline where: '{}'", where_expr->toString());
+        // log_debug("inline where: '{}'", where_expr->toString());
         inlined.push_back(where_expr);
       }
     }
@@ -1711,7 +1718,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> inline_where(
     const std::vector<std::shared_ptr<WhereExpr>>& where_exprs) {
   auto curr_table = std::move(table);
   for (const auto& exp : where_exprs) {
-    log_debug("inline where '{}'", exp->toString());
+    // log_debug("inline where '{}'", exp->toString());
     auto result = filter(curr_table, *exp, true);
     if (!result.ok()) {
       log_error(
@@ -1754,14 +1761,14 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     const Query& query) const {
   QueryState query_state;
   auto result = std::make_shared<QueryResult>();
-  log_debug("Executing query starting from schema '{}'",
-            query.from().toString());
+  // log_debug("Executing query starting from schema '{}'",
+  //           query.from().toString());
   query_state.node_manager = this->node_manager_;
   query_state.schema_registry = this->schema_registry_;
   query_state.from = query.from();
 
   {
-    log_debug("processing 'from' {}", query.from().toString());
+    // log_debug("processing 'from' {}", query.from().toString());
     // Precompute tag for FROM schema (alias-based hash)
     query_state.from = query.from();
     query_state.from.set_tag(QueryState::compute_alias_tag(query_state.from));
@@ -1798,7 +1805,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     }
   }
 
-  log_debug("Processing {} query clauses", query.clauses().size());
+  // log_debug("Processing {} query clauses", query.clauses().size());
 
   // Precompute 16-bit alias-based tags for all SchemaRefs
   // Also precompute fully-qualified field names per alias used in the query
@@ -1809,7 +1816,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
       case Clause::Type::WHERE: {
         auto where = std::dynamic_pointer_cast<WhereExpr>(clause);
         if (where->inlined()) {
-          log_debug("where '{}' is inlined, skip", where->toString());
+          // log_debug("where '{}' is inlined, skip", where->toString());
           continue;
         }
         auto variables = where->get_all_variables();
@@ -1820,7 +1827,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               where->toString());
         }
         if (variables.size() == 1) {
-          log_debug("Processing WHERE clause: '{}'", where->toString());
+          // log_debug("Processing WHERE clause: '{}'", where->toString());
 
           std::unordered_map<std::string, std::set<int64_t>> new_front_ids;
           std::string variable = *variables.begin();
@@ -1838,8 +1845,8 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           ARROW_RETURN_NOT_OK(query_state.update_table(
               filtered_table_result.ValueOrDie(), SchemaRef::parse(variable)));
         } else {
-          log_debug("Add compound WHERE expression: '{}' to post process",
-                    where->toString());
+          // log_debug("Add compound WHERE expression: '{}' to post process",
+          //           where->toString());
           post_where.emplace_back(where);
         }
         break;
@@ -1971,11 +1978,11 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
         if (traverse->traverse_type() == TraverseType::Inner &&
             !unmatched_source_ids.empty()) {
           for (auto id : unmatched_source_ids) {
-            log_debug("remove unmatched node={}:{}", source.value(), id);
+            // log_debug("remove unmatched node={}:{}", source.value(), id);
             query_state.remove_node(id, source);
           }
-          log_debug("rebuild table for schema {}:{}", source.value(),
-                    query_state.aliases[source.value()]);
+          // log_debug("rebuild table for schema {}:{}", source.value(),
+          //           query_state.aliases[source.value()]);
           auto table_result =
               filter_table_by_id(query_state.tables[source.value()],
                                  query_state.ids[source.value()]);
@@ -1984,8 +1991,8 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           }
           query_state.tables[source.value()] = table_result.ValueOrDie();
         }
-        log_debug("found {} neighbors for {}", matched_target_ids.size(),
-                  traverse->target().toString());
+        // log_debug("found {} neighbors for {}", matched_target_ids.size(),
+        //           traverse->target().toString());
 
         if (traverse->traverse_type() == TraverseType::Inner) {
           // intersect
@@ -2066,7 +2073,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     return output_schema_res.status();
   }
   const auto output_schema = output_schema_res.ValueOrDie();
-  log_debug("output_schema={}", output_schema->ToString());
+  // log_debug("output_schema={}", output_schema->ToString());
 
   auto row_res = populate_rows(query.execution_config(), query_state,
                                query_state.traversals, output_schema);
@@ -2083,7 +2090,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
   auto output_table = output_table_res.ValueOrDie();
   for (const auto& expr : post_where) {
     result->mutable_execution_stats().num_where_clauses_post_processed++;
-    log_debug("post process where: {}", expr->toString());
+    // log_debug("post process where: {}", expr->toString());
     output_table = filter(output_table, *expr, false).ValueOrDie();
   }
   result->set_table(apply_select(query.select(), output_table));

From 8c0268d78d0d733df6f1eb5bde1c0f9646200a22 Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Sun, 21 Sep 2025 14:13:28 -0400
Subject: [PATCH 3/4] optimize perf

---
 CMakeLists.txt            |  25 ++
 include/logger.hpp        |  64 ++++
 include/schema_layout.hpp |  12 +-
 include/types.hpp         | 147 +++++++++
 src/core.cpp              | 629 +++++++++++++++++++++++---------------
 src/edge_store.cpp        |  21 +-
 6 files changed, 635 insertions(+), 263 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 14c02dd..1b1e825 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,17 @@ project(tundradb)
 set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
+# Configure compile-time logging levels.
+# Generator expressions are evaluated per configuration, so multi-config
+# generators (Visual Studio, Xcode, Ninja Multi-Config), where
+# CMAKE_BUILD_TYPE is empty at configure time, are handled as well.
+# Release and RelWithDebInfo log at INFO; everything else logs at DEBUG.
+set(TUNDRA_LOG_INFO_CONFIG "$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>")
+add_compile_definitions(
+    $<${TUNDRA_LOG_INFO_CONFIG}:TUNDRA_LOG_LEVEL_INFO>
+    $<$<NOT:${TUNDRA_LOG_INFO_CONFIG}>:TUNDRA_LOG_LEVEL_DEBUG>
+)
+
 # Enable Address Sanitizer
 #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
 #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
@@ -269,6 +280,20 @@ target_link_libraries(tundra_shell
         LLVMCore
 )
 
+# Benchmark runner executable for loading CSVs and running queries
+add_executable(tundra_bench_runner bench/tundra_runner.cpp)
+target_link_libraries(tundra_bench_runner
+    PRIVATE
+        core
+        Arrow::arrow_shared
+        ${ARROW_DATASET_LIB}
+        Parquet::parquet_shared
+        ${UUID_LIBRARY}
+        ${ANTLR4_RUNTIME}
+        LLVMSupport
+        LLVMCore
+)
+
 # ANTLR Integration
 # Find Java for running ANTLR generator
 find_package(Java REQUIRED)
diff --git a/include/logger.hpp b/include/logger.hpp
index 48f8266..af3a425 100644
--- a/include/logger.hpp
+++ b/include/logger.hpp
@@ -278,6 +278,70 @@ class ContextLogger {
   std::string prefix_;
 };
 
+// ============================================================================
+// COMPILE-TIME LOGGING OPTIMIZATIONS
+// ============================================================================
+
+// Compile-time log level, selected by a TUNDRA_LOG_LEVEL_* compile definition
+#ifdef TUNDRA_LOG_LEVEL_DEBUG
+  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::DEBUG;
+#elif defined(TUNDRA_LOG_LEVEL_INFO)
+  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::INFO;
+#elif defined(TUNDRA_LOG_LEVEL_WARN)
+  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::WARN;
+#elif defined(TUNDRA_LOG_LEVEL_ERROR)
+  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::ERROR;
+#else
+  // No level defined: INFO when NDEBUG is set, DEBUG otherwise
+  #ifdef NDEBUG
+    constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::INFO;
+  #else
+    constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::DEBUG;
+  #endif
+#endif
+
+// Compile-time level checks. NOTE(review): assumes LogLevel orders DEBUG < INFO < WARN - confirm.
+constexpr bool is_debug_enabled() {
+  return COMPILE_TIME_LOG_LEVEL <= LogLevel::DEBUG;
+}
+
+constexpr bool is_info_enabled() {
+  return COMPILE_TIME_LOG_LEVEL <= LogLevel::INFO;
+}
+
+constexpr bool is_warn_enabled() {
+  return COMPILE_TIME_LOG_LEVEL <= LogLevel::WARN;
+}
+
+// Logging macros: when the level is disabled the call sits in a discarded `if constexpr` branch
+#define LOG_DEBUG_FAST(msg, ...) \
+  do { \
+    if constexpr (is_debug_enabled()) { \
+      log_debug(msg, ##__VA_ARGS__); \
+    } \
+  } while(0)
+
+#define LOG_INFO_FAST(msg, ...) \
+  do { \
+    if constexpr (is_info_enabled()) { \
+      log_info(msg, ##__VA_ARGS__); \
+    } \
+  } while(0)
+
+#define LOG_WARN_FAST(msg, ...) \
+  do { \
+    if constexpr (is_warn_enabled()) { \
+      log_warn(msg, ##__VA_ARGS__); \
+    } \
+  } while(0)
+
+// Conditional code blocks: the statement following the macro becomes the `if constexpr` body
+#define IF_DEBUG_ENABLED \
+  if constexpr (is_debug_enabled())
+
+#define IF_INFO_ENABLED \
+  if constexpr (is_info_enabled())
+
 }  // namespace tundradb
 
 #endif  // LOGGER_HPP
\ No newline at end of file
diff --git a/include/schema_layout.hpp b/include/schema_layout.hpp
index 2aa324e..a04ee2e 100644
--- a/include/schema_layout.hpp
+++ b/include/schema_layout.hpp
@@ -240,12 +240,12 @@ class SchemaLayout {
   }
 
   size_t get_field_index(const std::string& name) const {
-    // const auto it = field_index_.find(name);
-    // return it != field_index_.end() ? it->second : -1;
-    if (name[0] == 'i' && name.size() == 2) return 0;
-    if (name[0] == 'n' || name[0] == 'i') return 1;
-    if (name[0] == 'a') return 2;
-    if (name[0] == 'c') return 3;
+    // Resolve the field through field_index_ rather than guessing from the
+    // first character of the name. Unknown names fall through to the sentinel
+    // returned below, which converts to static_cast<size_t>(-1).
+    if (const auto it = field_index_.find(name); it != field_index_.end()) {
+      return it->second;
+    }
     return  -1;
   }
 
diff --git a/include/types.hpp b/include/types.hpp
index 3f3dfa1..98990e5 100644
--- a/include/types.hpp
+++ b/include/types.hpp
@@ -304,6 +304,153 @@ class Value {
       data_;
 };
 
+/**
+ * @brief Non-owning, typed view of a single field value.
+ *
+ * `data` points at the raw bytes of the value and `type` says how to
+ * interpret them. For ValueType::STRING the bytes are a StringRef, not a
+ * NUL-terminated character array. A null `data` pointer represents a missing
+ * (NULL) value. The view never frees the memory it points to.
+ */
+struct ValueRef {
+  const char* data;
+  ValueType type;
+
+  /// Creates a NULL value of type ValueType::NA.
+  ValueRef() : data(nullptr), type(ValueType::NA) {}
+
+  /// Creates a view over the bytes at `ptr`, interpreted as `type`.
+  ValueRef(const char* ptr, ValueType type) : data(ptr), type(type) {}
+
+  /// @return true when the view does not point at any value.
+  bool is_null() const { return data == nullptr; }
+
+  // Typed accessors. They check neither `type` nor `data`: the caller must
+  // make sure the view is non-null and holds the requested type.
+
+  int32_t as_int32() const {
+    return *reinterpret_cast<const int32_t*>(data);
+  }
+
+  int64_t as_int64() const {
+    return *reinterpret_cast<const int64_t*>(data);
+  }
+
+  float as_float() const {
+    return *reinterpret_cast<const float*>(data);
+  }
+
+  double as_double() const {
+    return *reinterpret_cast<const double*>(data);
+  }
+
+  bool as_bool() const {
+    return *reinterpret_cast<const bool*>(data);
+  }
+
+  const StringRef& as_string_ref() const {
+    return *reinterpret_cast<const StringRef*>(data);
+  }
+
+  /**
+   * @brief Copies the referenced string into an owning std::string.
+   *
+   * The bytes behind `data` are a StringRef, so the characters are read
+   * through it (pointer + length) instead of treating `data` itself as a C
+   * string. A null StringRef yields an empty string.
+   */
+  std::string as_string() const {
+    const StringRef& ref = as_string_ref();
+    return ref.is_null() ? std::string() : std::string(ref.data, ref.length);
+  }
+
+  /**
+   * @brief Converts the value into an Arrow scalar.
+   *
+   * @return A scalar of the Arrow type matching `type`; a null scalar of that
+   *         type when `data` (or the StringRef behind it) is null; or
+   *         Status::NotImplemented for value types without a mapping.
+   */
+  arrow::Result<std::shared_ptr<arrow::Scalar>> as_scalar() const {
+    const bool missing = is_null();
+    switch (type) {
+      case ValueType::INT32:
+        if (missing) return arrow::MakeNullScalar(arrow::int32());
+        return arrow::MakeScalar(as_int32());
+      case ValueType::INT64:
+        if (missing) return arrow::MakeNullScalar(arrow::int64());
+        return arrow::MakeScalar(as_int64());
+      case ValueType::FLOAT:
+        if (missing) return arrow::MakeNullScalar(arrow::float32());
+        return arrow::MakeScalar(as_float());
+      case ValueType::DOUBLE:
+        if (missing) return arrow::MakeNullScalar(arrow::float64());
+        return arrow::MakeScalar(as_double());
+      case ValueType::STRING:
+        if (missing || as_string_ref().is_null()) {
+          return arrow::MakeNullScalar(arrow::utf8());
+        }
+        return arrow::MakeScalar(as_string());
+      case ValueType::BOOL:
+        if (missing) return arrow::MakeNullScalar(arrow::boolean());
+        return arrow::MakeScalar(as_bool());
+      case ValueType::NA:
+        return arrow::MakeNullScalar(arrow::null());
+      default:
+        return arrow::Status::NotImplemented(
+            "Unsupported Value type for Arrow scalar conversion: ",
+            to_string(type));
+    }
+  }
+
+  /**
+   * @brief Deep equality: same type and same referenced value.
+   *
+   * Two views with null `data` and equal types compare equal; a null view
+   * never equals a non-null one. Strings are compared by length and content.
+   */
+  bool operator==(const ValueRef& other) const {
+    if (type != other.type) {
+      return false;
+    }
+    if (data == nullptr || other.data == nullptr) {
+      return data == other.data;  // equal only when both are null
+    }
+    switch (type) {
+      case ValueType::NA:
+        return true;
+      case ValueType::INT32:
+        return as_int32() == other.as_int32();
+      case ValueType::INT64:
+        return as_int64() == other.as_int64();
+      case ValueType::FLOAT:
+        return as_float() == other.as_float();
+      case ValueType::DOUBLE:
+        return as_double() == other.as_double();
+      case ValueType::BOOL:
+        return as_bool() == other.as_bool();
+      case ValueType::STRING: {
+        const StringRef& lhs = as_string_ref();
+        const StringRef& rhs = other.as_string_ref();
+        if (lhs.length != rhs.length) {
+          return false;
+        }
+        if (lhs.is_null() || rhs.is_null()) {
+          return lhs.is_null() && rhs.is_null();
+        }
+        return std::memcmp(lhs.data, rhs.data, lhs.length) == 0;
+      }
+      default:
+        return false;  // unknown type
+    }
+  }
+
+  bool operator!=(const ValueRef& other) const { return !(*this == other); }
+
+  /// Named alias for operator==.
+  bool equals(const ValueRef& other) const { return *this == other; }
+};
+
 // Stream operator for ValueType
 inline std::ostream& operator<<(std::ostream& os, const ValueType type) {
   return os << to_string(type);
diff --git a/src/core.cpp b/src/core.cpp
index 8af4c68..3b5e9d5 100644
--- a/src/core.cpp
+++ b/src/core.cpp
@@ -37,6 +37,77 @@ namespace tundradb {
 
 constexpr static uint64_t NODE_MASK = (1ULL << 48) - 1;
 
+
+// Derives a deterministic 16-bit tag from the alias string (SchemaRef::value()).
+// Reference: https://www.ietf.org/archive/id/draft-eastlake-fnv-21.html
+static uint16_t compute_tag(const SchemaRef& ref) {
+  // 32-bit FNV-1a over the alias bytes, XOR-folded down to 16 bits.
+  constexpr uint32_t kFnvOffsetBasis = 2166136261u;
+  constexpr uint32_t kFnvPrime = 16777619u;
+  uint32_t hash = kFnvOffsetBasis;
+  for (const unsigned char byte : ref.value()) {
+    hash = (hash ^ byte) * kFnvPrime;
+  }
+  hash ^= (hash >> 16);
+  return static_cast<uint16_t>(hash & 0xFFFFu);
+}
+
+/**
+ * @brief Creates a packed 64-bit hash code for schema+node_id pairs
+ *
+ * This function combines a schema identifier and node ID into a single 64-bit
+ * value for efficient storage and comparison in hash sets/maps. This eliminates
+ * the need for expensive string concatenation and hashing that was previously
+ * used for tracking visited nodes during graph traversal.
+ *
+ * @param schema The schema reference containing a pre-computed 16-bit tag
+ * @param node_id The node identifier; only its low 48 bits are kept (NODE_MASK)
+ *
+ * @return A 64-bit packed value with layout:
+ *         - Bits 63-48: Schema tag (16 bits)
+ *         - Bits 47-0:  Node ID (48 bits, masked)
+ *
+ * @details
+ * Memory Layout:
+ * ```
+ * 63          48 47                                  0
+ * | Schema tag  |          Node ID (48 bits)         |
+ * |  (16 bits)  |                                    |
+ * ```
+ *
+ * Performance Benefits:
+ * - Replaces string operations: "User:12345" → single uint64_t
+ * - Enables fast integer comparison instead of string hashing
+ * - Reduces memory allocations (no temporary strings)
+ * - Compatible with llvm::DenseSet for O(1) lookups
+ *
+ * Constraints:
+ * - Node IDs must fit in 48 bits (max ~281 trillion nodes)
+ * - Schema tags must be unique within query context
+ * - NODE_MASK = (1ULL << 48) - 1 = 0x0000FFFFFFFFFFFF
+ *
+ * Example:
+ * ```cpp
+ * SchemaRef user_schema = SchemaRef::parse("u:User");
+ * user_schema.set_tag(0x1234);  // Pre-computed schema tag
+ *
+ * uint64_t packed = hash_code_(user_schema, 98765);
+ * // Result: 0x12340000000181CD (schema=0x1234, node=98765=0x181CD)
+ *
+ * // Usage in visited tracking:
+ * llvm::DenseSet<uint64_t> visited;
+ * visited.insert(packed);  // Fast O(1) integer hash
+ * ```
+ *
+ * @see SchemaRef::tag() for schema tag computation
+ * @see NODE_MASK constant definition
+ */
+static uint64_t hash_code_(const SchemaRef& schema, int64_t node_id) {
+  const uint16_t schema_id16 = schema.tag();
+  return (static_cast<uint64_t>(schema_id16) << 48) |
+         (static_cast<uint64_t>(node_id) & NODE_MASK);
+}
+
 // Utility function to join containers using C++23 ranges
 template <typename Container>
 std::string join_container(const Container& container,
@@ -106,12 +177,11 @@ arrow::Result<std::shared_ptr<arrow::Scalar>> value_ptr_to_arrow_scalar(
     case ValueType::DOUBLE:
       return arrow::MakeScalar(*reinterpret_cast<const double*>(ptr));
     case ValueType::STRING: {
-      auto str_ref = *reinterpret_cast<const StringRef*>(ptr);
+      const StringRef& str_ref = *reinterpret_cast<const StringRef*>(ptr);
       if (str_ref.is_null()) {
         return arrow::MakeNullScalar(arrow::utf8());
       }
-      // Create string directly from StringRef without intermediate to_string()
-      // call
+      // Create string directly from StringRef data - safer than custom scalar
       return arrow::MakeScalar(std::string(str_ref.data, str_ref.length));
     }
     case ValueType::BOOL:
@@ -176,15 +246,19 @@ arrow::compute::Expression where_condition_to_expression(
 arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
     const std::shared_ptr<arrow::Schema>& schema,
     const std::vector<std::shared_ptr<Node>>& nodes) {
-  // log_debug("Creating table from {} nodes with schema '{}'", nodes.size(),
-  //           schema->ToString());
+   IF_DEBUG_ENABLED {
+     log_debug("Creating table from {} nodes with schema '{}'", nodes.size(),
+               schema->ToString());
+   }
 
   // Create builders for each field
   std::vector<std::unique_ptr<arrow::ArrayBuilder>> builders;
   builders.reserve(schema->fields().size());
   for (const auto& field : schema->fields()) {
-    // log_debug("Creating builder for field '{}' with type {}", field->name(),
-    //           field->type()->ToString());
+     IF_DEBUG_ENABLED {
+       log_debug("Creating builder for field '{}' with type {}", field->name(),
+                 field->type()->ToString());
+     }
     auto builder_result = arrow::MakeBuilder(field->type());
     if (!builder_result.ok()) {
       log_error("Failed to create builder for field '{}': {}", field->name(),
@@ -195,7 +269,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Populate builders with data from each node
-  // log_debug("Adding data from {} nodes to builders", nodes.size());
+   IF_DEBUG_ENABLED {
+     log_debug("Adding data from {} nodes to builders", nodes.size());
+   }
   for (const auto& node : nodes) {
     // Add each field's value to the appropriate builder
     for (int i = 0; i < schema->num_fields(); i++) {
@@ -223,7 +299,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
             return status;
           }
         } else {
-          // log_debug("Null value for field '{}', appending null", field_name);
+           IF_DEBUG_ENABLED {
+             log_debug("Null value for field '{}', appending null", field_name);
+           }
           auto status = builders[i]->AppendNull();
           if (!status.ok()) {
             log_error("Failed to append null for field '{}': {}", field_name,
@@ -246,7 +324,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Finish building arrays
-  // log_debug("Finalizing arrays from builders");
+   IF_DEBUG_ENABLED {
+     log_debug("Finalizing arrays from builders");
+   }
   std::vector<std::shared_ptr<arrow::Array>> arrays;
   arrays.reserve(builders.size());
   for (auto& builder : builders) {
@@ -260,26 +340,34 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Create table
-  // log_debug("Creating table with {} rows and {} columns",
-  //           arrays.empty() ? 0 : arrays[0]->length(), arrays.size());
+   IF_DEBUG_ENABLED {
+     log_debug("Creating table with {} rows and {} columns",
+               arrays.empty() ? 0 : arrays[0]->length(), arrays.size());
+   }
   return arrow::Table::Make(schema, arrays);
 }
 
 arrow::Result<std::shared_ptr<arrow::Table>> filter(
     std::shared_ptr<arrow::Table> table, const WhereExpr& condition,
     bool strip_var) {
-  // log_debug("Filtering table with WhereCondition: {}", condition.toString());
+   IF_DEBUG_ENABLED {
+     log_debug("Filtering table with WhereCondition: {}", condition.toString());
+   }
 
   try {
     // Convert WhereCondition to Arrow compute expression
     auto filter_expr = where_condition_to_expression(condition, strip_var);
 
-    // log_debug("Creating in-memory dataset from table with {} rows",
-    //           table->num_rows());
+     IF_DEBUG_ENABLED {
+       log_debug("Creating in-memory dataset from table with {} rows",
+                 table->num_rows());
+     }
     auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(table);
 
     // Create scanner builder
-    // log_debug("Creating scanner builder");
+     IF_DEBUG_ENABLED {
+       log_debug("Creating scanner builder");
+     }
     auto scan_builder_result = dataset->NewScan();
     if (!scan_builder_result.ok()) {
       log_error("Failed to create scanner builder: {}",
@@ -288,14 +376,18 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
     auto scan_builder = scan_builder_result.ValueOrDie();
 
-    // log_debug("Applying compound filter to scanner builder");
+     IF_DEBUG_ENABLED {
+       log_debug("Applying compound filter to scanner builder");
+     }
     auto filter_status = scan_builder->Filter(filter_expr);
     if (!filter_status.ok()) {
       log_error("Failed to apply filter: {}", filter_status.ToString());
       return filter_status;
     }
 
-    // log_debug("Finishing scanner");
+     IF_DEBUG_ENABLED {
+       log_debug("Finishing scanner");
+     }
     auto scanner_result = scan_builder->Finish();
     if (!scanner_result.ok()) {
       log_error("Failed to finish scanner: {}",
@@ -304,7 +396,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
     auto scanner = scanner_result.ValueOrDie();
 
-    // log_debug("Executing scan to table");
+     IF_DEBUG_ENABLED {
+       log_debug("Executing scan to table");
+     }
     auto table_result = scanner->ToTable();
     if (!table_result.ok()) {
       log_error("Failed to convert scan results to table: {}",
@@ -313,8 +407,10 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
 
     auto result_table = table_result.ValueOrDie();
-    // log_debug("Filter completed: {} rows in, {} rows out", table->num_rows(),
-    //           result_table->num_rows());
+     IF_DEBUG_ENABLED {
+       log_debug("Filter completed: {} rows in, {} rows out", table->num_rows(),
+                 result_table->num_rows());
+     }
     return result_table;
 
   } catch (const std::exception& e) {
@@ -453,20 +549,6 @@ struct QueryState {
     return true;
   }
 
-  // Deterministic 16-bit tag from alias string (SchemaRef::value()).
-  // https://www.ietf.org/archive/id/draft-eastlake-fnv-21.html
-  static uint16_t compute_alias_tag(const SchemaRef& ref) {
-    // FNV-1a 32-bit, then fold to 16 bits.
-    const std::string& s = ref.value();
-    uint32_t h = 2166136261u;
-    for (unsigned char c : s) {
-      h ^= c;
-      h *= 16777619u;
-    }
-    h ^= (h >> 16);
-    return static_cast<uint16_t>(h & 0xFFFFu);
-  }
-
   const llvm::DenseSet<int64_t>& get_ids(const SchemaRef& schema_ref) {
     return ids[schema_ref.value()];
   }
@@ -574,7 +656,9 @@ struct QueryState {
 
 arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
     const QueryState& query_state) {
-  // log_debug("Building schema for denormalized table");
+   IF_DEBUG_ENABLED {
+     log_debug("Building schema for denormalized table");
+   }
 
   std::set<std::string> processed_fields;
   std::vector<std::shared_ptr<arrow::Field>> fields;
@@ -583,7 +667,9 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
   // First add fields from the FROM schema
   std::string from_schema = query_state.from.value();
 
-  // log_debug("Adding fields from FROM schema '{}'", from_schema);
+   IF_DEBUG_ENABLED {
+     log_debug("Adding fields from FROM schema '{}'", from_schema);
+   }
 
   auto schema_result = query_state.schema_registry->get_arrow(
       query_state.aliases.at(from_schema));
@@ -610,7 +696,9 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
   }
 
   for (const auto& schema_ref : unique_schemas) {
-    // log_debug("Adding fields from schema '{}'", schema_ref.value());
+     IF_DEBUG_ENABLED {
+       log_debug("Adding fields from schema '{}'", schema_ref.value());
+     }
 
     schema_result = query_state.schema_registry->get_arrow(
         query_state.aliases.at(schema_ref.value()));
@@ -641,7 +729,7 @@ struct PathSegment {
   int64_t node_id;
 
   std::string toString() const {
-    return "";  // schema + ":" + std::to_string(node_id);
+    return std::to_string(schema) + ":" + std::to_string(node_id);
   }
 
   bool operator==(const PathSegment& other) const {
@@ -673,19 +761,19 @@ std::string join_schema_path(const std::vector<PathSegment>& schema_path) {
 
 struct Row {
   int64_t id;
-  std::unordered_map<std::string, std::shared_ptr<arrow::Scalar>> cells;
+  llvm::StringMap<ValueRef> cells;
   std::vector<PathSegment> path;
-  std::unordered_map<std::string, int64_t> schema_ids;  // can we use tag
+  llvm::StringMap<int64_t> schema_ids;  // can we use tag
   bool schema_ids_set = false;
 
-  void set_cell(const std::string& name,
-                std::shared_ptr<arrow::Scalar> scalar) {
-    cells[name] = std::move(scalar);
-  }
+  // void set_cell(const std::string& name,
+  //               std::shared_ptr<arrow::Scalar> scalar) {
+  //   cells[name] = std::move(scalar);
+  // }
 
-  bool has_value(const std::string& name) const {
-    return cells.contains(name) && cells.at(name) != nullptr &&
-           cells.at(name)->is_valid;
+  bool has_value(const llvm::StringRef name) const {
+    const auto cell = cells.find(name);  // one lookup instead of contains()+at()
+    return cell != cells.end() && cell->second.data != nullptr;
   }
 
   void set_cell_from_node(const std::vector<std::string>& fq_field_names,
@@ -704,16 +792,8 @@ struct Row {
   // New set_cell method for Value objects
   void set_cell(const std::string& name, const char* ptr,
                 const ValueType type) {
-    if (ptr) {
-      auto scalar_result = value_ptr_to_arrow_scalar(ptr, type);
-      if (scalar_result.ok()) {
-        cells[name] = scalar_result.ValueOrDie();
-        return;
-      }
-    }
-
-    // Default to null if value is null or conversion fails
-    cells[name] = nullptr;
+    // Plain assignment: unlike try_emplace it also overwrites an existing cell.
+    cells[name] = ValueRef(ptr, type);
   }
 
   // void set_cell(const std::string& name, std::shared_ptr<arrow::Array> array)
@@ -735,23 +815,23 @@ struct Row {
   }
 
   // todo replace
-  const std::unordered_map<std::string, int64_t>& extract_schema_ids() {
+  const llvm::StringMap<int64_t>& extract_schema_ids() {
     if (schema_ids_set) {
       return schema_ids;
     }
     // std::unordered_map<std::string, int64_t> result;
     for (const auto& [field_name, value] : cells) {
-      if (!value || !value->is_valid) continue;
+      if (!value.data) continue;
 
       // Extract schema prefix (everything before the first dot)
       size_t dot_pos = field_name.find('.');
       if (dot_pos != std::string::npos) {
-        std::string schema = field_name.substr(0, dot_pos);
+        llvm::StringRef schema = field_name.substr(0, dot_pos);
 
         // Store ID for this schema if it's an ID field
         if (field_name.substr(dot_pos + 1) == "id") {
-          auto id_scalar = std::static_pointer_cast<arrow::Int64Scalar>(value);
-          schema_ids[schema] = id_scalar->value;
+          // auto id_scalar = value.get_as_int64(); // std::static_pointer_cast<arrow::Int64Scalar>(value);
+          schema_ids[schema] = value.as_int64();
         }
       }
     }
@@ -766,7 +846,7 @@ struct Row {
     // merged->id = this->id;
     for (const auto& [name, value] : other->cells) {
       if (!merged->has_value(name)) {
-        merged->cells[name] = value;
+        merged->cells[name] = value;  // also replaces an existing NULL cell
       }
     }
     return merged;
@@ -778,42 +858,50 @@ struct Row {
     ss << "path='" << join_schema_path(path) << "', ";
 
     bool first = true;
-    for (const auto& [field_name, scalar] : cells) {
+    for (const auto& [field_name, value_ref] : cells) {
       if (!first) {
         ss << ", ";
       }
       first = false;
 
-      ss << field_name << ": ";
+      ss << field_name.str() << ": ";
 
-      if (!scalar) {
+      if (value_ref.data == nullptr) {
         ss << "NULL";
-      } else if (scalar->is_valid) {
-        // Handle different scalar types appropriately
-        switch (scalar->type->id()) {
-          case arrow::Type::INT64:
-            ss << std::static_pointer_cast<arrow::Int64Scalar>(scalar)->value;
+      } else {
+        // Handle different ValueRef types appropriately
+        switch (value_ref.type) {
+          case ValueType::INT32:
+            ss << value_ref.as_int32();
             break;
-          case arrow::Type::DOUBLE:
-            ss << std::static_pointer_cast<arrow::DoubleScalar>(scalar)->value;
+          case ValueType::INT64:
+            ss << value_ref.as_int64();
             break;
-          case arrow::Type::STRING:
-          case arrow::Type::LARGE_STRING:
-            ss << "\""
-               << std::static_pointer_cast<arrow::StringScalar>(scalar)->view()
-               << "\"";
+          case ValueType::FLOAT:
+            ss << *reinterpret_cast<const float*>(value_ref.data);
             break;
-          case arrow::Type::BOOL:
-            ss << (std::static_pointer_cast<arrow::BooleanScalar>(scalar)->value
-                       ? "true"
-                       : "false");
+          case ValueType::DOUBLE:
+            ss << value_ref.as_double();
+            break;
+          case ValueType::BOOL:
+            ss << (value_ref.as_bool() ? "true" : "false");
+            break;
+          case ValueType::STRING: {
+            const StringRef& str_ref = value_ref.as_string_ref();
+            if (str_ref.is_null()) {
+              ss << "NULL";
+            } else {
+              ss << "\"" << std::string(str_ref.data, str_ref.length) << "\"";
+            }
+            break;
+          }
+          case ValueType::NA:
+            ss << "NULL";
             break;
           default:
-            ss << scalar->ToString();
+            ss << "<unknown_type>";
             break;
         }
-      } else {
-        ss << "NULL";
       }
     }
 
@@ -827,18 +915,19 @@ static Row create_empty_row_from_schema(
   Row new_row;
   new_row.id = -1;
   for (const auto& field : final_output_schema->fields()) {
+    new_row.cells.try_emplace(field->name(), nullptr, arrow_type_to_value_type(field->type()));
     // Create a null scalar of the correct type
-    auto null_scalar = arrow::MakeNullScalar(field->type());
-    if (null_scalar != nullptr) {
-      new_row.cells[field->name()] = null_scalar;
-    } else {
-      // If creating a null scalar fails, use nullptr as a fallback
-      new_row.cells[field->name()] = nullptr;
-      if (Logger::get_instance().get_level() >= LogLevel::WARN) {
-        log_warn("Failed to create null scalar for field '{}' with type '{}'",
-                 field->name(), field->type()->ToString());
-      }
-    }
+    // auto null_scalar = arrow::MakeNullScalar(field->type());
+    // if (null_scalar != nullptr) {
+    //   new_row.cells[field->name()] = null_scalar;
+    // } else {
+    //   // If creating a null scalar fails, use nullptr as a fallback
+    //   new_row.cells.try_emplace(field->name(), nullptr, field->type());
+    //   if (Logger::get_instance().get_level() >= LogLevel::WARN) {
+    //     log_warn("Failed to create null scalar for field '{}' with type '{}'",
+    //              field->name(), field->type()->ToString());
+    //   }
+    // }
   }
   return new_row;
 }
@@ -966,9 +1055,9 @@ struct RowNode {
           bool can_merge = true;
 
           // Get variable prefixes (schema names) from cells
-          std::unordered_map<std::string, int64_t> schema_ids_r1 =
+          llvm::StringMap<int64_t> schema_ids_r1 =
               r1_from_current_group->extract_schema_ids();
-          std::unordered_map<std::string, int64_t> schema_ids_r2 =
+           llvm::StringMap<int64_t> schema_ids_r2 =
               r2_from_previous_product->extract_schema_ids();
 
           // Check for conflicts - same schema name but different IDs
@@ -976,7 +1065,7 @@ struct RowNode {
             if (schema_ids_r2.contains(schema) &&
                 schema_ids_r2[schema] != id1) {
               // Found a conflict - same schema but different IDs
-              if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+              IF_DEBUG_ENABLED {
                 log_debug(
                     "Conflict detected: Schema '{}' has different IDs: {} vs "
                     "{}",
@@ -991,15 +1080,15 @@ struct RowNode {
           if (can_merge) {
             for (const auto& [field_name, value1] :
                  r1_from_current_group->cells) {
-              if (!value1 || !value1->is_valid) continue;
+              if (!value1.data) continue;
 
               auto it = r2_from_previous_product->cells.find(field_name);
-              if (it != r2_from_previous_product->cells.end() && it->second &&
-                  it->second->is_valid) {
+              if (it != r2_from_previous_product->cells.end() && it->second.data)
+                   {
                 // Both rows have this field with non-null values - check if
                 // they match
-                if (!value1->Equals(*(it->second))) {
-                  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+                if (!value1.equals(it->second)) {
+                   IF_DEBUG_ENABLED {
                     log_debug(
                         "Conflict detected: Field '{}' has different values",
                         field_name);
@@ -1023,90 +1112,94 @@ struct RowNode {
       }
       final_merged_rows = std::move(temp_product_accumulator);
       if (final_merged_rows.empty()) {
-        // log_debug("product_accumulator is empty. stop merge");
+         IF_DEBUG_ENABLED {
+           log_debug("product_accumulator is empty. stop merge");
+         }
         break;
       }
     }
     return final_merged_rows;
   }
 
-  std::string toString(bool recursive = true, int indent_level = 0) const {
-    // Helper to build indentation string based on level
-    auto get_indent = [](int level) { return std::string(level * 2, ' '); };
-
-    std::stringstream ss;
-    std::string indent = get_indent(indent_level);
-
-    // Print basic node info
-    ss << indent << "RowNode [path=" << path_segment.toString()
-       << ", depth=" << depth << "] {\n";
-
-    // Print Row
-    if (row.has_value()) {
-      ss << indent << "  Path: ";
-      if (row.value()->path.empty()) {
-        ss << "(empty)";
-      } else {
-        for (size_t i = 0; i < row.value()->path.size(); ++i) {
-          if (i > 0) ss << " → ";
-          ss << row.value()->path[i].schema << ":"
-             << row.value()->path[i].node_id;
-        }
-      }
-      ss << "\n";
-
-      // Print key cell values (limited to avoid overwhelming output)
-      ss << indent << "  Cells: ";
-      if (row.value()->cells.empty()) {
-        ss << "(empty)";
-      } else {
-        size_t count = 0;
-        ss << "{ ";
-        for (const auto& [key, value] : row.value()->cells) {
-          if (count++ > 0) ss << ", ";
-          if (count > 5) {  // Limit display
-            ss << "... +" << (row.value()->cells.size() - 5) << " more";
-            break;
-          }
-
-          ss << key << ": ";
-          if (!value) {
-            ss << "NULL";
-          } else {
-            ss << value->ToString();  // Assuming arrow::Scalar has ToString()
-          }
-        }
-        ss << " }";
-      }
-    }
-
-    ss << "\n";
-
-    // Print children count
-    ss << indent << "  Children: " << children.size() << "\n";
-
-    // Recursively print children if requested
-    if (recursive && !children.empty()) {
-      ss << indent << "  [\n";
-      for (const auto& child : children) {
-        if (child) {
-          ss << child->toString(true, indent_level + 2);
-        } else {
-          ss << get_indent(indent_level + 2) << "(null child)\n";
-        }
-      }
-      ss << indent << "  ]\n";
-    }
-
-    ss << indent << "}\n";
-    return ss.str();
-  }
+  /**
+   * @brief Render this node, and optionally its subtree, as indented text.
+   *
+   * Debug aid only. The attached row (if any) is rendered through
+   * Row::ToString(), so cell formatting stays in one place and follows
+   * whatever value representation Row currently uses.
+   *
+   * Output shape for one node:
+   * @code
+   *   RowNode [path=<segment>, depth=<n>] {
+   *     Row: <Row::ToString() | (null) | (none)>
+   *     Children: <count>
+   *     [
+   *       <child nodes, indented two levels deeper>
+   *     ]
+   *   }
+   * @endcode
+   *
+   * @param recursive    also render child nodes when true
+   * @param indent_level nesting depth; every level indents by two spaces
+   * @return multi-line, human-readable description of the node
+   */
+  std::string toString(bool recursive = true, int indent_level = 0) const {
+    // Helper to build indentation string based on level
+    auto get_indent = [](int level) { return std::string(level * 2, ' '); };
+
+    std::stringstream ss;
+    const std::string indent = get_indent(indent_level);
+
+    // Print basic node info
+    ss << indent << "RowNode [path=" << path_segment.toString()
+       << ", depth=" << depth << "] {\n";
+
+    // Print the row carried by this node, if any
+    if (row.has_value()) {
+      // row.value() is dereferenced with ->; test the handle as well so a
+      // half-built tree can still be printed.
+      const auto& current = row.value();
+      if (current) {
+        ss << indent << "  Row: " << current->ToString() << "\n";
+      } else {
+        ss << indent << "  Row: (null)\n";
+      }
+    } else {
+      ss << indent << "  Row: (none)\n";
+    }
+
+    // Print children count
+    ss << indent << "  Children: " << children.size() << "\n";
+
+    // Recursively print children if requested
+    if (recursive && !children.empty()) {
+      ss << indent << "  [\n";
+      for (const auto& child : children) {
+        if (child) {
+          ss << child->toString(true, indent_level + 2);
+        } else {
+          ss << get_indent(indent_level + 2) << "(null child)\n";
+        }
+      }
+      ss << indent << "  ]\n";
+    }
+
+    ss << indent << "}\n";
+    return ss.str();
+  }
 
+  /** @brief Stream the recursive textual form of @p node (see toString()). */
   friend std::ostream& operator<<(std::ostream& os, const RowNode& node) {
     return os << node.toString();
   }
 
-  void print(bool recursive = true) const { log_debug(toString(recursive)); }
+  /**
+   * @brief Emit toString() through the debug logger.
+   * @param recursive forwarded to toString()
+   */
+  void print(bool recursive = true) const {
+    log_debug("{}", toString(recursive));
+  }
 };
 
 struct QueueItem {
@@ -1130,7 +1223,7 @@ void log_grouped_connections(
     const llvm::SmallDenseMap<llvm::StringRef,
                               llvm::SmallVector<GraphConnection, 4>, 4>&
         grouped_connections) {
-  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+  IF_DEBUG_ENABLED {
     if (grouped_connections.empty()) {
       log_debug("Node {} has no grouped connections", node_id);
       return;
@@ -1155,12 +1248,11 @@ void log_grouped_connections(
   }
 }
 
-template <StringSet VisitedSet>
 arrow::Result<std::shared_ptr<llvm::SmallVector<std::shared_ptr<Row>, 4>>>
 populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
                   const std::shared_ptr<arrow::Schema>& output_schema,
-                  const QueryState& query_state, VisitedSet& global_visited) {
-  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+                  const QueryState& query_state, llvm::DenseSet<uint64_t>& global_visited) {
+  IF_DEBUG_ENABLED {
     log_debug("populate_rows_bfs::node={}:{}", start_schema.value(), node_id);
   }
   auto result = std::make_shared<llvm::SmallVector<std::shared_ptr<Row>, 4>>();
@@ -1186,12 +1278,10 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
             item.schema_ref.value());
       }
       item.row->set_cell_from_node(it_fq->second, node);
-      // Pack 16-bit schema id (precomputed in SchemaRef) and 48-bit node id.
-      const uint16_t schema_id16 = item.schema_ref.tag();
-      const uint64_t packed = (static_cast<uint64_t>(schema_id16) << 48) |
-                              (static_cast<uint64_t>(item.node_id) & NODE_MASK);
-      global_visited.insert(item.schema_ref.value() + ":" +
-                            std::to_string(item.node_id));
+      const uint64_t packed = hash_code_(item.schema_ref, item.node_id);
+      // global_visited.insert(item.schema_ref.value() + ":" +
+      //                       std::to_string(item.node_id));
+      global_visited.insert(packed);
       item.path_visited_nodes.insert(packed);
 
       // group connections by target schema (small, stack-friendly)
@@ -1203,10 +1293,7 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
         for (const auto& conn :
              query_state.connections.at(item.schema_ref.value())
                  .at(item.node_id)) {
-          const uint16_t tgt_schema_id16 = conn.target.tag();
-          const uint64_t tgt_packed =
-              (static_cast<uint64_t>(tgt_schema_id16) << 48) |
-              (static_cast<uint64_t>(conn.target_id) & NODE_MASK);
+          const uint64_t tgt_packed = hash_code_(conn.target, conn.target_id);
           if (!item.path_visited_nodes.contains(tgt_packed)) {
             if (query_state.ids.at(conn.target.value())
                     .contains(conn.target_id)) {
@@ -1225,7 +1312,7 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
           auto r = item.row;
           r->path = item.path;
           r->id = row_id_counter++;
-          if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          IF_DEBUG_ENABLED {
             log_debug("add row: {}", r->ToString());
           }
           result->push_back(r);
@@ -1244,7 +1331,7 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
             next.path = item.path;
             next.path.push_back(PathSegment{connections[0].target.tag(),
                                             connections[0].target_id});
-            if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+            IF_DEBUG_ENABLED {
               log_debug("continue the path: {}", join_schema_path(next.path));
             }
             queue.push(next);
@@ -1256,7 +1343,7 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
               next.path = item.path;
               next.path.push_back(
                   PathSegment{conn.target.tag(), conn.target_id});
-              if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+              IF_DEBUG_ENABLED {
                 log_debug("create a new path {}, node={}",
                           join_schema_path(next.path), conn.target_id);
               }
@@ -1270,16 +1357,16 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
   RowNode tree;
   tree.path_segment = PathSegment{0, -1};
   for (const auto& r : *result) {
-    if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+    IF_DEBUG_ENABLED {
       log_debug("bfs result: {}", r->ToString());
     }
     tree.insert_row(r);
   }
-  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+  IF_DEBUG_ENABLED {
     tree.print();
   }
   auto merged = tree.merge_rows();
-  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+  IF_DEBUG_ENABLED {
     for (const auto& row : merged) {
       log_debug("merge result: {}", row->ToString());
     }
@@ -1288,23 +1375,23 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
 }
 
 // template <NodeIds NodeIdsT>
-arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>>
+arrow::Result<std::shared_ptr<llvm::SmallVector<std::shared_ptr<Row>, 4>>>
 populate_batch_rows(
     const llvm::DenseSet<int64_t>& node_ids, const SchemaRef& schema_ref,
     const std::shared_ptr<arrow::Schema>& output_schema,
     const QueryState& query_state, const TraverseType join_type,
-    tbb::concurrent_unordered_set<std::string>& global_visited) {
-  auto rows = std::make_shared<std::vector<std::shared_ptr<Row>>>();
+    llvm::DenseSet<uint64_t>& global_visited) {
+  auto rows = std::make_shared<llvm::SmallVector<std::shared_ptr<Row>, 4>>();
   rows->reserve(node_ids.size());
-  std::set<std::string> local_visited;
+  llvm::DenseSet<uint64_t> local_visited;
   // For INNER join: only process nodes that have connections
   // For LEFT join: process all nodes from the "left" side
   for (const auto node_id : node_ids) {
-    auto key = schema_ref.value() + ":" + std::to_string(node_id);
-    if (!global_visited.insert(key).second) {
-      // Skip if already processed in an earlier traversal
-      continue;
-    }
+     const uint64_t packed = hash_code_(schema_ref, node_id);
+     if (!global_visited.insert(packed).second) {
+       // Skip if already processed in an earlier traversal
+       continue;
+     }
 
     // For INNER JOIN: Skip nodes without connections
     if (join_type == TraverseType::Inner &&
@@ -1361,7 +1448,7 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     const std::shared_ptr<arrow::Schema>& output_schema) {
   auto rows = std::make_shared<std::vector<std::shared_ptr<Row>>>();
   std::mutex rows_mtx;
-  tbb::concurrent_unordered_set<std::string> global_visited;
+  llvm::DenseSet<uint64_t> global_visited;  // NOTE(review): DenseSet is not thread-safe, unlike the tbb set it replaces; confirm concurrent batches never insert into it in parallel
 
   // Map schemas to their join types
   std::unordered_map<std::string, TraverseType> schema_join_types;
@@ -1401,14 +1488,18 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     }
   }
 
-  // log_debug("Processing {} schemas with their respective join types",
-  //           ordered_schemas.size());
+   IF_DEBUG_ENABLED {
+     log_debug("Processing {} schemas with their respective join types",
+               ordered_schemas.size());
+   }
 
   // Process each schema in order
   for (const auto& schema_ref : ordered_schemas) {
     TraverseType join_type = schema_join_types[schema_ref.value()];
-    // log_debug("Processing schema '{}' with join type {}", schema_ref.value(),
-    //           static_cast<int>(join_type));
+     IF_DEBUG_ENABLED {
+       log_debug("Processing schema '{}' with join type {}", schema_ref.value(),
+                 static_cast<int>(join_type));
+     }
 
     if (!query_state.ids.contains(schema_ref.value())) {
       log_warn("Schema '{}' not found in query state IDs", schema_ref.value());
@@ -1426,7 +1517,7 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
         batch_size = execution_config.calculate_batch_size(schema_nodes.size());
       }
       auto batches = batch_node_ids(schema_nodes, batch_size);
-      if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+      IF_DEBUG_ENABLED {
         log_debug(
             "process concurrently. thread_count={}, batch_size={}, "
             "batches_count={}",
@@ -1478,14 +1569,16 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
                    std::make_move_iterator(res_value->end()));
     }
 
-    if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+    IF_DEBUG_ENABLED {
       log_debug("Processing schema '{}' nodes: [{}]", schema_ref.value(),
                 join_container(schema_nodes));
     }
   }
 
-  // log_debug("Generated {} total rows after processing all schemas",
-  //           rows->size());
+   IF_DEBUG_ENABLED {
+     log_debug("Generated {} total rows after processing all schemas",
+               rows->size());
+   }
   return rows;
 }
 
@@ -1549,8 +1642,8 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
     // Get all field names from all rows to create a complete schema
     std::set<std::string> all_field_names;
     for (const auto& row : *rows) {
-      for (const auto& field_name : row->cells | std::views::keys) {
-        all_field_names.insert(field_name);
+      for (const auto& entry : row->cells) {
+        all_field_names.insert(entry.first().str());
       }
     }
 
@@ -1562,8 +1655,8 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
       std::shared_ptr<arrow::DataType> field_type = nullptr;
       for (const auto& row : *rows) {
         auto it = row->cells.find(field_name);
-        if (it != row->cells.end() && it->second) {
-          if (auto array_result = arrow::MakeArrayFromScalar(*(it->second), 1);
+        if (it != row->cells.end() && it->second.data != nullptr) {
+          if (auto array_result = arrow::MakeArrayFromScalar(*(it->second.as_scalar().ValueOrDie()), 1);
               array_result.ok()) {
             field_type = array_result.ValueOrDie()->type();
             break;
@@ -1595,8 +1688,8 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
       const auto& field_name = output_schema->field(i)->name();
       auto it = row->cells.find(field_name);
 
-      if (it != row->cells.end() && it->second) {
-        if (auto res = builders[i]->AppendScalar(*it->second); !res.ok()) {
+      if (it != row->cells.end() && it->second.data) {
+        if (auto res = builders[i]->AppendScalar(*it->second.as_scalar().ValueOrDie()); !res.ok()) {
           return res;
         }
         // We have a value for this field
@@ -1704,7 +1797,9 @@ std::vector<std::shared_ptr<WhereExpr>> get_where_to_inline(
     if (clauses[i]->type() == Clause::Type::WHERE) {
       auto where_expr = std::dynamic_pointer_cast<WhereExpr>(clauses[i]);
       if (where_expr->can_inline(target_var)) {
-        // log_debug("inline where: '{}'", where_expr->toString());
+         IF_DEBUG_ENABLED {
+           log_debug("inline where: '{}'", where_expr->toString());
+         }
         inlined.push_back(where_expr);
       }
     }
@@ -1718,7 +1813,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> inline_where(
     const std::vector<std::shared_ptr<WhereExpr>>& where_exprs) {
   auto curr_table = std::move(table);
   for (const auto& exp : where_exprs) {
-    // log_debug("inline where '{}'", exp->toString());
+     IF_DEBUG_ENABLED {
+       log_debug("inline where '{}'", exp->toString());
+     }
     auto result = filter(curr_table, *exp, true);
     if (!result.ok()) {
       log_error(
@@ -1761,17 +1858,21 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     const Query& query) const {
   QueryState query_state;
   auto result = std::make_shared<QueryResult>();
-  // log_debug("Executing query starting from schema '{}'",
-  //           query.from().toString());
+   IF_DEBUG_ENABLED {
+     log_debug("Executing query starting from schema '{}'",
+               query.from().toString());
+   }
   query_state.node_manager = this->node_manager_;
   query_state.schema_registry = this->schema_registry_;
   query_state.from = query.from();
 
   {
-    // log_debug("processing 'from' {}", query.from().toString());
+     IF_DEBUG_ENABLED {
+       log_debug("processing 'from' {}", query.from().toString());
+     }
     // Precompute tag for FROM schema (alias-based hash)
     query_state.from = query.from();
-    query_state.from.set_tag(QueryState::compute_alias_tag(query_state.from));
+    query_state.from.set_tag(compute_tag(query_state.from));
     ARROW_ASSIGN_OR_RAISE(auto source_schema,
                           query_state.resolve_schema(query.from()));
     if (!this->schema_registry_->exists(source_schema)) {
@@ -1805,7 +1906,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     }
   }
 
-  // log_debug("Processing {} query clauses", query.clauses().size());
+   IF_DEBUG_ENABLED {
+     log_debug("Processing {} query clauses", query.clauses().size());
+   }
 
   // Precompute 16-bit alias-based tags for all SchemaRefs
   // Also precompute fully-qualified field names per alias used in the query
@@ -1816,7 +1919,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
       case Clause::Type::WHERE: {
         auto where = std::dynamic_pointer_cast<WhereExpr>(clause);
         if (where->inlined()) {
-          // log_debug("where '{}' is inlined, skip", where->toString());
+           IF_DEBUG_ENABLED {
+             log_debug("where '{}' is inlined, skip", where->toString());
+           }
           continue;
         }
         auto variables = where->get_all_variables();
@@ -1827,7 +1932,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               where->toString());
         }
         if (variables.size() == 1) {
-          // log_debug("Processing WHERE clause: '{}'", where->toString());
+           IF_DEBUG_ENABLED {
+             log_debug("Processing WHERE clause: '{}'", where->toString());
+           }
 
           std::unordered_map<std::string, std::set<int64_t>> new_front_ids;
           std::string variable = *variables.begin();
@@ -1845,8 +1952,10 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           ARROW_RETURN_NOT_OK(query_state.update_table(
               filtered_table_result.ValueOrDie(), SchemaRef::parse(variable)));
         } else {
-          // log_debug("Add compound WHERE expression: '{}' to post process",
-          //           where->toString());
+           IF_DEBUG_ENABLED {
+             log_debug("Add compound WHERE expression: '{}' to post process",
+                       where->toString());
+           }
           post_where.emplace_back(where);
         }
         break;
@@ -1856,9 +1965,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
         // Precompute and set tags for source/target refs (alias-based,
         // deterministic)
         traverse->mutable_source().set_tag(
-            QueryState::compute_alias_tag(traverse->source()));
+            compute_tag(traverse->source()));
         traverse->mutable_target().set_tag(
-            QueryState::compute_alias_tag(traverse->target()));
+            compute_tag(traverse->target()));
 
         ARROW_ASSIGN_OR_RAISE(auto source_schema,
                               query_state.resolve_schema(traverse->source()));
@@ -1882,14 +1991,14 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               where_clauses.size();
         }
         query_state.traversals.push_back(*traverse);
-        if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+        IF_DEBUG_ENABLED {
           log_debug("Processing TRAVERSE {}-({})->{}",
                     traverse->source().toString(), traverse->edge_type(),
                     traverse->target().toString());
         }
         auto source = traverse->source();
         if (!query_state.tables.contains(source.value())) {
-          if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          IF_DEBUG_ENABLED {
             log_debug("Source table '{}' not found. Loading",
                       traverse->source().toString());
           }
@@ -1899,7 +2008,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               query_state.update_table(source_table, traverse->source()));
         }
 
-        if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+        IF_DEBUG_ENABLED {
           log_debug("Traversing from {} source nodes",
                     query_state.ids[source.value()].size());
         }
@@ -1911,7 +2020,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               edge_store_
                   ->get_outgoing_edges_view(source_id, traverse->edge_type())
                   .ValueOrDie();  // todo check result
-          // if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          // IF_DEBUG_ENABLED {
           //   log_debug("Node {} has {} outgoing edges of type '{}'",
           //   source_id,
           //             outgoing_edges.size(), traverse->edge_type());
@@ -1942,7 +2051,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
                   }
                 }
                 if (passes_all_filters) {
-                  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+                  IF_DEBUG_ENABLED {
                     log_debug("found edge {}:{} -[{}]-> {}:{}", source.value(),
                               source_id, traverse->edge_type(),
                               traverse->target().value(), target_node->id);
@@ -1969,7 +2078,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
             }
           }
           if (!source_had_match) {
-            if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+            IF_DEBUG_ENABLED {
               log_debug("no edge found from {}:{}", source.value(), source_id);
             }
             unmatched_source_ids.insert(source_id);
@@ -1978,11 +2087,15 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
         if (traverse->traverse_type() == TraverseType::Inner &&
             !unmatched_source_ids.empty()) {
           for (auto id : unmatched_source_ids) {
-            // log_debug("remove unmatched node={}:{}", source.value(), id);
+             IF_DEBUG_ENABLED {
+               log_debug("remove unmatched node={}:{}", source.value(), id);
+             }
             query_state.remove_node(id, source);
           }
-          // log_debug("rebuild table for schema {}:{}", source.value(),
-          //           query_state.aliases[source.value()]);
+           IF_DEBUG_ENABLED {
+             log_debug("rebuild table for schema {}:{}", source.value(),
+                       query_state.aliases[source.value()]);
+           }
           auto table_result =
               filter_table_by_id(query_state.tables[source.value()],
                                  query_state.ids[source.value()]);
@@ -1991,8 +2104,10 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           }
           query_state.tables[source.value()] = table_result.ValueOrDie();
         }
-        // log_debug("found {} neighbors for {}", matched_target_ids.size(),
-        //           traverse->target().toString());
+         IF_DEBUG_ENABLED {
+           log_debug("found {} neighbors for {}", matched_target_ids.size(),
+                     traverse->target().toString());
+         }
 
         if (traverse->traverse_type() == TraverseType::Inner) {
           // intersect
@@ -2010,7 +2125,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           }
 
           query_state.ids[traverse->target().value()] = intersect_ids;
-          if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          IF_DEBUG_ENABLED {
             log_debug("intersect_ids count: {}", intersect_ids.size());
             log_debug("{} intersect_ids: {}", traverse->target().toString(),
                       join_container(intersect_ids));
@@ -2023,7 +2138,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           auto target_ids =
               get_ids_from_table(get_table(target_schema).ValueOrDie())
                   .ValueOrDie();
-          if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          IF_DEBUG_ENABLED {
             log_debug(
                 "traverse type: '{}', matched_source_ids=[{}], "
                 "target_ids=[{}]",
@@ -2037,7 +2152,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
 
         std::vector<std::shared_ptr<Node>> neighbors;
         for (auto id : query_state.ids[traverse->target().value()]) {
-          auto node_res = node_manager_->get_node(id);
+          auto const node_res = node_manager_->get_node(id);
           if (node_res.ok()) {
             neighbors.push_back(node_res.ValueOrDie());
           }
@@ -2063,9 +2178,13 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     }
   }
 
-  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+  IF_DEBUG_ENABLED {
     log_debug("Query processing complete, building result");
     log_debug("Query state: {}", query_state.ToString());
+    for (auto traversal : query_state.traversals) {
+      log_debug("schema tag {}->{}", traversal.source().value(), traversal.source().tag());
+      log_debug("schema tag {}->{}", traversal.target().value(), traversal.target().tag());
+    }
   }
 
   auto output_schema_res = build_denormalized_schema(query_state);
@@ -2073,7 +2192,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     return output_schema_res.status();
   }
   const auto output_schema = output_schema_res.ValueOrDie();
-  // log_debug("output_schema={}", output_schema->ToString());
+   IF_DEBUG_ENABLED {
+     log_debug("output_schema={}", output_schema->ToString());
+   }
 
   auto row_res = populate_rows(query.execution_config(), query_state,
                                query_state.traversals, output_schema);
@@ -2090,7 +2211,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
   auto output_table = output_table_res.ValueOrDie();
   for (const auto& expr : post_where) {
     result->mutable_execution_stats().num_where_clauses_post_processed++;
-    // log_debug("post process where: {}", expr->toString());
+     IF_DEBUG_ENABLED {
+       log_debug("post process where: {}", expr->toString());
+     }
     output_table = filter(output_table, *expr, false).ValueOrDie();
   }
   result->set_table(apply_select(query.select(), output_table));
diff --git a/src/edge_store.cpp b/src/edge_store.cpp
index a8679d8..344f77d 100644
--- a/src/edge_store.cpp
+++ b/src/edge_store.cpp
@@ -3,18 +3,31 @@
 #include "logger.hpp"
 namespace tundradb {
 
-// EdgeView::iterator implementation
+// EdgeView::iterator implementation - optimized
 void EdgeView::iterator::advance_to_valid() {
+  // Pre-check if type filter is empty to avoid string comparisons
+  const bool has_type_filter = !type_filter_.empty();
+  
   while (edge_ids_it_ != edge_ids_end_) {
-    tbb::concurrent_hash_map<int64_t, std::shared_ptr<Edge>>::const_accessor
-        edge_acc;
+    tbb::concurrent_hash_map<int64_t, std::shared_ptr<Edge>>::const_accessor edge_acc;
+    
+    // Fast path: try to find edge (this is the main bottleneck)
     if (store_->edges.find(edge_acc, edge_ids_it_->first)) {
       auto edge = edge_acc->second;
-      if (type_filter_.empty() || edge->get_type() == type_filter_) {
+      
+      // Fast path: no type filter
+      if (!has_type_filter) {
+        current_edge_ = edge;
+        return;
+      }
+      
+      // Slow path: check type filter
+      if (edge->get_type() == type_filter_) {
         current_edge_ = edge;
         return;
       }
     }
+    
     ++edge_ids_it_;
   }
   current_edge_.reset();

From 3275b3122dba25e0c1cfabf3485025dad31d4c7d Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Sun, 21 Sep 2025 16:31:24 -0400
Subject: [PATCH 4/4] optimize perf

---
 include/logger.hpp              |  56 ++-
 include/node.hpp                |  12 +-
 include/node_arena.hpp          |   3 +-
 include/query.hpp               |  37 +-
 include/schema_layout.hpp       |   5 +-
 include/types.hpp               | 383 ++++++++++---------
 include/utils.hpp               |  96 +++--
 src/core.cpp                    | 643 ++++++++++++--------------------
 src/edge_store.cpp              |  13 +-
 tests/where_expression_test.cpp |   2 +
 10 files changed, 550 insertions(+), 700 deletions(-)

diff --git a/include/logger.hpp b/include/logger.hpp
index af3a425..81a0c6a 100644
--- a/include/logger.hpp
+++ b/include/logger.hpp
@@ -284,20 +284,20 @@ class ContextLogger {
 
 // Compile-time log level configuration
 #ifdef TUNDRA_LOG_LEVEL_DEBUG
-  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::DEBUG;
+constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::DEBUG;
 #elif defined(TUNDRA_LOG_LEVEL_INFO)
-  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::INFO;
+constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::INFO;
 #elif defined(TUNDRA_LOG_LEVEL_WARN)
-  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::WARN;
+constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::WARN;
 #elif defined(TUNDRA_LOG_LEVEL_ERROR)
-  constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::ERROR;
+constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::ERROR;
 #else
-  // Default to INFO in release builds, DEBUG in debug builds
-  #ifdef NDEBUG
-    constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::INFO;
-  #else
-    constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::DEBUG;
-  #endif
+// Default to INFO in release builds, DEBUG in debug builds
+#ifdef NDEBUG
+constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::INFO;
+#else
+constexpr LogLevel COMPILE_TIME_LOG_LEVEL = LogLevel::DEBUG;
+#endif
 #endif
 
 // Compile-time log level checks - completely eliminated in release builds
@@ -314,33 +314,31 @@ constexpr bool is_warn_enabled() {
 }
 
 // Fast logging macros that compile to nothing when disabled
-#define LOG_DEBUG_FAST(msg, ...) \
-  do { \
+#define LOG_DEBUG_FAST(msg, ...)        \
+  do {                                  \
     if constexpr (is_debug_enabled()) { \
-      log_debug(msg, ##__VA_ARGS__); \
-    } \
-  } while(0)
+      log_debug(msg, ##__VA_ARGS__);    \
+    }                                   \
+  } while (0)
 
-#define LOG_INFO_FAST(msg, ...) \
-  do { \
+#define LOG_INFO_FAST(msg, ...)        \
+  do {                                 \
     if constexpr (is_info_enabled()) { \
-      log_info(msg, ##__VA_ARGS__); \
-    } \
-  } while(0)
+      log_info(msg, ##__VA_ARGS__);    \
+    }                                  \
+  } while (0)
 
-#define LOG_WARN_FAST(msg, ...) \
-  do { \
+#define LOG_WARN_FAST(msg, ...)        \
+  do {                                 \
     if constexpr (is_warn_enabled()) { \
-      log_warn(msg, ##__VA_ARGS__); \
-    } \
-  } while(0)
+      log_warn(msg, ##__VA_ARGS__);    \
+    }                                  \
+  } while (0)
 
 // Conditional code blocks - completely eliminated when disabled
-#define IF_DEBUG_ENABLED \
-  if constexpr (is_debug_enabled())
+#define IF_DEBUG_ENABLED if constexpr (is_debug_enabled())
 
-#define IF_INFO_ENABLED \
-  if constexpr (is_info_enabled())
+#define IF_INFO_ENABLED if constexpr (is_info_enabled())
 
 }  // namespace tundradb
 
diff --git a/include/node.hpp b/include/node.hpp
index 285e94e..ccdcec7 100644
--- a/include/node.hpp
+++ b/include/node.hpp
@@ -3,10 +3,11 @@
 
 #include <arrow/api.h>
 
+#include <iostream>
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include <iostream>
+
 #include "llvm/ADT/DenseMap.h"
 #include "node_arena.hpp"
 #include "schema.hpp"
@@ -57,13 +58,16 @@ class Node {
     data_[field_name] = std::move(value);
   }
 
-  const char * get_value_ptr(const std::string &field_name, ValueType* out_type) const {
+  ValueRef get_value_ref(const std::string &field_name) const {
     if (arena_ != nullptr) {
       // if (schema_->get_field(field_name) == nullptr) {
       //   // Logger::get_instance().debug("Field not found");
       //   return arrow::Status::KeyError("Field not found: ", field_name);
       // }
-      return arena_->get_field_value_ptr(*handle_, layout_, field_name, out_type);
+      ValueType out_type = ValueType::NA;  // stays NA if the callee never writes it
+      const char *ptr =
+          arena_->get_field_value_ptr(*handle_, layout_, field_name, &out_type);
+      return {ptr, out_type};
     }
 
     // const char * get_value_ptr(const std::string &field_name) const {
@@ -78,7 +82,7 @@ class Node {
     // auto v = Value::read_value_from_memory(p, it->second.type());
     // Logger::get_instance().debug("get value ptr {}={}", field_name,
     // v.to_string() );
-    return it->second.data_ptr();
+    return it->second.as_ref();
     // return arrow::Status::NotImplemented("");
   }
 
diff --git a/include/node_arena.hpp b/include/node_arena.hpp
index e5fec02..13d4c11 100644
--- a/include/node_arena.hpp
+++ b/include/node_arena.hpp
@@ -133,7 +133,8 @@ class NodeArena {
    */
   const char* get_field_value_ptr(const NodeHandle& handle,
                                   const std::shared_ptr<SchemaLayout>& layout,
-                                  const std::string& field_name, ValueType* out_type) const {
+                                  const std::string& field_name,
+                                  ValueType* out_type) const {
     // Logger::get_instance().debug("get_field_value: {}.{}", schema_name,
     //                              field_name);
     if (handle.is_null()) {
diff --git a/include/query.hpp b/include/query.hpp
index f19ce21..2d07ab7 100644
--- a/include/query.hpp
+++ b/include/query.hpp
@@ -186,9 +186,8 @@ class ComparisonExpr : public Clause, public WhereExpr {
   bool inlined_ = false;
   std::string field_name;
 
-  static arrow::Result<bool> compare_values(
-  const std::string& field_name,
-  const char* value_ptr, CompareOp op,
+  static arrow::Result<bool> compare_values(const std::string& field_name,
+                                            const char* value_ptr, CompareOp op,
                                             const Value& where_value,
                                             ValueType value_type) {
     /*
@@ -234,13 +233,11 @@ class ComparisonExpr : public Clause, public WhereExpr {
                                     " but WHERE value is ", where_value.type());
     }
 */
-    // std::cout << "compare " << field_name << ":"<< to_string(value_type) <<  std::endl;
+
     switch (value_type) {
       case ValueType::INT32: {
         int32_t field_val = *reinterpret_cast<const int32_t*>(value_ptr);
-        // std::cout << field_name << "~~" << "where_value.type=" << to_string(where_value.type()) << std::endl;
         int32_t where_val = where_value.get<int32_t>();
-        // std::cout << "where_val = int32_t" << where_val << std::endl;
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::INT64: {
@@ -259,11 +256,10 @@ class ComparisonExpr : public Clause, public WhereExpr {
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::STRING: {
-        // std::cout << "compare strings: begin" << std::endl;
         auto str_ref = *reinterpret_cast<const StringRef*>(value_ptr);
-        const std::string& field_val = std::string(str_ref.data, str_ref.length);
+        const std::string& field_val =
+            std::string(str_ref.data, str_ref.length);
         const std::string& where_val = where_value.as_string();
-        // std::cout << "compare strings: end" << std::endl;
         return apply_comparison(field_val, op, where_val);
       }
       case ValueType::BOOL: {
@@ -414,30 +410,12 @@ class ComparisonExpr : public Clause, public WhereExpr {
     if (!node) {
       return arrow::Status::Invalid("Node is null");
     }
-
-    // parse field name to extract variable and field parts
-    // expected format: "variable.field" (e.g., "user.age", "company.name")
-
-    // ARROW_ASSIGN_OR_RAISE(auto field_value, node->get_value(field_name));
-    // return compare_values(field_value, op_, value_);
-    ValueType field_type;
-    const char * val_ptr = node->get_value_ptr(field_name, &field_type);
-    return compare_values(field_, val_ptr, op_, value_, field_type);
+    auto value_ref = node->get_value_ref(field_name);
+    return compare_values(field_,value_ref.data, op_, value_, value_ref.type);
   }
 
   [[nodiscard]] arrow::compute::Expression to_arrow_expression(
       bool strip_var) const override {
-    // std::string field_name = field_;
-    // if (strip_var) {
-    //   if (const size_t dot_pos = field_.find('.');
-    //       dot_pos != std::string::npos) {
-    //     field_name = field_.substr(dot_pos + 1);
-    //   } else {
-    //     field_name = field_;
-    //   }
-    // }
-    // const auto& f = strip_var ? field_name : field_;
-
     const auto field_expr =
         arrow::compute::field_ref(strip_var ? field_name : field_);
     const auto value_expr = value_to_expression(value_);
@@ -774,7 +752,6 @@ class Query {
         : from_(SchemaRef::parse(schema)) {}
 
     Builder& where(std::string field, CompareOp op, Value value) {
-      // std::cout << "where " <<field << ":"<< to_string(value.type()) << std::endl;
       clauses_.push_back(std::make_shared<ComparisonExpr>(std::move(field), op,
                                                           std::move(value)));
       return *this;
diff --git a/include/schema_layout.hpp b/include/schema_layout.hpp
index a04ee2e..3818feb 100644
--- a/include/schema_layout.hpp
+++ b/include/schema_layout.hpp
@@ -143,7 +143,8 @@ class SchemaLayout {
   }
 
   const char* get_field_value_ptr(const char* node_data,
-                                  const std::string& field_name, ValueType* out_type) const {
+                                  const std::string& field_name,
+                                  ValueType* out_type) const {
     const size_t field_index = get_field_index(field_name);
     const FieldLayout& field = fields_[field_index];
     if (out_type) {
@@ -246,7 +247,7 @@ class SchemaLayout {
     // if (name[0] == 'n' || name[0] == 'i') return 1;
     // if (name[0] == 'a') return 2;
     // if (name[0] == 'c') return 3;
-    return  -1;
+    return -1;
   }
 
   const FieldLayout* get_field_layout(const std::string& name) const {
diff --git a/include/types.hpp b/include/types.hpp
index 98990e5..419a8fb 100644
--- a/include/types.hpp
+++ b/include/types.hpp
@@ -53,6 +53,205 @@ enum class ValueType {
   BOOL
 };
 
+// Display name of a ValueType; values without a case map to "Unknown".
+inline std::string to_string(const ValueType type) {
+  switch (type) {
+    case ValueType::NA:
+      return "Null";
+    case ValueType::INT32:
+      return "Int32";
+    case ValueType::INT64:
+      return "Int64";
+    case ValueType::FLOAT:
+      return "Float";
+    case ValueType::DOUBLE:
+      return "Double";
+    case ValueType::STRING:
+      return "String";
+    case ValueType::FIXED_STRING16:
+      return "FixedString16";
+    case ValueType::FIXED_STRING32:
+      return "FixedString32";
+    case ValueType::FIXED_STRING64:
+      return "FixedString64";
+    case ValueType::BOOL:
+      return "Bool";
+    default:
+      return "Unknown";
+  }
+}
+
+// Non-owning, typed view of one field value: `type` says how to interpret the
+// storage `data` points at. For string types `data` addresses a StringRef, not
+// the characters. A null `data` is rendered as NULL by ToString().
+struct ValueRef {
+  const char* data;
+  ValueType type;
+
+  // Default constructor: null data, type NA
+  ValueRef() : data(nullptr), type(ValueType::NA) {}
+
+  ValueRef(const char* ptr, ValueType type) : data(ptr), type(type) {}
+
+  // Typed accessors: `data` is dereferenced unchecked (must be non-null).
+  int32_t as_int32() const { return *reinterpret_cast<const int32_t*>(data); }
+
+  int64_t as_int64() const { return *reinterpret_cast<const int64_t*>(data); }
+
+  double as_double() const { return *reinterpret_cast<const double*>(data); }
+
+  float as_float() const { return *reinterpret_cast<const float*>(data); }
+
+  bool as_bool() const { return *reinterpret_cast<const bool*>(data); }
+
+  // Strings are stored as a StringRef, not as a NUL-terminated C string.
+  std::string as_string() const { return as_string_ref().to_string(); }
+
+  const StringRef& as_string_ref() const {
+    return *reinterpret_cast<const StringRef*>(data);
+  }
+
+  // Arrow scalar for the referenced value; null `data` yields a null scalar.
+  arrow::Result<std::shared_ptr<arrow::Scalar>> as_scalar() const {
+    if (data == nullptr) {
+      return arrow::MakeNullScalar(arrow::null());
+    }
+    switch (type) {
+      case ValueType::INT32:
+        return arrow::MakeScalar(as_int32());
+      case ValueType::INT64:
+        return arrow::MakeScalar(as_int64());
+      case ValueType::FLOAT:
+        return arrow::MakeScalar(as_float());
+      case ValueType::DOUBLE:
+        return arrow::MakeScalar(as_double());
+      case ValueType::STRING:
+        return arrow::MakeScalar(as_string_ref().to_string());
+      case ValueType::BOOL:
+        return arrow::MakeScalar(as_bool());
+      case ValueType::NA:
+        return arrow::MakeNullScalar(arrow::null());
+      default:
+        return arrow::Status::NotImplemented(
+            "Unsupported Value type for Arrow scalar conversion: ",
+            to_string(type));
+    }
+  }
+
+  // Equality comparison: same type and same referenced value
+  bool operator==(const ValueRef& other) const {
+    if (type != other.type) {
+      return false;
+    }
+
+    // Both null
+    if (data == nullptr && other.data == nullptr) {
+      return true;
+    }
+
+    // One null, one not null
+    if (data == nullptr || other.data == nullptr) {
+      return false;
+    }
+
+    // Compare values based on type
+    switch (type) {
+      case ValueType::NA:
+        return true;  // Both are NA
+
+      case ValueType::INT32:
+        return as_int32() == other.as_int32();
+
+      case ValueType::INT64:
+        return as_int64() == other.as_int64();
+
+      case ValueType::FLOAT:
+        return as_float() == other.as_float();
+
+      case ValueType::DOUBLE:
+        return as_double() == other.as_double();
+
+      case ValueType::BOOL:
+        return as_bool() == other.as_bool();
+
+      // Fixed-size strings are read through StringRef as well
+      case ValueType::FIXED_STRING16:
+      case ValueType::FIXED_STRING32:
+      case ValueType::FIXED_STRING64:
+      case ValueType::STRING: {
+        const StringRef& str1 = as_string_ref();
+        const StringRef& str2 = other.as_string_ref();
+
+        // Compare string lengths first
+        if (str1.length != str2.length) {
+          return false;
+        }
+
+        // Both null strings
+        if (str1.is_null() && str2.is_null()) {
+          return true;
+        }
+
+        // One null, one not
+        if (str1.is_null() || str2.is_null()) {
+          return false;
+        }
+
+        // Compare string content
+        return std::memcmp(str1.data, str2.data, str1.length) == 0;
+      }
+
+      default:
+        return false;  // Unknown type
+    }
+  }
+
+  bool operator!=(const ValueRef& other) const { return !(*this == other); }
+
+  // Named alias for operator==
+  bool equals(const ValueRef& other) const { return *this == other; }
+
+  // ToString method for debugging and display
+  std::string ToString() const {
+    if (data == nullptr) {
+      return "NULL";
+    }
+
+    switch (type) {
+      case ValueType::NA:
+        return "NULL";
+
+      case ValueType::INT32:
+        return std::to_string(as_int32());
+
+      case ValueType::INT64:
+        return std::to_string(as_int64());
+
+      case ValueType::FLOAT:
+        return std::to_string(as_float());
+
+      case ValueType::DOUBLE:
+        return std::to_string(as_double());
+
+      case ValueType::BOOL:
+        return as_bool() ? "true" : "false";
+
+      case ValueType::FIXED_STRING16:
+      case ValueType::FIXED_STRING32:
+      case ValueType::FIXED_STRING64:
+      case ValueType::STRING: {
+        const StringRef& str_ref = as_string_ref();
+        if (str_ref.is_null()) {
+          return "NULL";
+        }
+        return "\"" + str_ref.to_string() + "\"";
+      }
+      default:
+        return "UNKNOWN_TYPE";
+    }
+  }
+};
+
 /**
  * Get the maximum size for fixed-size string types
  */
@@ -117,31 +316,6 @@ static size_t get_type_alignment(const ValueType type) {
   }
 }
 
-inline std::string to_string(const ValueType type) {
-  switch (type) {
-    case ValueType::NA:
-      return "Null";
-    case ValueType::INT32:
-      return "Int32";
-    case ValueType::INT64:
-      return "Int64";
-    case ValueType::DOUBLE:
-      return "Double";
-    case ValueType::STRING:
-      return "String";
-    case ValueType::FIXED_STRING16:
-      return "FixedString16";
-    case ValueType::FIXED_STRING32:
-      return "FixedString32";
-    case ValueType::FIXED_STRING64:
-      return "FixedString64";
-    case ValueType::BOOL:
-      return "Bool";
-    default:
-      return "Unknown";
-  }
-}
-
 class Value {
  public:
   Value() : type_(ValueType::NA), data_(std::monostate{}) {}
@@ -169,7 +343,6 @@ class Value {
   ValueType type() const { return type_; }
 
   const char* data_ptr() const {
-    // Logger::get_instance().debug("data_ptr");
     switch (type_) {
       case ValueType::INT32:
         return reinterpret_cast<const char*>(&std::get<int32_t>(data_));
@@ -186,15 +359,6 @@ class Value {
       case ValueType::FIXED_STRING32:
       case ValueType::FIXED_STRING64: {
         return reinterpret_cast<const char*>(&std::get<StringRef>(data_));
-
-        // if (std::holds_alternative<StringRef>(data_)) {
-        //   Logger::get_instance().debug("bob");
-        //   return std::get<StringRef>(data_).data;
-        // } else if (std::holds_alternative<std::string>(data_)) {
-        //   Logger::get_instance().debug("dod");
-        //   return std::get<std::string>(data_).data();
-        // }
-        // return nullptr;
       }
       case ValueType::NA:
       default:
@@ -202,6 +366,8 @@ class Value {
     }
   }
 
+  ValueRef as_ref() const { return {data_ptr(), type_}; }
+
   template <typename T>
   const T& get() const {
     return std::get<T>(data_);
@@ -304,153 +470,6 @@ class Value {
       data_;
 };
 
-struct ValueRef {
-  const char* data;
-   ValueType type;
-  
-  // Default constructor
-  ValueRef() : data(nullptr), type(ValueType::NA) {}
-  
-  // Constructor
-  ValueRef(const char* ptr, ValueType type) : data(ptr), type(type) {}
-  
-  // Copy constructor (allowed)
-  // ValueRef(const ValueRef&) = default;
-  //
-  // // Move constructor
-  // ValueRef(ValueRef&&) = default;
-  //
-  // // Copy assignment is deleted due to const member
-  // ValueRef& operator=(const ValueRef&) = delete;
-  //
-  // // Move assignment is also deleted due to const member
-  // ValueRef& operator=(ValueRef&&) = delete;
-
-  int32_t as_int32() const {
-    return *reinterpret_cast<const int32_t*>(data);
-  }
-
-  int64_t as_int64() const {
-    return *reinterpret_cast<const int64_t*>(data);
-  }
-
-  double as_double() const {
-    return *reinterpret_cast<const double*>(data);
-  }
-
-  bool as_bool() const {
-    return *reinterpret_cast<const bool*>(data);
-  }
-
-  std::string as_string() const {
-    return std::string(data);
-  }
-
-  const StringRef& as_string_ref() const {
-    return *reinterpret_cast<const StringRef*>(data);
-  }
-
-  arrow::Result< std::shared_ptr<arrow::Scalar>> as_scalar() {
-    switch (type) {
-      case ValueType::INT32:
-        return arrow::MakeScalar(as_int32());
-      case ValueType::INT64:
-        return arrow::MakeScalar(as_int64());
-      case ValueType::DOUBLE:
-        return arrow::MakeScalar(as_double());
-      case ValueType::STRING:
-        return arrow::MakeScalar(as_string());
-      case ValueType::BOOL:
-        return arrow::MakeScalar(as_bool());
-      case ValueType::NA:
-        return arrow::MakeNullScalar(arrow::null());
-      default:
-        return arrow::Status::NotImplemented(
-            "Unsupported Value type for Arrow scalar conversion: ",
-            to_string(type));
-    }
-  }
-  
-  // Equality comparison
-  bool operator==(const ValueRef& other) const {
-    if (type != other.type) {
-      return false;
-    }
-    
-    // Both null
-    if (data == nullptr && other.data == nullptr) {
-      return true;
-    }
-    
-    // One null, one not null
-    if (data == nullptr || other.data == nullptr) {
-      return false;
-    }
-    
-    // Compare values based on type
-    switch (type) {
-      case ValueType::NA:
-        return true;  // Both are NA
-        
-      case ValueType::INT32:
-        return *reinterpret_cast<const int32_t*>(data) == 
-               *reinterpret_cast<const int32_t*>(other.data);
-               
-      case ValueType::INT64:
-        return *reinterpret_cast<const int64_t*>(data) == 
-               *reinterpret_cast<const int64_t*>(other.data);
-               
-      case ValueType::FLOAT:
-        return *reinterpret_cast<const float*>(data) == 
-               *reinterpret_cast<const float*>(other.data);
-               
-      case ValueType::DOUBLE:
-        return *reinterpret_cast<const double*>(data) == 
-               *reinterpret_cast<const double*>(other.data);
-               
-      case ValueType::BOOL:
-        return *reinterpret_cast<const bool*>(data) == 
-               *reinterpret_cast<const bool*>(other.data);
-               
-      case ValueType::STRING: {
-        const StringRef& str1 = *reinterpret_cast<const StringRef*>(data);
-        const StringRef& str2 = *reinterpret_cast<const StringRef*>(other.data);
-        
-        // Compare string lengths first
-        if (str1.length != str2.length) {
-          return false;
-        }
-        
-        // Both null strings
-        if (str1.is_null() && str2.is_null()) {
-          return true;
-        }
-        
-        // One null, one not
-        if (str1.is_null() || str2.is_null()) {
-          return false;
-        }
-        
-        // Compare string content
-        return std::memcmp(str1.data, str2.data, str1.length) == 0;
-      }
-      
-      default:
-        return false;  // Unknown type
-    }
-  }
-  
-  bool operator!=(const ValueRef& other) const {
-    return !(*this == other);
-  }
-  
-  // Standalone equals function (if you prefer functional style)
-  bool equals(const ValueRef& other) const {
-    return *this == other;
-  }
-
-};
-
 // Stream operator for ValueType
 inline std::ostream& operator<<(std::ostream& os, const ValueType type) {
   return os << to_string(type);
diff --git a/include/utils.hpp b/include/utils.hpp
index 8f06882..2112a3d 100644
--- a/include/utils.hpp
+++ b/include/utils.hpp
@@ -159,61 +159,59 @@ static arrow::Result<std::shared_ptr<arrow::Table>> create_table(
   for (const auto& node : nodes) {
     for (int i = 0; i < schema->num_fields(); i++) {
       const auto& field = schema->field(i);
-      auto value_ptr = node->get_value_ptr(field->name(), nullptr);
-
+      auto value_ptr = node->get_value_ref(field->name()).data;
 
       // if (!field_result.ok()) {
       //   ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
       // }
       // else {
-        // const auto value_ptr = field_result.ValueOrDie();
-        if (value_ptr == nullptr) {
-          ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
-        } else {
-          switch (field->type()->id()) {
-            case arrow::Type::INT32: {
-              ARROW_RETURN_NOT_OK(
-                  dynamic_cast<arrow::Int32Builder*>(builders[i].get())
-                      ->Append(*reinterpret_cast<const int32_t*>(value_ptr)));
-              break;
-            }
-            case arrow::Type::INT64: {
-              ARROW_RETURN_NOT_OK(
-                  dynamic_cast<arrow::Int64Builder*>(builders[i].get())
-                      ->Append(*reinterpret_cast<const int64_t*>(value_ptr)));
-              break;
-            }
-            case arrow::Type::FLOAT: {
-              //         return Value{*reinterpret_cast<const double*>(ptr)};
-              ARROW_RETURN_NOT_OK(
-                  dynamic_cast<arrow::FloatBuilder*>(builders[i].get())
-                      ->Append(*reinterpret_cast<const float*>(value_ptr)));
-              break;
-            }
-            case arrow::Type::DOUBLE: {
-              ARROW_RETURN_NOT_OK(
-                  dynamic_cast<arrow::DoubleBuilder*>(builders[i].get())
-                      ->Append(*reinterpret_cast<const double*>(value_ptr)));
-              break;
-            }
-            case arrow::Type::BOOL: {
-              ARROW_RETURN_NOT_OK(
-                  dynamic_cast<arrow::BooleanBuilder*>(builders[i].get())
-                      ->Append(*reinterpret_cast<const bool*>(value_ptr)));
-              break;
-            }
-            case arrow::Type::STRING: {
-              auto str_ref = *reinterpret_cast<const StringRef*>(value_ptr);
-
-              ARROW_RETURN_NOT_OK(
-                  dynamic_cast<arrow::StringBuilder*>(builders[i].get())
-                      ->Append(str_ref.to_string()));
-              break;
-            }
-            default:
-              return arrow::Status::NotImplemented("Unsupported type: ",
-                                                   field->type()->ToString());
+      // const auto value_ptr = field_result.ValueOrDie();
+      if (value_ptr == nullptr) {
+        ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
+      } else {
+        switch (field->type()->id()) {
+          case arrow::Type::INT32: {
+            ARROW_RETURN_NOT_OK(
+                dynamic_cast<arrow::Int32Builder*>(builders[i].get())
+                    ->Append(*reinterpret_cast<const int32_t*>(value_ptr)));
+            break;
+          }
+          case arrow::Type::INT64: {
+            ARROW_RETURN_NOT_OK(
+                dynamic_cast<arrow::Int64Builder*>(builders[i].get())
+                    ->Append(*reinterpret_cast<const int64_t*>(value_ptr)));
+            break;
+          }
+          case arrow::Type::FLOAT: {
+            //         return Value{*reinterpret_cast<const double*>(ptr)};
+            ARROW_RETURN_NOT_OK(
+                dynamic_cast<arrow::FloatBuilder*>(builders[i].get())
+                    ->Append(*reinterpret_cast<const float*>(value_ptr)));
+            break;
+          }
+          case arrow::Type::DOUBLE: {
+            ARROW_RETURN_NOT_OK(
+                dynamic_cast<arrow::DoubleBuilder*>(builders[i].get())
+                    ->Append(*reinterpret_cast<const double*>(value_ptr)));
+            break;
+          }
+          case arrow::Type::BOOL: {
+            ARROW_RETURN_NOT_OK(
+                dynamic_cast<arrow::BooleanBuilder*>(builders[i].get())
+                    ->Append(*reinterpret_cast<const bool*>(value_ptr)));
+            break;
+          }
+          case arrow::Type::STRING: {
+            auto str_ref = *reinterpret_cast<const StringRef*>(value_ptr);
 
+            ARROW_RETURN_NOT_OK(
+                dynamic_cast<arrow::StringBuilder*>(builders[i].get())
+                    ->Append(str_ref.to_string()));
+            break;
+          }
+          default:
+            return arrow::Status::NotImplemented("Unsupported type: ",
+                                                 field->type()->ToString());
         }
       }
     }
diff --git a/src/core.cpp b/src/core.cpp
index 3b5e9d5..18e7619 100644
--- a/src/core.cpp
+++ b/src/core.cpp
@@ -37,7 +37,6 @@ namespace tundradb {
 
 constexpr static uint64_t NODE_MASK = (1ULL << 48) - 1;
 
-
 // Deterministic 16-bit tag from alias string (SchemaRef::value()).
 // https://www.ietf.org/archive/id/draft-eastlake-fnv-21.html
 static uint16_t compute_tag(const SchemaRef& ref) {
@@ -125,17 +124,17 @@ std::string join_container(const Container& container,
 arrow::compute::Expression value_to_expression(const Value& value) {
   switch (value.type()) {
     case ValueType::INT32:
-      return arrow::compute::literal(value.get<int32_t>());
+      return arrow::compute::literal(value.as_int32());
     case ValueType::INT64:
-      return arrow::compute::literal(value.get<int64_t>());
+      return arrow::compute::literal(value.as_int64());
     case ValueType::STRING:
-      return arrow::compute::literal(value.get<std::string>());
+      return arrow::compute::literal(value.to_string());
     case ValueType::FLOAT:
-      return arrow::compute::literal(value.get<float>());
+      return arrow::compute::literal(value.as_float());
     case ValueType::DOUBLE:
-      return arrow::compute::literal(value.get<double>());
+      return arrow::compute::literal(value.as_double());
     case ValueType::BOOL:
-      return arrow::compute::literal(value.get<bool>());
+      return arrow::compute::literal(value.as_bool());
     case ValueType::NA:
       return arrow::compute::literal(
           arrow::Datum(arrow::MakeNullScalar(arrow::null())));
@@ -155,7 +154,7 @@ arrow::Result<std::shared_ptr<arrow::Scalar>> value_to_arrow_scalar(
     case ValueType::DOUBLE:
       return arrow::MakeScalar(value.as_double());
     case ValueType::STRING:
-      return arrow::MakeScalar(value.as_string());
+      return arrow::MakeScalar(value.as_string_ref().to_string());
     case ValueType::BOOL:
       return arrow::MakeScalar(value.as_bool());
     case ValueType::NA:
@@ -167,34 +166,6 @@ arrow::Result<std::shared_ptr<arrow::Scalar>> value_to_arrow_scalar(
   }
 }
 
-arrow::Result<std::shared_ptr<arrow::Scalar>> value_ptr_to_arrow_scalar(
-    const char* ptr, const ValueType type) {
-  switch (type) {
-    case ValueType::INT32:
-      return arrow::MakeScalar(*reinterpret_cast<const int32_t*>(ptr));
-    case ValueType::INT64:
-      return arrow::MakeScalar(*reinterpret_cast<const int64_t*>(ptr));
-    case ValueType::DOUBLE:
-      return arrow::MakeScalar(*reinterpret_cast<const double*>(ptr));
-    case ValueType::STRING: {
-      const StringRef& str_ref = *reinterpret_cast<const StringRef*>(ptr);
-      if (str_ref.is_null()) {
-        return arrow::MakeNullScalar(arrow::utf8());
-      }
-      // Create string directly from StringRef data - safer than custom scalar
-      return arrow::MakeScalar(std::string(str_ref.data, str_ref.length));
-    }
-    case ValueType::BOOL:
-      return arrow::MakeScalar(*reinterpret_cast<const bool*>(ptr));
-    case ValueType::NA:
-      return arrow::MakeNullScalar(arrow::null());
-    default:
-      return arrow::Status::NotImplemented(
-          "Unsupported Value type for Arrow scalar conversion: ",
-          tundradb::to_string(type));
-  }
-}
-
 // Convert CompareOp to appropriate Arrow compute function
 arrow::compute::Expression apply_comparison_op(
     const arrow::compute::Expression& field,
@@ -246,19 +217,19 @@ arrow::compute::Expression where_condition_to_expression(
 arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
     const std::shared_ptr<arrow::Schema>& schema,
     const std::vector<std::shared_ptr<Node>>& nodes) {
-   IF_DEBUG_ENABLED {
-     log_debug("Creating table from {} nodes with schema '{}'", nodes.size(),
-               schema->ToString());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Creating table from {} nodes with schema '{}'", nodes.size(),
+              schema->ToString());
+  }
 
   // Create builders for each field
   std::vector<std::unique_ptr<arrow::ArrayBuilder>> builders;
   builders.reserve(schema->fields().size());
   for (const auto& field : schema->fields()) {
-     IF_DEBUG_ENABLED {
-       log_debug("Creating builder for field '{}' with type {}", field->name(),
-                 field->type()->ToString());
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("Creating builder for field '{}' with type {}", field->name(),
+                field->type()->ToString());
+    }
     auto builder_result = arrow::MakeBuilder(field->type());
     if (!builder_result.ok()) {
       log_error("Failed to create builder for field '{}': {}", field->name(),
@@ -269,9 +240,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Populate builders with data from each node
-   IF_DEBUG_ENABLED {
-     log_debug("Adding data from {} nodes to builders", nodes.size());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Adding data from {} nodes to builders", nodes.size());
+  }
   for (const auto& node : nodes) {
     // Add each field's value to the appropriate builder
     for (int i = 0; i < schema->num_fields(); i++) {
@@ -279,54 +250,40 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
       const auto& field_name = field->name();
 
       // Find the value in the node's data
-      ValueType value_type;
-      const char* value = node->get_value_ptr(field_name, &value_type);
-
-        // Convert Value to Arrow scalar and append to builder
-        if (value) {
-          auto scalar_result = value_ptr_to_arrow_scalar(value, value_type);
-          if (!scalar_result.ok()) {
-            log_error("Failed to convert value to scalar for field '{}': {}",
-                      field_name, scalar_result.status().ToString());
-            return scalar_result.status();
-          }
+      auto value_ref = node->get_value_ref(field_name);
+
+      // Convert Value to Arrow scalar and append to builder
+      if (value_ref.data) {
+        auto scalar_result = value_ref.as_scalar();
+        if (!scalar_result.ok()) {
+          log_error("Failed to convert value to scalar for field '{}': {}",
+                    field_name, scalar_result.status().ToString());
+          return scalar_result.status();
+        }
 
-          auto scalar = scalar_result.ValueOrDie();
-          auto status = builders[i]->AppendScalar(*scalar);
-          if (!status.ok()) {
-            log_error("Failed to append scalar for field '{}': {}", field_name,
-                      status.ToString());
-            return status;
-          }
-        } else {
-           IF_DEBUG_ENABLED {
-             log_debug("Null value for field '{}', appending null", field_name);
-           }
-          auto status = builders[i]->AppendNull();
-          if (!status.ok()) {
-            log_error("Failed to append null for field '{}': {}", field_name,
-                      status.ToString());
-            return status;
-          }
+        auto scalar = scalar_result.ValueOrDie();
+        auto status = builders[i]->AppendScalar(*scalar);
+        if (!status.ok()) {
+          log_error("Failed to append scalar for field '{}': {}", field_name,
+                    status.ToString());
+          return status;
+        }
+      } else {
+        IF_DEBUG_ENABLED {
+          log_debug("Null value for field '{}', appending null", field_name);
+        }
+        auto status = builders[i]->AppendNull();
+        if (!status.ok()) {
+          log_error("Failed to append null for field '{}': {}", field_name,
+                    status.ToString());
+          return status;
         }
       }
-    // else {
-    //     // log_debug("Field '{}' not found in node, appending null",
-    //     // field_name);
-    //     auto status = builders[i]->AppendNull();
-    //     if (!status.ok()) {
-    //       log_error("Failed to append null for field '{}': {}", field_name,
-    //                 status.ToString());
-    //       return status;
-    //     }
-    //   }
-    // }
+    }
   }
 
   // Finish building arrays
-   IF_DEBUG_ENABLED {
-     log_debug("Finalizing arrays from builders");
-   }
+  IF_DEBUG_ENABLED { log_debug("Finalizing arrays from builders"); }
   std::vector<std::shared_ptr<arrow::Array>> arrays;
   arrays.reserve(builders.size());
   for (auto& builder : builders) {
@@ -340,34 +297,32 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_nodes(
   }
 
   // Create table
-   IF_DEBUG_ENABLED {
-     log_debug("Creating table with {} rows and {} columns",
-               arrays.empty() ? 0 : arrays[0]->length(), arrays.size());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Creating table with {} rows and {} columns",
+              arrays.empty() ? 0 : arrays[0]->length(), arrays.size());
+  }
   return arrow::Table::Make(schema, arrays);
 }
 
 arrow::Result<std::shared_ptr<arrow::Table>> filter(
     std::shared_ptr<arrow::Table> table, const WhereExpr& condition,
     bool strip_var) {
-   IF_DEBUG_ENABLED {
-     log_debug("Filtering table with WhereCondition: {}", condition.toString());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Filtering table with WhereCondition: {}", condition.toString());
+  }
 
   try {
     // Convert WhereCondition to Arrow compute expression
     auto filter_expr = where_condition_to_expression(condition, strip_var);
 
-     IF_DEBUG_ENABLED {
-       log_debug("Creating in-memory dataset from table with {} rows",
-                 table->num_rows());
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("Creating in-memory dataset from table with {} rows",
+                table->num_rows());
+    }
     auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(table);
 
     // Create scanner builder
-     IF_DEBUG_ENABLED {
-       log_debug("Creating scanner builder");
-     }
+    IF_DEBUG_ENABLED { log_debug("Creating scanner builder"); }
     auto scan_builder_result = dataset->NewScan();
     if (!scan_builder_result.ok()) {
       log_error("Failed to create scanner builder: {}",
@@ -376,18 +331,16 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
     auto scan_builder = scan_builder_result.ValueOrDie();
 
-     IF_DEBUG_ENABLED {
-       log_debug("Applying compound filter to scanner builder");
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("Applying compound filter to scanner builder");
+    }
     auto filter_status = scan_builder->Filter(filter_expr);
     if (!filter_status.ok()) {
       log_error("Failed to apply filter: {}", filter_status.ToString());
       return filter_status;
     }
 
-     IF_DEBUG_ENABLED {
-       log_debug("Finishing scanner");
-     }
+    IF_DEBUG_ENABLED { log_debug("Finishing scanner"); }
     auto scanner_result = scan_builder->Finish();
     if (!scanner_result.ok()) {
       log_error("Failed to finish scanner: {}",
@@ -396,9 +349,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
     auto scanner = scanner_result.ValueOrDie();
 
-     IF_DEBUG_ENABLED {
-       log_debug("Executing scan to table");
-     }
+    IF_DEBUG_ENABLED { log_debug("Executing scan to table"); }
     auto table_result = scanner->ToTable();
     if (!table_result.ok()) {
       log_error("Failed to convert scan results to table: {}",
@@ -407,10 +358,10 @@ arrow::Result<std::shared_ptr<arrow::Table>> filter(
     }
 
     auto result_table = table_result.ValueOrDie();
-     IF_DEBUG_ENABLED {
-       log_debug("Filter completed: {} rows in, {} rows out", table->num_rows(),
-                 result_table->num_rows());
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("Filter completed: {} rows in, {} rows out", table->num_rows(),
+                result_table->num_rows());
+    }
     return result_table;
 
   } catch (const std::exception& e) {
@@ -656,9 +607,7 @@ struct QueryState {
 
 arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
     const QueryState& query_state) {
-   IF_DEBUG_ENABLED {
-     log_debug("Building schema for denormalized table");
-   }
+  IF_DEBUG_ENABLED { log_debug("Building schema for denormalized table"); }
 
   std::set<std::string> processed_fields;
   std::vector<std::shared_ptr<arrow::Field>> fields;
@@ -667,9 +616,9 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
   // First add fields from the FROM schema
   std::string from_schema = query_state.from.value();
 
-   IF_DEBUG_ENABLED {
-     log_debug("Adding fields from FROM schema '{}'", from_schema);
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Adding fields from FROM schema '{}'", from_schema);
+  }
 
   auto schema_result = query_state.schema_registry->get_arrow(
       query_state.aliases.at(from_schema));
@@ -696,9 +645,9 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
   }
 
   for (const auto& schema_ref : unique_schemas) {
-     IF_DEBUG_ENABLED {
-       log_debug("Adding fields from schema '{}'", schema_ref.value());
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("Adding fields from schema '{}'", schema_ref.value());
+    }
 
     schema_result = query_state.schema_registry->get_arrow(
         query_state.aliases.at(schema_ref.value()));
@@ -724,8 +673,7 @@ arrow::Result<std::shared_ptr<arrow::Schema>> build_denormalized_schema(
 }
 
 struct PathSegment {
-  uint16_t schema;
-  // uint16_t schema_tag;
+  uint16_t schema;  // 16-bit schema tag (numeric), not the schema name
   int64_t node_id;
 
   std::string toString() const {
@@ -766,14 +714,8 @@ struct Row {
   llvm::StringMap<int64_t> schema_ids;  // can we use tag
   bool schema_ids_set = false;
 
-  // void set_cell(const std::string& name,
-  //               std::shared_ptr<arrow::Scalar> scalar) {
-  //   cells[name] = std::move(scalar);
-  // }
-
   bool has_value(const llvm::StringRef name) const {
     return cells.contains(name) && cells.at(name).data != nullptr;
-    // && cells.at(name)->is_valid;
   }
 
   void set_cell_from_node(const std::vector<std::string>& fq_field_names,
@@ -783,38 +725,23 @@ struct Row {
     for (size_t i = 0; i < n; ++i) {
       const auto& field = fields[i];
       const auto& full_name = fq_field_names[i];
-      this->set_cell(full_name, node->get_value_ptr(field->name(), nullptr),
-                     field->type());
+      const char* ptr = node->get_value_ref(field->name()).data;
+      this->set_cell(full_name, ptr, field->type());
     }
-    // schema_ids[node->get_schema()->name()] = node->id;
   }
 
   // New set_cell method for Value objects
   void set_cell(const std::string& name, const char* ptr,
                 const ValueType type) {
-     // Use try_emplace to construct ValueRef in-place (avoids assignment)
-     cells.try_emplace(name, ptr, type);
-  }
-
-  // void set_cell(const std::string& name, std::shared_ptr<arrow::Array> array)
-  // {
-  //   if (array && array->length() > 0) {
-  //     auto scalar_result = array->GetScalar(0);
-  //     if (scalar_result.ok()) {
-  //       cells[name] = scalar_result.ValueOrDie();
-  //       return;
-  //     }
-  //   }
-  //
-  //   // Default to null if array is empty or conversion fails
-  //   cells[name] = nullptr;
-  // }
+    if (!cells.try_emplace(name, ptr, type).second) {
+      cells[name].data = ptr;
+    }
+  }
 
   bool start_with(const std::vector<PathSegment>& prefix) const {
     return is_prefix(prefix, this->path);
   }
 
-  // todo replace
   const llvm::StringMap<int64_t>& extract_schema_ids() {
     if (schema_ids_set) {
       return schema_ids;
@@ -830,7 +757,6 @@ struct Row {
 
         // Store ID for this schema if it's an ID field
         if (field_name.substr(dot_pos + 1) == "id") {
-          // auto id_scalar = value.get_as_int64(); // std::static_pointer_cast<arrow::Int64Scalar>(value);
           schema_ids[schema] = value.as_int64();
         }
       }
@@ -843,7 +769,6 @@ struct Row {
   [[nodiscard]] std::shared_ptr<Row> merge(
       const std::shared_ptr<Row>& other) const {
     std::shared_ptr<Row> merged = std::make_shared<Row>(*this);
-    // merged->id = this->id;
     for (const auto& [name, value] : other->cells) {
       if (!merged->has_value(name)) {
         merged->cells.try_emplace(name, value.data, value.type);
@@ -864,45 +789,7 @@ struct Row {
       }
       first = false;
 
-      ss << field_name.str() << ": ";
-
-      if (value_ref.data == nullptr) {
-        ss << "NULL";
-      } else {
-        // Handle different ValueRef types appropriately
-        switch (value_ref.type) {
-          case ValueType::INT32:
-            ss << value_ref.as_int32();
-            break;
-          case ValueType::INT64:
-            ss << value_ref.as_int64();
-            break;
-          case ValueType::FLOAT:
-            ss << *reinterpret_cast<const float*>(value_ref.data);
-            break;
-          case ValueType::DOUBLE:
-            ss << value_ref.as_double();
-            break;
-          case ValueType::BOOL:
-            ss << (value_ref.as_bool() ? "true" : "false");
-            break;
-          case ValueType::STRING: {
-            const StringRef& str_ref = value_ref.as_string_ref();
-            if (str_ref.is_null()) {
-              ss << "NULL";
-            } else {
-              ss << "\"" << std::string(str_ref.data, str_ref.length) << "\"";
-            }
-            break;
-          }
-          case ValueType::NA:
-            ss << "NULL";
-            break;
-          default:
-            ss << "<unknown_type>";
-            break;
-        }
-      }
+      ss << field_name.str() << ": " << value_ref.ToString();
     }
 
     ss << "}";
@@ -915,19 +802,8 @@ static Row create_empty_row_from_schema(
   Row new_row;
   new_row.id = -1;
   for (const auto& field : final_output_schema->fields()) {
-    new_row.cells.try_emplace(field->name(), nullptr, arrow_type_to_value_type(field->type()));
-    // Create a null scalar of the correct type
-    // auto null_scalar = arrow::MakeNullScalar(field->type());
-    // if (null_scalar != nullptr) {
-    //   new_row.cells[field->name()] = null_scalar;
-    // } else {
-    //   // If creating a null scalar fails, use nullptr as a fallback
-    //   new_row.cells.try_emplace(field->name(), nullptr, field->type());
-    //   if (Logger::get_instance().get_level() >= LogLevel::WARN) {
-    //     log_warn("Failed to create null scalar for field '{}' with type '{}'",
-    //              field->name(), field->type()->ToString());
-    //   }
-    // }
+    new_row.cells.try_emplace(field->name(), nullptr,
+                              arrow_type_to_value_type(field->type()));
   }
   return new_row;
 }
@@ -997,7 +873,6 @@ struct RowNode {
     }
 
     // collect all records from child node and group them by schema
-    // std::unordered_map<std::string, std::vector<Row>> grouped;
     llvm::SmallDenseMap<uint16_t, llvm::SmallVector<std::shared_ptr<Row>, 4>>
         grouped;
     for (const auto& c : children) {
@@ -1057,7 +932,7 @@ struct RowNode {
           // Get variable prefixes (schema names) from cells
           llvm::StringMap<int64_t> schema_ids_r1 =
               r1_from_current_group->extract_schema_ids();
-           llvm::StringMap<int64_t> schema_ids_r2 =
+          llvm::StringMap<int64_t> schema_ids_r2 =
               r2_from_previous_product->extract_schema_ids();
 
           // Check for conflicts - same schema name but different IDs
@@ -1083,12 +958,12 @@ struct RowNode {
               if (!value1.data) continue;
 
               auto it = r2_from_previous_product->cells.find(field_name);
-              if (it != r2_from_previous_product->cells.end() && it->second.data)
-                   {
+              if (it != r2_from_previous_product->cells.end() &&
+                  it->second.data) {
                 // Both rows have this field with non-null values - check if
                 // they match
                 if (!value1.equals(it->second)) {
-                   IF_DEBUG_ENABLED {
+                  IF_DEBUG_ENABLED {
                     log_debug(
                         "Conflict detected: Field '{}' has different values",
                         field_name);
@@ -1112,94 +987,92 @@ struct RowNode {
       }
       final_merged_rows = std::move(temp_product_accumulator);
       if (final_merged_rows.empty()) {
-         IF_DEBUG_ENABLED {
-           log_debug("product_accumulator is empty. stop merge");
-         }
+        IF_DEBUG_ENABLED {
+          log_debug("product_accumulator is empty. stop merge");
+        }
         break;
       }
     }
     return final_merged_rows;
   }
 
-  // std::string toString(bool recursive = true, int indent_level = 0) const {
-  //   // Helper to build indentation string based on level
-  //   auto get_indent = [](int level) { return std::string(level * 2, ' '); };
-  //
-  //   std::stringstream ss;
-  //   std::string indent = get_indent(indent_level);
-  //
-  //   // Print basic node info
-  //   ss << indent << "RowNode [path=" << path_segment.toString()
-  //      << ", depth=" << depth << "] {\n";
-  //
-  //   // Print Row
-  //   if (row.has_value()) {
-  //     ss << indent << "  Path: ";
-  //     if (row.value()->path.empty()) {
-  //       ss << "(empty)";
-  //     } else {
-  //       for (size_t i = 0; i < row.value()->path.size(); ++i) {
-  //         if (i > 0) ss << " → ";
-  //         ss << row.value()->path[i].schema << ":"
-  //            << row.value()->path[i].node_id;
-  //       }
-  //     }
-  //     ss << "\n";
-  //
-  //     // Print key cell values (limited to avoid overwhelming output)
-  //     ss << indent << "  Cells: ";
-  //     if (row.value()->cells.empty()) {
-  //       ss << "(empty)";
-  //     } else {
-  //       size_t count = 0;
-  //       ss << "{ ";
-  //       for (const auto& [key, value] : row.value()->cells) {
-  //         if (count++ > 0) ss << ", ";
-  //         if (count > 5) {  // Limit display
-  //           ss << "... +" << (row.value()->cells.size() - 5) << " more";
-  //           break;
-  //         }
-  //
-  //         ss << key.str() << ": ";
-  //         if (!value) {
-  //           ss << "NULL";
-  //         } else {
-  //           ss << value->ToString();  // Assuming arrow::Scalar has ToString()
-  //         }
-  //       }
-  //       ss << " }";
-  //     }
-  //   }
-  //
-  //   ss << "\n";
-  //
-  //   // Print children count
-  //   ss << indent << "  Children: " << children.size() << "\n";
-  //
-  //   // Recursively print children if requested
-  //   if (recursive && !children.empty()) {
-  //     ss << indent << "  [\n";
-  //     for (const auto& child : children) {
-  //       if (child) {
-  //         ss << child->toString(true, indent_level + 2);
-  //       } else {
-  //         ss << get_indent(indent_level + 2) << "(null child)\n";
-  //       }
-  //     }
-  //     ss << indent << "  ]\n";
-  //   }
-  //
-  //   ss << indent << "}\n";
-  //   return ss.str();
-  // }
+  std::string toString(bool recursive = true, int indent_level = 0) const {
+    // Helper to build indentation string based on level
+    auto get_indent = [](int level) { return std::string(level * 2, ' '); };
 
-  friend std::ostream& operator<<(std::ostream& os, const RowNode& node) {
-    return os << "";// node.toString();
+    std::stringstream ss;
+    std::string indent = get_indent(indent_level);
+
+    // Print basic node info
+    ss << indent << "RowNode [path=" << path_segment.toString()
+       << ", depth=" << depth << "] {\n";
+
+    // Print Row
+    if (row.has_value()) {
+      ss << indent << "  Path: ";
+      if (row.value()->path.empty()) {
+        ss << "(empty)";
+      } else {
+        for (size_t i = 0; i < row.value()->path.size(); ++i) {
+          if (i > 0) ss << " → ";
+          ss << row.value()->path[i].schema << ":"
+             << row.value()->path[i].node_id;
+        }
+      }
+      ss << "\n";
+
+      // Print key cell values (limited to avoid overwhelming output)
+      ss << indent << "  Cells: ";
+      if (row.value()->cells.empty()) {
+        ss << "(empty)";
+      } else {
+        size_t count = 0;
+        ss << "{ ";
+        for (const auto& [key, value] : row.value()->cells) {
+          if (count++ > 0) ss << ", ";
+          if (count > 5) {  // Limit display
+            ss << "... +" << (row.value()->cells.size() - 5) << " more";
+            break;
+          }
+
+          ss << key.str() << ": ";
+          if (!value.data) {
+            ss << "NULL";
+          } else {
+            ss << value.ToString();
+          }
+        }
+        ss << " }";
+      }
+    }
+
+    ss << "\n";
+
+    // Print children count
+    ss << indent << "  Children: " << children.size() << "\n";
+
+    // Recursively print children if requested
+    if (recursive && !children.empty()) {
+      ss << indent << "  [\n";
+      for (const auto& child : children) {
+        if (child) {
+          ss << child->toString(true, indent_level + 2);
+        } else {
+          ss << get_indent(indent_level + 2) << "(null child)\n";
+        }
+      }
+      ss << indent << "  ]\n";
+    }
+
+    ss << indent << "}\n";
+    return ss.str();
   }
 
-  void print(bool recursive = true) const {
-    // log_debug(toString(recursive));
+  friend std::ostream& operator<<(std::ostream& os, const RowNode& node) {
+    return os << node.toString();
   }
+
+  void print(bool recursive = true) const { log_debug(toString(recursive)); }
 };
 
 struct QueueItem {
@@ -1220,8 +1093,7 @@ struct QueueItem {
 // Log grouped connections for a node
 void log_grouped_connections(
     int64_t node_id,
-    const llvm::SmallDenseMap<llvm::StringRef,
-                              llvm::SmallVector<GraphConnection, 4>, 4>&
+    const llvm::SmallDenseMap<uint16_t, llvm::SmallVector<GraphConnection, 4>>&
         grouped_connections) {
   IF_DEBUG_ENABLED {
     if (grouped_connections.empty()) {
@@ -1235,7 +1107,7 @@ void log_grouped_connections(
     for (const auto& it : grouped_connections) {
       auto target_schema = it.first;
       const auto& connections = it.second;
-      log_debug("  To schema '{}': {} connections", target_schema.str(),
+      log_debug("  To schema '{}': {} connections", target_schema,
                 connections.size());
 
       for (size_t i = 0; i < connections.size(); ++i) {
@@ -1251,7 +1123,8 @@ void log_grouped_connections(
 arrow::Result<std::shared_ptr<llvm::SmallVector<std::shared_ptr<Row>, 4>>>
 populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
                   const std::shared_ptr<arrow::Schema>& output_schema,
-                  const QueryState& query_state, llvm::DenseSet<uint64_t>& global_visited) {
+                  const QueryState& query_state,
+                  llvm::DenseSet<uint64_t>& global_visited) {
   IF_DEBUG_ENABLED {
     log_debug("populate_rows_bfs::node={}:{}", start_schema.value(), node_id);
   }
@@ -1279,8 +1152,6 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
       }
       item.row->set_cell_from_node(it_fq->second, node);
       const uint64_t packed = hash_code_(item.schema_ref, item.node_id);
-      // global_visited.insert(item.schema_ref.value() + ":" +
-      //                       std::to_string(item.node_id));
       global_visited.insert(packed);
       item.path_visited_nodes.insert(packed);
 
@@ -1304,7 +1175,7 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
           }
         }
       }
-      // log_grouped_connections(item.node_id, grouped_connections);
+      log_grouped_connections(item.node_id, grouped_connections);
 
       if (grouped_connections.empty()) {
         // we've done
@@ -1312,9 +1183,7 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
           auto r = item.row;
           r->path = item.path;
           r->id = row_id_counter++;
-          IF_DEBUG_ENABLED {
-            log_debug("add row: {}", r->ToString());
-          }
+          IF_DEBUG_ENABLED { log_debug("add row: {}", r->ToString()); }
           result->push_back(r);
         }
 
@@ -1357,14 +1226,10 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
   RowNode tree;
   tree.path_segment = PathSegment{0, -1};
   for (const auto& r : *result) {
-    IF_DEBUG_ENABLED {
-      log_debug("bfs result: {}", r->ToString());
-    }
+    IF_DEBUG_ENABLED { log_debug("bfs result: {}", r->ToString()); }
     tree.insert_row(r);
   }
-  IF_DEBUG_ENABLED {
-    tree.print();
-  }
+  IF_DEBUG_ENABLED { tree.print(); }
   auto merged = tree.merge_rows();
   IF_DEBUG_ENABLED {
     for (const auto& row : merged) {
@@ -1376,22 +1241,22 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema,
 
 // template <NodeIds NodeIdsT>
 arrow::Result<std::shared_ptr<llvm::SmallVector<std::shared_ptr<Row>, 4>>>
-populate_batch_rows(
-    const llvm::DenseSet<int64_t>& node_ids, const SchemaRef& schema_ref,
-    const std::shared_ptr<arrow::Schema>& output_schema,
-    const QueryState& query_state, const TraverseType join_type,
-    llvm::DenseSet<uint64_t>& global_visited) {
+populate_batch_rows(const llvm::DenseSet<int64_t>& node_ids,
+                    const SchemaRef& schema_ref,
+                    const std::shared_ptr<arrow::Schema>& output_schema,
+                    const QueryState& query_state, const TraverseType join_type,
+                    llvm::DenseSet<uint64_t>& global_visited) {
   auto rows = std::make_shared<llvm::SmallVector<std::shared_ptr<Row>, 4>>();
   rows->reserve(node_ids.size());
   llvm::DenseSet<uint64_t> local_visited;
   // For INNER join: only process nodes that have connections
   // For LEFT join: process all nodes from the "left" side
   for (const auto node_id : node_ids) {
-     const uint64_t packed = hash_code_(schema_ref, node_id);
-     if (!global_visited.insert(packed).second) {
-       // Skip if already processed in an earlier traversal
-       continue;
-     }
+    const uint64_t packed = hash_code_(schema_ref, node_id);
+    if (!global_visited.insert(packed).second) {
+      // Skip if already processed in an earlier traversal
+      continue;
+    }
 
     // For INNER JOIN: Skip nodes without connections
     if (join_type == TraverseType::Inner &&
@@ -1488,18 +1353,18 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     }
   }
 
-   IF_DEBUG_ENABLED {
-     log_debug("Processing {} schemas with their respective join types",
-               ordered_schemas.size());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Processing {} schemas with their respective join types",
+              ordered_schemas.size());
+  }
 
   // Process each schema in order
   for (const auto& schema_ref : ordered_schemas) {
     TraverseType join_type = schema_join_types[schema_ref.value()];
-     IF_DEBUG_ENABLED {
-       log_debug("Processing schema '{}' with join type {}", schema_ref.value(),
-                 static_cast<int>(join_type));
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("Processing schema '{}' with join type {}", schema_ref.value(),
+                static_cast<int>(join_type));
+    }
 
     if (!query_state.ids.contains(schema_ref.value())) {
       log_warn("Schema '{}' not found in query state IDs", schema_ref.value());
@@ -1575,10 +1440,10 @@ arrow::Result<std::shared_ptr<std::vector<std::shared_ptr<Row>>>> populate_rows(
     }
   }
 
-   IF_DEBUG_ENABLED {
-     log_debug("Generated {} total rows after processing all schemas",
-               rows->size());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Generated {} total rows after processing all schemas",
+              rows->size());
+  }
   return rows;
 }
 
@@ -1656,7 +1521,8 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
       for (const auto& row : *rows) {
         auto it = row->cells.find(field_name);
         if (it != row->cells.end() && it->second.data != nullptr) {
-          if (auto array_result = arrow::MakeArrayFromScalar(*(it->second.as_scalar().ValueOrDie()), 1);
+          if (auto array_result = arrow::MakeArrayFromScalar(
+                  *(it->second.as_scalar().ValueOrDie()), 1);
               array_result.ok()) {
             field_type = array_result.ValueOrDie()->type();
             break;
@@ -1687,22 +1553,12 @@ arrow::Result<std::shared_ptr<arrow::Table>> create_table_from_rows(
     for (size_t i = 0; i < output_schema->num_fields(); i++) {
       const auto& field_name = output_schema->field(i)->name();
       auto it = row->cells.find(field_name);
-
       if (it != row->cells.end() && it->second.data) {
-        if (auto res = builders[i]->AppendScalar(*it->second.as_scalar().ValueOrDie()); !res.ok()) {
+        if (auto res =
+                builders[i]->AppendScalar(*it->second.as_scalar().ValueOrDie());
+            !res.ok()) {
           return res;
         }
-        // We have a value for this field
-        // auto array_result = arrow::MakeArrayFromScalar(*(it->second), 1);
-        // if (array_result.ok()) {
-        //   auto array = array_result.ValueOrDie();
-        //   auto scalar_result = array->GetScalar(0);
-        //   if (scalar_result.ok()) {
-        //     ARROW_RETURN_NOT_OK(
-        //         builders[i]->AppendScalar(*scalar_result.ValueOrDie()));
-        //     continue;
-        //   }
-        // }
       } else {
         // Fall back to NULL if we couldn't get or append the scalar
         ARROW_RETURN_NOT_OK(builders[i]->AppendNull());
@@ -1797,9 +1653,9 @@ std::vector<std::shared_ptr<WhereExpr>> get_where_to_inline(
     if (clauses[i]->type() == Clause::Type::WHERE) {
       auto where_expr = std::dynamic_pointer_cast<WhereExpr>(clauses[i]);
       if (where_expr->can_inline(target_var)) {
-         IF_DEBUG_ENABLED {
-           log_debug("inline where: '{}'", where_expr->toString());
-         }
+        IF_DEBUG_ENABLED {
+          log_debug("inline where: '{}'", where_expr->toString());
+        }
         inlined.push_back(where_expr);
       }
     }
@@ -1813,9 +1669,7 @@ arrow::Result<std::shared_ptr<arrow::Table>> inline_where(
     const std::vector<std::shared_ptr<WhereExpr>>& where_exprs) {
   auto curr_table = std::move(table);
   for (const auto& exp : where_exprs) {
-     IF_DEBUG_ENABLED {
-       log_debug("inline where '{}'", exp->toString());
-     }
+    IF_DEBUG_ENABLED { log_debug("inline where '{}'", exp->toString()); }
     auto result = filter(curr_table, *exp, true);
     if (!result.ok()) {
       log_error(
@@ -1858,18 +1712,18 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     const Query& query) const {
   QueryState query_state;
   auto result = std::make_shared<QueryResult>();
-   IF_DEBUG_ENABLED {
-     log_debug("Executing query starting from schema '{}'",
-               query.from().toString());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Executing query starting from schema '{}'",
+              query.from().toString());
+  }
   query_state.node_manager = this->node_manager_;
   query_state.schema_registry = this->schema_registry_;
   query_state.from = query.from();
 
   {
-     IF_DEBUG_ENABLED {
-       log_debug("processing 'from' {}", query.from().toString());
-     }
+    IF_DEBUG_ENABLED {
+      log_debug("processing 'from' {}", query.from().toString());
+    }
     // Precompute tag for FROM schema (alias-based hash)
     query_state.from = query.from();
     query_state.from.set_tag(compute_tag(query_state.from));
@@ -1906,9 +1760,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     }
   }
 
-   IF_DEBUG_ENABLED {
-     log_debug("Processing {} query clauses", query.clauses().size());
-   }
+  IF_DEBUG_ENABLED {
+    log_debug("Processing {} query clauses", query.clauses().size());
+  }
 
   // Precompute 16-bit alias-based tags for all SchemaRefs
   // Also precompute fully-qualified field names per alias used in the query
@@ -1919,9 +1773,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
       case Clause::Type::WHERE: {
         auto where = std::dynamic_pointer_cast<WhereExpr>(clause);
         if (where->inlined()) {
-           IF_DEBUG_ENABLED {
-             log_debug("where '{}' is inlined, skip", where->toString());
-           }
+          IF_DEBUG_ENABLED {
+            log_debug("where '{}' is inlined, skip", where->toString());
+          }
           continue;
         }
         auto variables = where->get_all_variables();
@@ -1932,9 +1786,9 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               where->toString());
         }
         if (variables.size() == 1) {
-           IF_DEBUG_ENABLED {
-             log_debug("Processing WHERE clause: '{}'", where->toString());
-           }
+          IF_DEBUG_ENABLED {
+            log_debug("Processing WHERE clause: '{}'", where->toString());
+          }
 
           std::unordered_map<std::string, std::set<int64_t>> new_front_ids;
           std::string variable = *variables.begin();
@@ -1952,10 +1806,10 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           ARROW_RETURN_NOT_OK(query_state.update_table(
               filtered_table_result.ValueOrDie(), SchemaRef::parse(variable)));
         } else {
-           IF_DEBUG_ENABLED {
-             log_debug("Add compound WHERE expression: '{}' to post process",
-                       where->toString());
-           }
+          IF_DEBUG_ENABLED {
+            log_debug("Add compound WHERE expression: '{}' to post process",
+                      where->toString());
+          }
           post_where.emplace_back(where);
         }
         break;
@@ -1964,10 +1818,8 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
         auto traverse = std::static_pointer_cast<Traverse>(clause);
         // Precompute and set tags for source/target refs (alias-based,
         // deterministic)
-        traverse->mutable_source().set_tag(
-            compute_tag(traverse->source()));
-        traverse->mutable_target().set_tag(
-            compute_tag(traverse->target()));
+        traverse->mutable_source().set_tag(compute_tag(traverse->source()));
+        traverse->mutable_target().set_tag(compute_tag(traverse->target()));
 
         ARROW_ASSIGN_OR_RAISE(auto source_schema,
                               query_state.resolve_schema(traverse->source()));
@@ -2020,11 +1872,10 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
               edge_store_
                   ->get_outgoing_edges_view(source_id, traverse->edge_type())
                   .ValueOrDie();  // todo check result
-          // IF_DEBUG_ENABLED {
-          //   log_debug("Node {} has {} outgoing edges of type '{}'",
-          //   source_id,
-          //             outgoing_edges.size(), traverse->edge_type());
-          // }
+          IF_DEBUG_ENABLED {
+            log_debug("Node {} has {} outgoing edges of type '{}'", source_id,
+                      outgoing_edges.count(), traverse->edge_type());
+          }
 
           bool source_had_match = false;
           for (auto edge : outgoing_edges) {
@@ -2087,15 +1938,15 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
         if (traverse->traverse_type() == TraverseType::Inner &&
             !unmatched_source_ids.empty()) {
           for (auto id : unmatched_source_ids) {
-             IF_DEBUG_ENABLED {
-               log_debug("remove unmatched node={}:{}", source.value(), id);
-             }
+            IF_DEBUG_ENABLED {
+              log_debug("remove unmatched node={}:{}", source.value(), id);
+            }
             query_state.remove_node(id, source);
           }
-           IF_DEBUG_ENABLED {
-             log_debug("rebuild table for schema {}:{}", source.value(),
-                       query_state.aliases[source.value()]);
-           }
+          IF_DEBUG_ENABLED {
+            log_debug("rebuild table for schema {}:{}", source.value(),
+                      query_state.aliases[source.value()]);
+          }
           auto table_result =
               filter_table_by_id(query_state.tables[source.value()],
                                  query_state.ids[source.value()]);
@@ -2104,10 +1955,10 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
           }
           query_state.tables[source.value()] = table_result.ValueOrDie();
         }
-         IF_DEBUG_ENABLED {
-           log_debug("found {} neighbors for {}", matched_target_ids.size(),
-                     traverse->target().toString());
-         }
+        IF_DEBUG_ENABLED {
+          log_debug("found {} neighbors for {}", matched_target_ids.size(),
+                    traverse->target().toString());
+        }
 
         if (traverse->traverse_type() == TraverseType::Inner) {
           // intersect
@@ -2182,8 +2033,10 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     log_debug("Query processing complete, building result");
     log_debug("Query state: {}", query_state.ToString());
     for (auto traversal : query_state.traversals) {
-      log_debug("schema tag {}->{}", traversal.source().value(), traversal.source().tag());
-      log_debug("schema tag {}->{}", traversal.target().value(), traversal.target().tag());
+      log_debug("schema tag {}->{}", traversal.source().value(),
+                traversal.source().tag());
+      log_debug("schema tag {}->{}", traversal.target().value(),
+                traversal.target().tag());
     }
   }
 
@@ -2192,9 +2045,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
     return output_schema_res.status();
   }
   const auto output_schema = output_schema_res.ValueOrDie();
-   IF_DEBUG_ENABLED {
-     log_debug("output_schema={}", output_schema->ToString());
-   }
+  IF_DEBUG_ENABLED { log_debug("output_schema={}", output_schema->ToString()); }
 
   auto row_res = populate_rows(query.execution_config(), query_state,
                                query_state.traversals, output_schema);
@@ -2211,9 +2062,7 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
   auto output_table = output_table_res.ValueOrDie();
   for (const auto& expr : post_where) {
     result->mutable_execution_stats().num_where_clauses_post_processed++;
-     IF_DEBUG_ENABLED {
-       log_debug("post process where: {}", expr->toString());
-     }
+    IF_DEBUG_ENABLED { log_debug("post process where: {}", expr->toString()); }
     output_table = filter(output_table, *expr, false).ValueOrDie();
   }
   result->set_table(apply_select(query.select(), output_table));
diff --git a/src/edge_store.cpp b/src/edge_store.cpp
index 344f77d..2d65349 100644
--- a/src/edge_store.cpp
+++ b/src/edge_store.cpp
@@ -7,27 +7,28 @@ namespace tundradb {
 void EdgeView::iterator::advance_to_valid() {
   // Pre-check if type filter is empty to avoid string comparisons
   const bool has_type_filter = !type_filter_.empty();
-  
+
   while (edge_ids_it_ != edge_ids_end_) {
-    tbb::concurrent_hash_map<int64_t, std::shared_ptr<Edge>>::const_accessor edge_acc;
-    
+    tbb::concurrent_hash_map<int64_t, std::shared_ptr<Edge>>::const_accessor
+        edge_acc;
+
     // Fast path: try to find edge (this is the main bottleneck)
     if (store_->edges.find(edge_acc, edge_ids_it_->first)) {
       auto edge = edge_acc->second;
-      
+
       // Fast path: no type filter
       if (!has_type_filter) {
         current_edge_ = edge;
         return;
       }
-      
+
       // Slow path: check type filter
       if (edge->get_type() == type_filter_) {
         current_edge_ = edge;
         return;
       }
     }
-    
+
     ++edge_ids_it_;
   }
   current_edge_.reset();
diff --git a/tests/where_expression_test.cpp b/tests/where_expression_test.cpp
index c3e7e28..f3c4c2f 100644
--- a/tests/where_expression_test.cpp
+++ b/tests/where_expression_test.cpp
@@ -151,6 +151,7 @@ TEST_F(WhereExpressionTest, SimpleWhereCondition) {
 // Test compound WHERE with AND - fluent API
 TEST_F(WhereExpressionTest, CompoundWhereAndFluent) {
   // Test: age > 30 AND city = "NYC"
+  Logger::get_instance().set_level(LogLevel::DEBUG);
   Query query = Query::from("u:User")
                     .where("u.age", CompareOp::Gt, 30)
                     .and_where("u.city", CompareOp::Eq, "NYC")
@@ -559,6 +560,7 @@ TEST_F(WhereExpressionTest, TraversalWhereCombinations) {
 }
 
 TEST_F(WhereExpressionTest, TraversalWhereCombinations2) {
+  Logger::get_instance().set_level(LogLevel::DEBUG);
   Query query = Query::from("u:User")
                     .traverse("u", "WORKS_AT", "c:Company")
                     .where("u.age", CompareOp::Gte, 35)