diff --git a/Cargo.lock b/Cargo.lock index 727ab8cfd6..a56e4bbca7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2852,6 +2852,18 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "comprehensive-crdt-test" +version = "0.0.0" +dependencies = [ + "calimero-sdk", + "calimero-storage", + "calimero-wasm-abi", + "hex", + "serde_json", + "thiserror 1.0.69", +] + [[package]] name = "compression-codecs" version = "0.4.31" diff --git a/Cargo.toml b/Cargo.toml index e9a74753a3..399ce39192 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,6 +84,7 @@ members = [ "./apps/private_data", "./apps/blobs", "./apps/collaborative-editor", + "./apps/comprehensive-crdt-test", "./apps/abi_conformance", "./apps/state-schema-conformance", "./apps/xcall-example", diff --git a/apps/comprehensive-crdt-test/Cargo.toml b/apps/comprehensive-crdt-test/Cargo.toml new file mode 100644 index 0000000000..61d27bb947 --- /dev/null +++ b/apps/comprehensive-crdt-test/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "comprehensive-crdt-test" +version.workspace = true +authors.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +thiserror.workspace = true +calimero-sdk.workspace = true +calimero-storage.workspace = true +hex.workspace = true + +[build-dependencies] +calimero-wasm-abi.workspace = true +serde_json.workspace = true + +[package.metadata.workspaces] +independent = true diff --git a/apps/comprehensive-crdt-test/README.md b/apps/comprehensive-crdt-test/README.md new file mode 100644 index 0000000000..d63c2276c5 --- /dev/null +++ b/apps/comprehensive-crdt-test/README.md @@ -0,0 +1,64 @@ +# Comprehensive CRDT Test Application + +This application tests **ALL** CRDT types, UserStorage, FrozenStorage, and root-level concurrent modifications. 
+ +## Features Tested + +### CRDT Types +- ✅ **Counter** - Grow-only counter with concurrent increments +- ✅ **UnorderedMap** - Field-level merge semantics +- ✅ **Vector** - Element-wise merge +- ✅ **UnorderedSet** - Union merge semantics +- ✅ **RGA (ReplicatedGrowableArray)** - Text CRDT for collaborative editing +- ✅ **LwwRegister** - Last-write-wins register + +### Storage Types +- ✅ **UserStorage (Simple)** - User-owned simple values +- ✅ **UserStorage (Nested)** - User-owned nested data structures +- ✅ **FrozenStorage** - Content-addressable immutable storage + +### Root-Level Merging +- ✅ **Concurrent Root Modifications** - Tests that root merge works when different nodes modify different root fields concurrently + +## Purpose + +This app is designed to: +1. Test all CRDT types in a single application +2. Verify root-level concurrent modifications trigger proper merging +3. Test UserStorage and FrozenStorage alongside CRDT types +4. Serve as a comprehensive integration test for the sync protocol + +## Usage + +Build the app: +```bash +./build.sh +``` + +Run the workflow: +```bash +merobox bootstrap run workflows/comprehensive-crdt-test.yml +``` + +## Workflow Tests + +The `comprehensive-crdt-test.yml` workflow tests: +1. Root Counter - concurrent increments merge correctly +2. Root Map - field-level merge when different nodes modify different keys +3. Root Vector - element-wise merge +4. Root Set - union merge +5. Root RGA - text CRDT merge +6. Root Register - LWW semantics +7. UserStorage Simple - user-owned data sync +8. UserStorage Nested - nested user data with CRDTs +9. FrozenStorage - content-addressable storage +10. Root-Level Concurrent Modifications - different nodes modifying different root fields simultaneously + +## Architecture + +The app state (`ComprehensiveCrdtApp`) contains all CRDT types and storage types as root-level fields. 
This design allows testing root-level concurrent modifications where: +- Node 1 modifies `root_counter` +- Node 2 modifies `root_map` +- Node 1 modifies `root_set` + +All concurrently, triggering `merge_root_state` to merge all fields correctly. diff --git a/apps/comprehensive-crdt-test/build.rs b/apps/comprehensive-crdt-test/build.rs new file mode 100644 index 0000000000..070defd550 --- /dev/null +++ b/apps/comprehensive-crdt-test/build.rs @@ -0,0 +1,38 @@ +use std::fs; +use std::path::Path; + +use calimero_wasm_abi::emitter::emit_manifest; + +fn main() { + println!("cargo:rerun-if-changed=src/lib.rs"); + + // Parse the source code + let src_path = Path::new("src/lib.rs"); + let src_content = fs::read_to_string(src_path).expect("Failed to read src/lib.rs"); + + // Generate ABI manifest using the emitter + let manifest = emit_manifest(&src_content).expect("Failed to emit ABI manifest"); + + // Serialize the manifest to JSON + let json = serde_json::to_string_pretty(&manifest).expect("Failed to serialize manifest"); + + // Write the ABI JSON to the res directory + let res_dir = Path::new("res"); + if !res_dir.exists() { + fs::create_dir_all(res_dir).expect("Failed to create res directory"); + } + + let abi_path = res_dir.join("abi.json"); + fs::write(&abi_path, json).expect("Failed to write ABI JSON"); + + // Extract and write the state schema + if let Ok(mut state_schema) = manifest.extract_state_schema() { + state_schema.schema_version = "wasm-abi/1".to_owned(); + + let state_schema_json = + serde_json::to_string_pretty(&state_schema).expect("Failed to serialize state schema"); + let state_schema_path = res_dir.join("state-schema.json"); + fs::write(&state_schema_path, state_schema_json) + .expect("Failed to write state schema JSON"); + } +} diff --git a/apps/comprehensive-crdt-test/build.sh b/apps/comprehensive-crdt-test/build.sh new file mode 100755 index 0000000000..e643652916 --- /dev/null +++ b/apps/comprehensive-crdt-test/build.sh @@ -0,0 +1,19 @@ 
+#!/bin/bash +set -e + +# Add wasm32 target if not already present +rustup target add wasm32-unknown-unknown || true + +# Build the app +cargo build -p comprehensive-crdt-test --target wasm32-unknown-unknown --release + +# Copy WASM file to res directory +mkdir -p res +cp target/wasm32-unknown-unknown/release/comprehensive_crdt_test.wasm res/comprehensive_crdt_test.wasm + +# Optimize WASM if wasm-opt is available +if command -v wasm-opt &> /dev/null; then + wasm-opt -O2 res/comprehensive_crdt_test.wasm -o res/comprehensive_crdt_test.wasm || true +fi + +echo "Build complete: res/comprehensive_crdt_test.wasm" diff --git a/apps/comprehensive-crdt-test/src/lib.rs b/apps/comprehensive-crdt-test/src/lib.rs new file mode 100644 index 0000000000..d0e626291a --- /dev/null +++ b/apps/comprehensive-crdt-test/src/lib.rs @@ -0,0 +1,459 @@ +//! Comprehensive CRDT Test Application +//! +//! This app tests ALL CRDT types, UserStorage, FrozenStorage, and root-level merging: +//! - Counter +//! - UnorderedMap +//! - Vector +//! - UnorderedSet +//! - RGA (ReplicatedGrowableArray) +//! - LwwRegister +//! - UserStorage (simple and nested) +//! - FrozenStorage +//! +//! The app is designed to test root-level concurrent modifications that trigger merge_root_state. + +#![allow(clippy::len_without_is_empty)] + +use calimero_sdk::app; +use calimero_sdk::borsh::{BorshDeserialize, BorshSerialize}; +use calimero_sdk::serde::Serialize; +use calimero_sdk::PublicKey; +use calimero_storage::collections::Mergeable; +use calimero_storage::collections::{ + Counter, FrozenStorage, LwwRegister, ReplicatedGrowableArray, UnorderedMap, UnorderedSet, + UserStorage, Vector, +}; +use thiserror::Error; + +/// Comprehensive app state with ALL CRDT types and storage types +/// +/// This state is designed to test root-level concurrent modifications. +/// Each field can be modified independently, triggering root merge when +/// different nodes modify different fields concurrently. 
+#[app::state(emits = for<'a> Event<'a>)] +#[derive(Debug, BorshSerialize, BorshDeserialize)] +#[borsh(crate = "calimero_sdk::borsh")] +pub struct ComprehensiveCrdtApp { + // ===== Basic CRDT Types ===== + /// Counter CRDT - concurrent increments should sum + pub root_counter: Counter, + + /// UnorderedMap - field-level merge + pub root_map: UnorderedMap>, + + /// Vector - element-wise merge + pub root_vector: Vector, + + /// UnorderedSet - union merge + pub root_set: UnorderedSet, + + /// RGA - text CRDT for collaborative editing + pub root_rga: ReplicatedGrowableArray, + + /// LwwRegister - last-write-wins + pub root_register: LwwRegister, + + // ===== Storage Types ===== + /// UserStorage - simple user-owned data + pub user_storage_simple: UserStorage>, + + /// UserStorage - nested user-owned data + pub user_storage_nested: UserStorage, + + /// FrozenStorage - content-addressable immutable data + pub frozen_storage: FrozenStorage, +} + +/// Nested user data structure for testing nested UserStorage +#[derive(Debug, BorshSerialize, BorshDeserialize, Default)] +#[borsh(crate = "calimero_sdk::borsh")] +pub struct NestedUserData { + pub map: UnorderedMap>, + pub counter: Counter, +} + +impl Mergeable for NestedUserData { + fn merge( + &mut self, + other: &Self, + ) -> Result<(), calimero_storage::collections::crdt_meta::MergeError> { + self.map.merge(&other.map)?; + self.counter.merge(&other.counter)?; + Ok(()) + } +} + +#[app::event] +pub enum Event<'a> { + CounterIncremented { + value: u64, + }, + MapEntrySet { + key: &'a str, + value: &'a str, + }, + VectorPushed { + value: u64, + }, + SetItemAdded { + item: &'a str, + }, + RgaTextInserted { + position: usize, + text: &'a str, + }, + RegisterSet { + value: &'a str, + }, + UserSimpleSet { + executor_id: PublicKey, + value: &'a str, + }, + UserNestedSet { + executor_id: PublicKey, + key: &'a str, + value: &'a str, + }, + FrozenAdded { + hash: [u8; 32], + value: &'a str, + }, +} + +#[derive(Debug, Error, Serialize)] 
+#[serde(crate = "calimero_sdk::serde")] +#[serde(tag = "kind", content = "data")] +pub enum Error<'a> { + #[error("key not found: {0}")] + NotFound(&'a str), + #[error("User data not found for key: {0}")] + UserNotFound(PublicKey), + #[error("Frozen data not found for hash: {0}")] + FrozenNotFound(&'a str), +} + +#[app::logic] +impl ComprehensiveCrdtApp { + #[app::init] + pub fn init() -> ComprehensiveCrdtApp { + ComprehensiveCrdtApp { + root_counter: Counter::new(), + root_map: UnorderedMap::new(), + root_vector: Vector::new(), + root_set: UnorderedSet::new(), + root_rga: ReplicatedGrowableArray::new(), + root_register: LwwRegister::new(String::new()), + user_storage_simple: UserStorage::new(), + user_storage_nested: UserStorage::new(), + frozen_storage: FrozenStorage::new(), + } + } + + // ===== Counter Operations ===== + + /// Increment the root counter + pub fn increment_root_counter(&mut self) -> Result { + self.root_counter + .increment() + .map_err(|e| format!("Increment failed: {:?}", e))?; + let value = self + .root_counter + .value() + .map_err(|e| format!("Value failed: {:?}", e))?; + app::emit!(Event::CounterIncremented { value }); + Ok(value) + } + + /// Get the root counter value + pub fn get_root_counter(&self) -> Result { + self.root_counter + .value() + .map_err(|e| format!("Value failed: {:?}", e)) + } + + // ===== UnorderedMap Operations ===== + + /// Set a value in the root map + pub fn set_root_map(&mut self, key: String, value: String) -> Result<(), String> { + self.root_map + .insert(key.clone(), value.clone().into()) + .map_err(|e| format!("Insert failed: {:?}", e))?; + app::emit!(Event::MapEntrySet { + key: &key, + value: &value + }); + Ok(()) + } + + /// Get a value from the root map + pub fn get_root_map(&self, key: &str) -> Result, String> { + Ok(self + .root_map + .get(key) + .map_err(|e| format!("Get failed: {:?}", e))? 
+ .map(|r| r.get().clone())) + } + + // ===== Vector Operations ===== + + /// Push a counter to the root vector + pub fn push_root_vector(&mut self, value: u64) -> Result { + let mut counter = Counter::new(); + for _ in 0..value { + counter + .increment() + .map_err(|e| format!("Increment failed: {:?}", e))?; + } + self.root_vector + .push(counter) + .map_err(|e| format!("Push failed: {:?}", e))?; + let len = self + .root_vector + .len() + .map_err(|e| format!("Len failed: {:?}", e))?; + app::emit!(Event::VectorPushed { value }); + Ok(len) + } + + /// Get a counter from the root vector + pub fn get_root_vector(&self, index: usize) -> Result, String> { + Ok(self + .root_vector + .get(index) + .map_err(|e| format!("Get failed: {:?}", e))? + .map(|c| c.value().unwrap_or(0))) + } + + /// Get the root vector length + pub fn get_root_vector_len(&self) -> Result { + self.root_vector + .len() + .map_err(|e| format!("Len failed: {:?}", e)) + } + + // ===== UnorderedSet Operations ===== + + /// Add an item to the root set + pub fn add_root_set(&mut self, item: String) -> Result<(), String> { + self.root_set + .insert(item.clone()) + .map_err(|e| format!("Insert failed: {:?}", e))?; + app::emit!(Event::SetItemAdded { item: &item }); + Ok(()) + } + + /// Check if an item is in the root set + pub fn has_root_set(&self, item: &str) -> Result { + self.root_set + .contains(item) + .map_err(|e| format!("Contains failed: {:?}", e)) + } + + /// Get the root set size + pub fn get_root_set_size(&self) -> Result { + Ok(self + .root_set + .iter() + .map_err(|e| format!("Iter failed: {:?}", e))? 
+ .count()) + } + + // ===== RGA Operations ===== + + /// Insert text into the root RGA + pub fn insert_root_rga(&mut self, position: usize, text: String) -> Result<(), String> { + self.root_rga + .insert_str(position, &text) + .map_err(|e| format!("Insert failed: {:?}", e))?; + app::emit!(Event::RgaTextInserted { + position, + text: &text + }); + Ok(()) + } + + /// Get text from the root RGA + pub fn get_root_rga_text(&self) -> Result { + self.root_rga + .get_text() + .map_err(|e| format!("Get text failed: {:?}", e)) + } + + /// Get the root RGA length + pub fn get_root_rga_len(&self) -> Result { + self.root_rga + .len() + .map_err(|e| format!("Len failed: {:?}", e)) + } + + // ===== LwwRegister Operations ===== + + /// Set the root register value + pub fn set_root_register(&mut self, value: String) -> Result<(), String> { + self.root_register.set(value.clone()); + app::emit!(Event::RegisterSet { value: &value }); + Ok(()) + } + + /// Get the root register value + pub fn get_root_register(&self) -> Result { + Ok(self.root_register.get().clone()) + } + + // ===== UserStorage Simple Operations ===== + + /// Set a simple value for the current user + pub fn set_user_simple(&mut self, value: String) -> Result<(), String> { + let executor_id = calimero_sdk::env::executor_id(); + self.user_storage_simple + .insert(value.clone().into()) + .map_err(|e| format!("Insert failed: {:?}", e))?; + app::emit!(Event::UserSimpleSet { + executor_id: executor_id.into(), + value: &value + }); + Ok(()) + } + + /// Get the simple value for the current user + pub fn get_user_simple(&self) -> Result, String> { + Ok(self + .user_storage_simple + .get() + .map_err(|e| format!("Get failed: {:?}", e))? + .map(|v| v.get().clone())) + } + + /// Get the simple value for a specific user + pub fn get_user_simple_for(&self, user_key: PublicKey) -> Result, String> { + Ok(self + .user_storage_simple + .get_for_user(&user_key) + .map_err(|e| format!("Get for user failed: {:?}", e))? 
+ .map(|v| v.get().clone())) + } + + // ===== UserStorage Nested Operations ===== + + /// Set a nested key-value pair for the current user + pub fn set_user_nested(&mut self, key: String, value: String) -> Result<(), String> { + let executor_id = calimero_sdk::env::executor_id(); + let mut nested_data = self + .user_storage_nested + .get() + .map_err(|e| format!("Get failed: {:?}", e))? + .unwrap_or_default(); + nested_data + .map + .insert(key.clone(), value.clone().into()) + .map_err(|e| format!("Map insert failed: {:?}", e))?; + self.user_storage_nested + .insert(nested_data) + .map_err(|e| format!("Insert failed: {:?}", e))?; + app::emit!(Event::UserNestedSet { + executor_id: executor_id.into(), + key: &key, + value: &value + }); + Ok(()) + } + + /// Increment the nested counter for the current user + pub fn increment_user_nested_counter(&mut self) -> Result { + let mut nested_data = self + .user_storage_nested + .get() + .map_err(|e| format!("Get failed: {:?}", e))? + .unwrap_or_default(); + nested_data + .counter + .increment() + .map_err(|e| format!("Increment failed: {:?}", e))?; + let value = nested_data + .counter + .value() + .map_err(|e| format!("Value failed: {:?}", e))?; + self.user_storage_nested + .insert(nested_data) + .map_err(|e| format!("Insert failed: {:?}", e))?; + Ok(value) + } + + /// Get a nested value for the current user + pub fn get_user_nested(&self, key: &str) -> Result, String> { + let nested_data = self + .user_storage_nested + .get() + .map_err(|e| format!("Get failed: {:?}", e))?; + match nested_data { + Some(data) => Ok(data + .map + .get(key) + .map_err(|e| format!("Map get failed: {:?}", e))? 
+ .map(|v| v.get().clone())), + None => Ok(None), + } + } + + /// Get the nested counter value for the current user + pub fn get_user_nested_counter(&self) -> Result { + let nested_data = self + .user_storage_nested + .get() + .map_err(|e| format!("Get failed: {:?}", e))?; + match nested_data { + Some(data) => data + .counter + .value() + .map_err(|e| format!("Value failed: {:?}", e)), + None => Ok(0), + } + } + + /// Get a nested value for a specific user + pub fn get_user_nested_for( + &self, + user_key: PublicKey, + key: &str, + ) -> Result, String> { + let nested_data = self + .user_storage_nested + .get_for_user(&user_key) + .map_err(|e| format!("Get for user failed: {:?}", e))?; + match nested_data { + Some(data) => Ok(data + .map + .get(key) + .map_err(|e| format!("Map get failed: {:?}", e))? + .map(|v| v.get().clone())), + None => Ok(None), + } + } + + // ===== FrozenStorage Operations ===== + + /// Add a value to frozen storage + pub fn add_frozen(&mut self, value: String) -> Result { + let hash = self + .frozen_storage + .insert(value.clone().into()) + .map_err(|e| format!("Insert failed: {:?}", e))?; + app::emit!(Event::FrozenAdded { + hash, + value: &value + }); + Ok(hex::encode(hash)) + } + + /// Get a value from frozen storage by hash + pub fn get_frozen(&self, hash_hex: String) -> Result { + let mut hash = [0u8; 32]; + hex::decode_to_slice(hash_hex.clone(), &mut hash[..]) + .map_err(|_| "Invalid hash hex".to_string())?; + self.frozen_storage + .get(&hash) + .map_err(|e| format!("Get failed: {:?}", e))? 
+ .map(|v| v.clone()) + .ok_or_else(|| format!("Frozen data not found for hash: {}", hash_hex)) + } +} diff --git a/apps/comprehensive-crdt-test/workflows/comprehensive-crdt-test.yml b/apps/comprehensive-crdt-test/workflows/comprehensive-crdt-test.yml new file mode 100644 index 0000000000..aad7918b6f --- /dev/null +++ b/apps/comprehensive-crdt-test/workflows/comprehensive-crdt-test.yml @@ -0,0 +1,613 @@ +name: Comprehensive CRDT Test +description: Tests ALL CRDT types, UserStorage, FrozenStorage, and root-level concurrent modifications + +force_pull_image: false + +nodes: + chain_id: testnet-1 + count: 2 + image: merod:local + prefix: comprehensive-node + +steps: + # ============================================ + # Setup Phase + # ============================================ + + - name: Install Comprehensive CRDT Application on Node 1 + type: install_application + node: comprehensive-node-1 + path: res/comprehensive_crdt_test.wasm + dev: true + outputs: + app_id: applicationId + + - name: Create Mesh + type: create_mesh + context_node: comprehensive-node-1 + application_id: "{{app_id}}" + nodes: + - comprehensive-node-2 + capability: member + outputs: + context_id: contextId + member_public_key: memberPublicKey + + # Wait for initial state to sync to Node 2 + - name: Wait for initial sync after mesh creation + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 30 + check_interval: 2 + trigger_sync: true + + # ============================================ + # Root Counter Test (CRDT merge) + # ============================================ + + - name: Node 1 increments root counter + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: increment_root_counter + outputs: + counter_1: result + + - name: Wait for first increment to sync + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - 
comprehensive-node-2 + timeout: 30 + check_interval: 2 + trigger_sync: true + + - name: Node 1 increments root counter again + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: increment_root_counter + + - name: Wait for second increment to sync + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 30 + check_interval: 2 + trigger_sync: true + + - name: Node 2 increments root counter (after syncing Node 1's state) + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: increment_root_counter + + - name: Wait for root counter merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get root counter from Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_root_counter + outputs: + counter_after: result + + - name: Assert root counter is 3 (2 + 1) + type: json_assert + statements: + - 'json_equal({{counter_after}}, {"output": 3})' + + # ============================================ + # Root Map Test (field-level merge) + # ============================================ + + - name: Node 1 sets root map key1 + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: set_root_map + args: + key: key1 + value: value1 + + - name: Node 2 sets root map key2 concurrently + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: set_root_map + args: + key: key2 + value: value2 + + - name: Wait for root map merge + type: wait_for_sync + context_id: "{{context_id}}" + 
nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get root map key1 from Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_root_map + args: + key: key1 + outputs: + map_key1: result + + - name: Get root map key2 from Node 1 + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: get_root_map + args: + key: key2 + outputs: + map_key2: result + + - name: Assert root map field-level merge worked + type: json_assert + statements: + - 'json_equal({{map_key1}}, {"output": "value1"})' + - 'json_equal({{map_key2}}, {"output": "value2"})' + + # ============================================ + # Root Vector Test (element-wise merge) + # ============================================ + + - name: Node 1 pushes to root vector + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: push_root_vector + args: + value: 5 + + - name: Node 2 pushes to root vector concurrently + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: push_root_vector + args: + value: 10 + + - name: Wait for root vector merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get root vector length from Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_root_vector_len + outputs: + vector_len: result + + - name: Assert root vector synced correctly + type: json_assert + statements: + - 'json_equal({{vector_len}}, {"output": 2})' + + # 
============================================ + # Root Set Test (union merge) + # ============================================ + + - name: Node 1 adds item1 to root set + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: add_root_set + args: + item: item1 + + - name: Node 2 adds item2 to root set concurrently + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: add_root_set + args: + item: item2 + + - name: Wait for root set union merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Check item1 on Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: has_root_set + args: + item: item1 + outputs: + has_item1: result + + - name: Check item2 on Node 1 + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: has_root_set + args: + item: item2 + outputs: + has_item2: result + + - name: Assert root set union merge worked + type: json_assert + statements: + - 'json_equal({{has_item1}}, {"output": true})' + - 'json_equal({{has_item2}}, {"output": true})' + + # ============================================ + # Root RGA Test (text CRDT merge) + # ============================================ + + - name: Node 1 inserts text at position 0 + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: insert_root_rga + args: + position: 0 + text: Hello + + - name: Node 2 inserts text at position 0 concurrently + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: 
insert_root_rga + args: + position: 0 + text: World + + - name: Wait for root RGA merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get root RGA text from Node 1 + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: get_root_rga_text + outputs: + rga_text: result + + - name: Assert root RGA contains both texts + type: json_assert + statements: + - 'json_equal({{rga_text}}, {"output": "WorldHello"})' + + # ============================================ + # Root Register Test (LWW) + # ============================================ + + - name: Node 1 sets root register + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: set_root_register + args: + value: first + + - name: Wait for timestamp separation + type: wait + seconds: 2 + + - name: Node 2 sets root register (LWW test) + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: set_root_register + args: + value: second + + - name: Wait for root register LWW consensus + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get root register from Node 1 + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: get_root_register + outputs: + register_value: result + + - name: Assert latest timestamp wins + type: json_assert + statements: + - 'json_equal({{register_value}}, {"output": "second"})' + + # ============================================ + # UserStorage Simple Test + # ============================================ + + - name: Node 1 sets user simple 
value + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: set_user_simple + args: + value: user1-value + + - name: Node 2 sets user simple value + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: set_user_simple + args: + value: user2-value + + - name: Wait for user storage sync + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get user simple value for Node 1 from Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_user_simple_for + args: + user_key: "{{member_public_key}}" + outputs: + user1_value: result + + - name: Assert user storage synced correctly + type: json_assert + statements: + - 'json_equal({{user1_value}}, {"output": "user1-value"})' + + # ============================================ + # UserStorage Nested Test + # ============================================ + + - name: Node 1 sets user nested value + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: set_user_nested + args: + key: nested-key + value: nested-value + + - name: Node 1 increments user nested counter + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: increment_user_nested_counter + + - name: Wait for user nested sync + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get user nested value from Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: 
"{{public_key_comprehensive-node-2}}" + method: get_user_nested_for + args: + user_key: "{{member_public_key}}" + key: nested-key + outputs: + nested_value: result + + - name: Get user nested counter from Node 2 (as Node 1 user) + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: get_user_nested_counter + outputs: + nested_counter: result + + - name: Assert user nested synced correctly + type: json_assert + statements: + - 'json_equal({{nested_value}}, {"output": "nested-value"})' + - 'json_equal({{nested_counter}}, {"output": 1})' + + # ============================================ + # FrozenStorage Test + # ============================================ + + - name: Node 1 adds frozen value + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: add_frozen + args: + value: frozen-content + outputs: + frozen_hash: result + + - name: Wait for frozen storage sync + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Get frozen value from Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_frozen + args: + hash_hex: "{{frozen_hash.output}}" + outputs: + frozen_value: result + + - name: Assert frozen storage synced correctly + type: json_assert + statements: + - 'json_equal({{frozen_value}}, {"output": "frozen-content"})' + + # ============================================ + # Root-Level Concurrent Modification Test + # This tests that root merge works when different + # nodes modify different root fields concurrently + # ============================================ + + - name: Node 1 modifies root counter (root field 1) + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + 
executor_public_key: "{{member_public_key}}" + method: increment_root_counter + + - name: Node 2 modifies root map (root field 2) concurrently + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: set_root_map + args: + key: concurrent-key + value: concurrent-value + + - name: Node 1 modifies root set (root field 3) concurrently + type: call + node: comprehensive-node-1 + context_id: "{{context_id}}" + executor_public_key: "{{member_public_key}}" + method: add_root_set + args: + item: concurrent-item + + - name: Wait for root-level concurrent merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - comprehensive-node-1 + - comprehensive-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Verify all concurrent modifications merged on Node 2 + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_root_counter + outputs: + final_counter: result + + - name: Verify root map modification merged + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: get_root_map + args: + key: concurrent-key + outputs: + final_map: result + + - name: Verify root set modification merged + type: call + node: comprehensive-node-2 + context_id: "{{context_id}}" + executor_public_key: "{{public_key_comprehensive-node-2}}" + method: has_root_set + args: + item: concurrent-item + outputs: + final_set: result + + - name: Assert root-level concurrent merge worked + type: json_assert + statements: + - 'json_equal({{final_counter}}, {"output": 4})' # Previous 3 + 1 + - 'json_equal({{final_map}}, {"output": "concurrent-value"})' + - 'json_equal({{final_set}}, {"output": true})' + +stop_all_nodes: false +restart: false +wait_timeout: 120 diff --git 
a/apps/kv-store-with-user-and-frozen-storage/workflows/test_frozen_storage.yml b/apps/kv-store-with-user-and-frozen-storage/workflows/test_frozen_storage.yml index 07f4532c30..b9adf7114c 100644 --- a/apps/kv-store-with-user-and-frozen-storage/workflows/test_frozen_storage.yml +++ b/apps/kv-store-with-user-and-frozen-storage/workflows/test_frozen_storage.yml @@ -155,3 +155,131 @@ steps: type: json_assert statements: - 'json_equal({{node1_frozen_value_from_node2_res}}, {"output": "SomeFrozenString2"})' + + # ============================================ + # FrozenStorage Intrinsics Tests + # ============================================ + + # Test: Content-addressability - same content = same hash (idempotent) + - name: Node 1 inserts same content again (idempotent test) + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + executor_public_key: '{{member_public_key}}' + method: add_frozen + args: + value: "SomeFrozenString" + outputs: + frozen_hash_idempotent: result.output + + - name: Assert idempotent insert returns same hash + type: json_assert + statements: + - 'json_equal({{frozen_hash_idempotent}}, "{{frozen_value_hash_hex}}")' + + # Test: Content-addressability - different content = different hash + - name: Node 2 inserts different content + type: call + node: new-calimero-node-2 + context_id: '{{context_id}}' + executor_public_key: '{{public_key_new-calimero-node-2}}' + method: add_frozen + args: + value: "DifferentFrozenContent" + outputs: + frozen_hash_different: result.output + + - name: Wait for frozen storage sync + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - new-calimero-node-1 + - new-calimero-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + # Test: Merge behavior - FrozenValue::merge() does nothing (immutable) + # Both nodes should have both entries after merge + - name: Node 1 verifies both frozen entries exist after merge + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + 
executor_public_key: '{{member_public_key}}' + method: get_frozen + args: + hash_hex: "{{frozen_value_hash_hex}}" + outputs: + node1_original: result + + - name: Node 1 verifies different content entry exists + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + executor_public_key: '{{member_public_key}}' + method: get_frozen + args: + hash_hex: "{{frozen_hash_different}}" + outputs: + node1_different: result + + - name: Assert frozen storage merge preserves all entries + type: json_assert + statements: + - 'json_equal({{node1_original}}, {"output": "SomeFrozenString"})' + - 'json_equal({{node1_different}}, {"output": "DifferentFrozenContent"})' + + # Test: Concurrent inserts of same content from different nodes + # Should result in same hash (content-addressable) and merge correctly + - name: Node 1 inserts content for concurrent test + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + executor_public_key: '{{member_public_key}}' + method: add_frozen + args: + value: "ConcurrentContent" + outputs: + concurrent_hash_1: result.output + + - name: Node 2 inserts same content concurrently + type: call + node: new-calimero-node-2 + context_id: '{{context_id}}' + executor_public_key: '{{public_key_new-calimero-node-2}}' + method: add_frozen + args: + value: "ConcurrentContent" + outputs: + concurrent_hash_2: result.output + + - name: Wait for concurrent frozen storage merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - new-calimero-node-1 + - new-calimero-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + - name: Assert concurrent inserts produce same hash (content-addressable) + type: json_assert + statements: + - 'json_equal({{concurrent_hash_1}}, {{concurrent_hash_2}})' + + - name: Verify both nodes can retrieve concurrent content + type: call + node: new-calimero-node-2 + context_id: '{{context_id}}' + executor_public_key: '{{public_key_new-calimero-node-2}}' + method: get_frozen + args: + 
hash_hex: "{{concurrent_hash_1}}" + outputs: + concurrent_retrieved: result + + - name: Assert concurrent content retrieved correctly + type: json_assert + statements: + - 'json_equal({{concurrent_retrieved}}, {"output": "ConcurrentContent"})' diff --git a/apps/kv-store-with-user-and-frozen-storage/workflows/test_user_storage.yml b/apps/kv-store-with-user-and-frozen-storage/workflows/test_user_storage.yml index 2606d07773..c84c8ae415 100644 --- a/apps/kv-store-with-user-and-frozen-storage/workflows/test_user_storage.yml +++ b/apps/kv-store-with-user-and-frozen-storage/workflows/test_user_storage.yml @@ -185,3 +185,78 @@ steps: type: json_assert statements: - 'json_equal({{node1_simple_value_res_from_node2}}, {"output": "SimpleUserStringFromNode2"})' + + # ============================================ + # UserStorage Intrinsics Tests + # ============================================ + + # Test: Merge behavior - concurrent writes to different users should merge correctly + - name: Node 1 updates their own user storage (concurrent merge test) + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + executor_public_key: '{{member_public_key}}' + method: set_user_simple + args: + value: "UpdatedByNode1" + + - name: Node 2 updates their own user storage concurrently + type: call + node: new-calimero-node-2 + context_id: '{{context_id}}' + executor_public_key: '{{public_key_new-calimero-node-2}}' + method: set_user_simple + args: + value: "UpdatedByNode2" + + - name: Wait for user storage merge + type: wait_for_sync + context_id: "{{context_id}}" + nodes: + - new-calimero-node-1 + - new-calimero-node-2 + timeout: 60 + check_interval: 2 + trigger_sync: true + + # Verify: Each user's storage should be independent (merge preserves both) + - name: Verify Node 1's storage preserved after merge + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + executor_public_key: '{{member_public_key}}' + method: get_user_simple + outputs: + 
node1_after_merge: result + + - name: Verify Node 2's storage preserved after merge + type: call + node: new-calimero-node-2 + context_id: '{{context_id}}' + executor_public_key: '{{public_key_new-calimero-node-2}}' + method: get_user_simple + outputs: + node2_after_merge: result + + - name: Assert user storage merge preserves both users' data + type: json_assert + statements: + - 'json_equal({{node1_after_merge}}, {"output": "UpdatedByNode1"})' + - 'json_equal({{node2_after_merge}}, {"output": "UpdatedByNode2"})' + + # Test: User isolation - verify users can read but not write to each other's storage + - name: Verify Node 1 can read Node 2's data (read access works) + type: call + node: new-calimero-node-1 + context_id: '{{context_id}}' + executor_public_key: '{{member_public_key}}' + method: get_user_simple_for + args: + user_key: "{{public_key_new-calimero-node-2}}" + outputs: + node1_reads_node2: result + + - name: Assert read access works across users + type: json_assert + statements: + - 'json_equal({{node1_reads_node2}}, {"output": "UpdatedByNode2"})' diff --git a/apps/state-schema-conformance/src/lib.rs b/apps/state-schema-conformance/src/lib.rs index 0e6d171513..8163d8ebc6 100644 --- a/apps/state-schema-conformance/src/lib.rs +++ b/apps/state-schema-conformance/src/lib.rs @@ -12,6 +12,7 @@ use calimero_storage::collections::{Counter, LwwRegister, UnorderedMap, Unordere Clone, Copy, Debug, + Default, PartialEq, Eq, PartialOrd, @@ -54,13 +55,18 @@ impl calimero_storage::collections::Mergeable for Profile { } // Variant types -#[derive(Clone, Debug, Serialize, Deserialize, BorshSerialize, BorshDeserialize)] +#[derive(Clone, Debug, Default, Serialize, Deserialize, BorshSerialize, BorshDeserialize)] #[serde(crate = "calimero_sdk::serde")] #[borsh(crate = "calimero_sdk::borsh")] pub enum Status { - Active { timestamp: u64 }, + Active { + timestamp: u64, + }, + #[default] Inactive, - Pending { reason: String }, + Pending { + reason: String, + }, } // State with 
comprehensive Calimero collection types diff --git a/apps/state-visualization-test/Cargo.toml b/apps/state-visualization-test/Cargo.toml new file mode 100644 index 0000000000..a7fa853e36 --- /dev/null +++ b/apps/state-visualization-test/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "state-visualization-test" +version.workspace = true +authors.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +thiserror.workspace = true +calimero-sdk.workspace = true +calimero-storage.workspace = true + +[build-dependencies] +calimero-wasm-abi.workspace = true +serde_json.workspace = true + +[package.metadata.workspaces] +independent = true diff --git a/apps/state-visualization-test/README.md b/apps/state-visualization-test/README.md new file mode 100644 index 0000000000..d3d85abdae --- /dev/null +++ b/apps/state-visualization-test/README.md @@ -0,0 +1,55 @@ +# State Visualization Test App + +This app is a **test fixture** for merodb's state visualization and schema inference capabilities. + +## Purpose + +This app is designed to verify that: + +1. **`field_name`** is correctly stored in entity metadata for all CRDT collection types +2. **Schema inference** can detect all field types from the database without requiring an external schema file +3. **merodb GUI** correctly displays and visualizes different collection types + +## CRDT Types Included + +| Field | CRDT Type | Description | +|-------|-----------|-------------| +| `items` | `UnorderedMap>` | Key-value pairs | +| `operation_count` | `Counter` | Grow-only counter | +| `operation_history` | `Vector>` | Ordered operation log | +| `tags` | `UnorderedSet` | Unique tags | +| `metadata` | `LwwRegister` | Single value register | + +## Usage + +### Build + +```bash +./build.sh +``` + +### Test with merodb + +1. Install the app on a node +2. Create a context +3. Call `populate_sample_data` to generate test data +4. 
Use `merodb gui` to visualize the state + +```bash +# Install and create context +meroctl --node app install --path apps/state-visualization-test/res/state_visualization_test.wasm +meroctl --node context create --application-id --protocol near + +# Populate test data +meroctl --node call --context --as populate_sample_data + +# View stats +meroctl --node call --context --as get_stats + +# Start merodb GUI (no schema file needed!) +merodb gui +``` + +## Note + +This app is **NOT for production use**. It's a development test fixture for the merodb visualization tools. diff --git a/apps/state-visualization-test/build.rs b/apps/state-visualization-test/build.rs new file mode 100644 index 0000000000..f48a33d55e --- /dev/null +++ b/apps/state-visualization-test/build.rs @@ -0,0 +1,11 @@ +fn main() { + let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR not set"); + let app_abi = calimero_wasm_abi::get_abi::(); + let state_abi = calimero_wasm_abi::get_state_abi::(); + + let abi_json = serde_json::to_string_pretty(&app_abi).expect("Failed to serialize ABI"); + let state_json = serde_json::to_string_pretty(&state_abi).expect("Failed to serialize state"); + + std::fs::write(format!("{}/abi.json", out_dir), abi_json).expect("Failed to write ABI"); + std::fs::write(format!("{}/state.json", out_dir), state_json).expect("Failed to write state"); +} diff --git a/apps/state-visualization-test/build.sh b/apps/state-visualization-test/build.sh new file mode 100755 index 0000000000..0e88945e5a --- /dev/null +++ b/apps/state-visualization-test/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +cd "$(dirname $0)" +PROFILE="${PROFILE:-app-release}" +RUSTFLAGS="--remap-path-prefix $HOME=~" cargo build --target wasm32-unknown-unknown --profile "$PROFILE" + +mkdir -p res +cp ../../target/wasm32-unknown-unknown/"$PROFILE"/state_visualization_test.wasm res/ + +# Try to optimize with wasm-opt, but don't fail if it doesn't work +if command -v wasm-opt &> /dev/null; then + wasm-opt -Oz 
res/state_visualization_test.wasm -o res/state_visualization_test.wasm 2>/dev/null || { + echo "Warning: wasm-opt optimization skipped (bulk memory operations not supported)" + } +fi diff --git a/apps/state-visualization-test/src/lib.rs b/apps/state-visualization-test/src/lib.rs new file mode 100644 index 0000000000..c39e4c18d9 --- /dev/null +++ b/apps/state-visualization-test/src/lib.rs @@ -0,0 +1,249 @@ +//! State Visualization Test App +//! +//! This app is designed to test merodb's state visualization and schema inference +//! capabilities. It includes various CRDT collection types to verify that: +//! +//! 1. `field_name` is correctly stored in entity metadata +//! 2. Schema inference can detect all field types from the database +//! 3. The GUI correctly displays different collection types +//! +//! This is NOT meant for production use - it's a test fixture for merodb development. + +#![allow(clippy::len_without_is_empty)] + +use std::collections::BTreeMap; + +use calimero_sdk::app; +use calimero_sdk::borsh::{BorshDeserialize, BorshSerialize}; +use calimero_sdk::serde::Serialize; +use calimero_storage::collections::{Counter, LwwRegister, UnorderedMap, UnorderedSet, Vector}; + +/// Test state with multiple CRDT collection types for visualization testing. 
+/// +/// Each field uses a different CRDT type to verify schema inference: +/// - `items`: UnorderedMap> - key-value pairs +/// - `operation_count`: Counter - grow-only counter +/// - `operation_history`: Vector> - ordered list of operations +/// - `tags`: UnorderedSet - unique tags +/// - `metadata`: LwwRegister - single value register +#[app::state] +#[derive(Debug, BorshSerialize, BorshDeserialize)] +#[borsh(crate = "calimero_sdk::borsh")] +pub struct VisualizationTest { + /// Key-value pairs stored as UnorderedMap + items: UnorderedMap>, + /// Total number of operations performed (Counter) + operation_count: Counter, + /// History of operations (Vector) + /// Note: Uses LwwRegister because Vector requires T: Mergeable + operation_history: Vector>, + /// Tags associated with entries (UnorderedSet) + tags: UnorderedSet, + /// Store metadata (LwwRegister) + metadata: LwwRegister, +} + +#[derive(Debug, thiserror::Error, Serialize)] +#[serde(crate = "calimero_sdk::serde")] +#[serde(tag = "kind", content = "data")] +pub enum Error<'a> { + #[error("key not found: {0}")] + NotFound(&'a str), +} + +#[app::logic] +impl VisualizationTest { + // ========================================================================= + // Item Operations (UnorderedMap) + // ========================================================================= + + /// Set a key-value pair + pub fn set(&mut self, key: String, value: String) -> app::Result<()> { + app::log!("Setting key: {:?} to value: {:?}", key, value); + + self.items.insert(key.clone(), LwwRegister::new(value.clone()))?; + self.operation_count.increment()?; + self.operation_history + .push(LwwRegister::new(format!("Set: {} = {}", key, value)))?; + + Ok(()) + } + + /// Get a value by key + pub fn get(&self, key: &str) -> app::Result> { + Ok(self.items.get(key)?.map(|v| v.get().clone())) + } + + /// Get all entries + pub fn entries(&self) -> app::Result> { + Ok(self + .items + .entries()? 
+ .map(|(k, v)| (k, v.get().clone())) + .collect()) + } + + /// Remove an entry + pub fn remove(&mut self, key: &str) -> app::Result> { + let result = self.items.remove(key)?.map(|v| v.get().clone()); + if result.is_some() { + self.operation_count.increment()?; + self.operation_history + .push(LwwRegister::new(format!("Removed: {}", key)))?; + } + Ok(result) + } + + // ========================================================================= + // Tag Operations (UnorderedSet) + // ========================================================================= + + /// Add a tag + pub fn add_tag(&mut self, tag: String) -> app::Result { + let inserted = self.tags.insert(tag.clone())?; + if inserted { + self.operation_history + .push(LwwRegister::new(format!("Added tag: {}", tag)))?; + } + Ok(inserted) + } + + /// Remove a tag + pub fn remove_tag(&mut self, tag: &str) -> app::Result { + let removed = self.tags.remove(tag)?; + if removed { + self.operation_history + .push(LwwRegister::new(format!("Removed tag: {}", tag)))?; + } + Ok(removed) + } + + /// Get all tags + pub fn get_tags(&self) -> app::Result> { + self.tags.entries().map(|iter| iter.collect()) + } + + // ========================================================================= + // Metadata Operations (LwwRegister) + // ========================================================================= + + /// Set store metadata + pub fn set_metadata(&mut self, metadata: String) -> app::Result<()> { + self.metadata.set(metadata); + Ok(()) + } + + /// Get store metadata + pub fn get_metadata(&self) -> String { + self.metadata.get().clone() + } + + // ========================================================================= + // Counter & History Operations + // ========================================================================= + + /// Get operation count + pub fn get_operation_count(&self) -> app::Result { + self.operation_count.value().map_err(Into::into) + } + + /// Get operation history + pub fn 
get_operation_history(&self) -> app::Result> { + let len = self.operation_history.len()?; + let mut history = Vec::new(); + for i in 0..len { + if let Some(entry) = self.operation_history.get(i)? { + history.push(entry.get().clone()); + } + } + Ok(history) + } + + // ========================================================================= + // Test Data Population + // ========================================================================= + + /// Populate the store with sample data for testing visualization. + /// Creates multiple entries in each collection type. + pub fn populate_sample_data(&mut self) -> app::Result<()> { + app::log!("Populating sample data for visualization testing"); + + // Set store metadata + self.metadata + .set("Visualization Test Store - sample data".to_string()); + + // Add sample items (UnorderedMap entries) + let sample_items = [ + ("user:alice", "Alice Johnson"), + ("user:bob", "Bob Smith"), + ("user:charlie", "Charlie Brown"), + ("config:theme", "dark"), + ("config:language", "en-US"), + ("config:timezone", "UTC"), + ("product:1001", "Laptop Pro"), + ("product:1002", "Wireless Mouse"), + ("product:1003", "Mechanical Keyboard"), + ("product:1004", "4K Monitor"), + ("session:abc123", "active"), + ("session:def456", "active"), + ("cache:homepage", "cached_content_here"), + ("cache:dashboard", "dashboard_content"), + ("cache:settings", "settings_content"), + ]; + + for (key, value) in sample_items { + self.items + .insert(key.to_string(), LwwRegister::new(value.to_string()))?; + self.operation_count.increment()?; + self.operation_history + .push(LwwRegister::new(format!("Inserted: {} = {}", key, value)))?; + } + + // Add sample tags (UnorderedSet entries) + let sample_tags = [ + "important", + "urgent", + "archived", + "featured", + "pinned", + "read", + "unread", + "starred", + "draft", + "published", + ]; + + for tag in sample_tags { + self.tags.insert(tag.to_string())?; + } + + // Add more history entries (Vector entries) + let 
additional_history = [ + "System initialized", + "Connected to network", + "Loaded configuration", + "User session started", + "Cache warmed up", + ]; + + for entry in additional_history { + self.operation_history + .push(LwwRegister::new(entry.to_string()))?; + } + + Ok(()) + } + + /// Get statistics about all collections + pub fn get_stats(&self) -> app::Result> { + let mut stats = BTreeMap::new(); + stats.insert("items_count".to_string(), self.items.len()? as u64); + stats.insert("tags_count".to_string(), self.tags.len()? as u64); + stats.insert( + "history_count".to_string(), + self.operation_history.len()? as u64, + ); + stats.insert("operation_count".to_string(), self.operation_count.value()?); + Ok(stats) + } +} diff --git a/apps/state-visualization-test/workflows/build.yml b/apps/state-visualization-test/workflows/build.yml new file mode 100644 index 0000000000..67b78d300b --- /dev/null +++ b/apps/state-visualization-test/workflows/build.yml @@ -0,0 +1,35 @@ +name: Build state-visualization-test + +on: + pull_request: + paths: + - "apps/state-visualization-test/**" + push: + branches: + - master + paths: + - "apps/state-visualization-test/**" + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + targets: wasm32-unknown-unknown + + - name: Build + run: | + cd apps/state-visualization-test + chmod +x build.sh + ./build.sh + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: state_visualization_test.wasm + path: apps/state-visualization-test/res/state_visualization_test.wasm diff --git a/apps/team-metrics-macro/src/lib.rs b/apps/team-metrics-macro/src/lib.rs index 1dd185ac50..04a2420ccf 100644 --- a/apps/team-metrics-macro/src/lib.rs +++ b/apps/team-metrics-macro/src/lib.rs @@ -48,9 +48,8 @@ pub enum MetricsEvent { impl TeamMetricsApp { #[app::init] pub fn init() -> TeamMetricsApp { - TeamMetricsApp { - teams: 
UnorderedMap::new(), - } + // Use the auto-generated Default implementation which uses field names + TeamMetricsApp::default() } pub fn record_win(&mut self, team_id: String) -> Result { diff --git a/crates/node/src/delta_store.rs b/crates/node/src/delta_store.rs index de32124389..1c64272ebe 100644 --- a/crates/node/src/delta_store.rs +++ b/crates/node/src/delta_store.rs @@ -92,22 +92,22 @@ impl DeltaApplier> for ContextStorageApplier { ))); } - // Ensure deterministic root hash across all nodes. - // WASM execution may produce different hashes due to non-deterministic factors; - // use the delta author's expected_root_hash to maintain DAG consistency. + // Root hash mismatch indicates concurrent root modifications. + // With mergeable deltas, the merge logic in storage layer handles this correctly. + // The computed hash after merge may differ from expected hash if concurrent modifications occurred. + // This is expected behavior - the merge produces the correct merged state. let computed_hash = outcome.root_hash; if *computed_hash != delta.expected_root_hash { - warn!( + debug!( context_id = %self.context_id, delta_id = ?delta.id, computed_hash = ?computed_hash, expected_hash = ?Hash::from(delta.expected_root_hash), - "Root hash mismatch - using expected hash for consistency" + "Root hash mismatch after merge - this is expected for concurrent root modifications" ); - - self.context_client - .force_root_hash(&self.context_id, delta.expected_root_hash.into()) - .map_err(|e| ApplyError::Application(format!("Failed to set root hash: {}", e)))?; + // Note: With mergeable deltas, we don't force the hash. + // The merge logic produces the correct merged state, which may have a different hash + // than the expected hash from the original delta (which was computed before merge). 
} debug!( @@ -545,29 +545,18 @@ impl DeltaStore { .update_dag_heads(&self.applier.context_id, heads.clone()) .map_err(|e| eyre::eyre!("Failed to update dag_heads: {}", e))?; - // Deterministic root hash selection for concurrent branches. - // When multiple DAG heads exist, use the lexicographically smallest head's root_hash - // to ensure all nodes converge to the same root regardless of delta arrival order. + // With mergeable deltas, multiple DAG heads indicate concurrent branches. + // These will be resolved by merge deltas created during application commits. + // The merge logic in storage layer handles root state merging correctly. if heads.len() > 1 { - let head_hashes = self.head_root_hashes.read().await; - let mut sorted_heads = heads.clone(); - sorted_heads.sort(); - let canonical_head = sorted_heads[0]; - - if let Some(&canonical_root_hash) = head_hashes.get(&canonical_head) { - debug!( - context_id = %self.applier.context_id, - heads_count = heads.len(), - canonical_head = ?canonical_head, - canonical_root = ?canonical_root_hash, - "Multiple DAG heads - using deterministic root hash selection" - ); - - self.applier - .context_client - .force_root_hash(&self.applier.context_id, canonical_root_hash.into()) - .map_err(|e| eyre::eyre!("Failed to set canonical root hash: {}", e))?; - } + debug!( + context_id = %self.applier.context_id, + heads_count = heads.len(), + "Multiple DAG heads detected - merge deltas will resolve forks during commit" + ); + // Note: With mergeable deltas, we don't force a canonical root hash. + // Merge deltas created during commit will merge all head states using + // the registered merge function, producing the correct merged root state. 
} // Cleanup old head hashes that are no longer active diff --git a/crates/sdk/macros/src/state.rs b/crates/sdk/macros/src/state.rs index 27294f4a2f..f376b8c6b8 100644 --- a/crates/sdk/macros/src/state.rs +++ b/crates/sdk/macros/src/state.rs @@ -1,5 +1,5 @@ use proc_macro2::{Span, TokenStream}; -use quote::{quote, ToTokens}; +use quote::{quote, quote_spanned, ToTokens}; use syn::parse::{Parse, ParseStream}; use syn::{ parse2, BoundLifetimes, Error as SynError, GenericParam, Generics, Ident, Lifetime, @@ -47,6 +47,9 @@ impl ToTokens for StateImpl<'_> { // Generate Mergeable implementation let merge_impl = generate_mergeable_impl(ident, generics, orig); + // Generate Default implementation with field names + let default_impl = generate_default_impl(ident, generics, orig); + // Generate registration hook let registration_hook = generate_registration_hook(ident, &ty_generics); @@ -66,6 +69,9 @@ impl ToTokens for StateImpl<'_> { // Auto-generated CRDT merge support #merge_impl + // Auto-generated Default implementation with field names + #default_impl + // Auto-generated registration hook #registration_hook } @@ -412,6 +418,119 @@ fn generate_mergeable_impl( } } +/// Generate Default trait implementation for the state struct +/// +/// This automatically uses field names for CRDT collections, enabling: +/// - Deterministic collection IDs across nodes +/// - Schema inference in merodb and other tools +/// - Better debugging and introspection +/// +/// NOTE: Only generates Default impl if the struct has CRDT collection fields. +/// For structs without collections, users should #[derive(Default)] themselves. 
+fn generate_default_impl( + ident: &Ident, + generics: &Generics, + orig: &StructOrEnumItem, +) -> TokenStream { + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + // Extract fields from the struct + let fields = match orig { + StructOrEnumItem::Struct(s) => &s.fields, + StructOrEnumItem::Enum(_) => { + // Enums don't have Default - user must implement manually + return quote! { + // No Default impl for enums - implement manually if needed + }; + } + }; + + // Helper to check if a type is a CRDT collection + let is_collection_type = |type_str: &str| { + type_str.contains("UnorderedMap") + || type_str.contains("Vector") + || type_str.contains("UnorderedSet") + || type_str.contains("Counter") + || type_str.contains("ReplicatedGrowableArray") + || type_str.contains("UserStorage") + || type_str.contains("FrozenStorage") + }; + + // Count collection fields - only generate Default if there are collections + let has_collections = fields.iter().any(|field| { + let field_type = &field.ty; + let type_str = quote! { #field_type }.to_string(); + is_collection_type(&type_str) + }); + + // If no collection fields, don't generate Default - let user derive/impl it + if !has_collections { + return quote! { + // No auto-generated Default - struct has no CRDT collection fields + // Use #[derive(Default)] or implement Default manually + }; + } + + // Generate field initializations + let field_inits: Vec<_> = fields + .iter() + .filter_map(|field| { + let field_name = field.ident.as_ref()?; + let field_type = &field.ty; + + // Check if this is a known CRDT collection type + let type_str = quote! 
{ #field_type }.to_string(); + let field_name_str = field_name.to_string(); + + if is_collection_type(&type_str) { + // Use new_with_field_name() with the field name + // Create a string literal token stream + let field_name_lit: proc_macro2::TokenStream = + format!("\"{}\"", field_name_str).parse().unwrap(); + let field_span = field_name.span(); + Some(quote_spanned! {field_span=> + #field_name: <#field_type>::new_with_field_name(#field_name_lit), + }) + } else { + // For other types (LwwRegister, String, u64, etc.), use Default + // Note: LwwRegister implements Default when T: Default + // If T doesn't implement Default, use #[app::init] instead + Some(quote! { + #field_name: ::core::default::Default::default(), + }) + } + }) + .collect(); + + quote! { + // ============================================================================ + // AUTO-GENERATED Default implementation by #[app::state] macro + // ============================================================================ + // + // This Default implementation automatically uses field names for CRDT collections, + // enabling deterministic IDs and schema inference. + // + // For CRDT collections (UnorderedMap, Vector, Counter, etc.): + // - Uses new_with_field_name(field_name) to generate deterministic IDs + // - Enables merodb and other tools to infer schema from database + // + // For other types (LwwRegister, scalars, etc.): + // - Uses Default::default() + // - Requires all types to implement Default + // + // If any field type doesn't implement Default, use #[app::init] to + // manually initialize the state instead of relying on this generated Default. + // + impl #impl_generics ::core::default::Default for #ident #ty_generics #where_clause { + fn default() -> Self { + Self { + #(#field_inits)* + } + } + } + } +} + /// Generate registration hook for automatic merge during sync fn generate_registration_hook(ident: &Ident, ty_generics: &syn::TypeGenerics<'_>) -> TokenStream { quote! 
{ diff --git a/crates/storage/readme/DOCUMENTATION_INDEX.md b/crates/storage/readme/DOCUMENTATION_INDEX.md index 2b2a9a6ce8..dea87b6cdd 100644 --- a/crates/storage/readme/DOCUMENTATION_INDEX.md +++ b/crates/storage/readme/DOCUMENTATION_INDEX.md @@ -38,7 +38,8 @@ Complete guide to Calimero Storage CRDT documentation. ### Understanding the System 1. **[Architecture](architecture.md)** - How it works internally 2. **[Merging Deep-Dive](merging.md)** - DAG vs explicit merge explained -3. **[Design Decisions](design-decisions.md)** - Why we built it this way +3. **[Schema Inference](schema-inference.md)** - How field_name enables schema-free inspection +4. **[Design Decisions](design-decisions.md)** - Why we built it this way ### Performance - **[Performance Guide](performance.md)** - Benchmarks, optimization tips @@ -97,6 +98,7 @@ crates/storage/ ├── nesting.md # Nesting patterns guide ├── architecture.md # How it works internally ├── merging.md # Conflict resolution explained + ├── schema-inference.md # Field metadata & schema-free inspection ├── performance.md # Optimization guide ├── migration.md # Upgrading guide └── design-decisions.md # Why we built it this way diff --git a/crates/storage/readme/schema-inference.md b/crates/storage/readme/schema-inference.md new file mode 100644 index 0000000000..b4b395a151 --- /dev/null +++ b/crates/storage/readme/schema-inference.md @@ -0,0 +1,244 @@ +# Schema Inference and Field Metadata + +How Calimero Storage enables schema-free database inspection. + +--- + +## Overview + +Calimero Storage supports **schema inference** - the ability to inspect and visualize state databases without requiring an external schema file. This is achieved by storing **field names** in entity metadata. 
+ +--- + +## How It Works + +### Field Name Storage + +When you use `#[app::state]` to define your app state, the macro automatically generates a `Default` implementation that uses `new_with_field_name()` for each collection: + +```rust +#[app::state] +#[derive(Debug, BorshSerialize, BorshDeserialize)] +pub struct MyApp { + items: UnorderedMap, // field_name = "items" + operation_count: Counter, // field_name = "operation_count" + tags: UnorderedSet, // field_name = "tags" +} +``` + +**Generated `Default` implementation:** + +```rust +impl Default for MyApp { + fn default() -> Self { + Self { + items: UnorderedMap::new_with_field_name("items"), + operation_count: Counter::new_with_field_name("operation_count"), + tags: UnorderedSet::new_with_field_name("tags"), + } + } +} +``` + +### Metadata Structure + +Each entity's metadata (`Metadata` struct) includes: + +```rust +pub struct Metadata { + pub created_at: u64, + pub updated_at: UpdatedAt, + pub storage_type: StorageType, + pub crdt_type: Option, // Counter, UnorderedMap, Vector, etc. + pub field_name: Option, // "items", "tags", etc. +} +``` + +### EntityIndex Storage + +The `field_name` is persisted in the `EntityIndex` for each collection root: + +``` +EntityIndex { + id: , + parent_id: , + metadata: { + crdt_type: Some(UnorderedMap), + field_name: Some("items"), + ... + }, + ... +} +``` + +--- + +## Using Schema Inference + +### With merodb GUI + +The `merodb gui` tool can now visualize state **without a schema file**: + +```bash +# Start the GUI - schema file is optional! +merodb gui +``` + +When no schema file is provided, merodb: +1. Scans the database for `EntityIndex` entries +2. Identifies root-level fields by checking `parent_id` +3. Reads `field_name` and `crdt_type` from metadata +4. 
Builds a schema dynamically + +### With CLI Export + +```bash +# Schema file is now optional +merodb export --db-path /path/to/data --context-id + +# Or specify schema explicitly (takes precedence) +merodb export --db-path /path/to/data --context-id --state-schema-file schema.json +``` + +--- + +## Benefits + +### 1. Zero-Configuration Inspection + +Developers can inspect any Calimero database without needing the original app's schema: + +```bash +# Just point to the database +merodb gui +# → Select database path +# → Select context +# → View state tree! +``` + +### 2. Migration Support + +Field names enable safe schema migrations: + +- **Identify fields:** Know what each entity represents +- **Track changes:** Detect added/removed fields +- **Validate migrations:** Ensure data integrity + +### 3. Debugging + +Better debugging experience: + +- **Clear labels:** See "items" instead of truncated hashes +- **Type information:** Know if a field is a Counter vs Map +- **Structure visualization:** Understand the state tree hierarchy + +--- + +## Backward Compatibility + +### Old Data (No field_name) + +Data written before `field_name` was added deserializes correctly: + +```rust +// Old format: field_name defaults to None +let deserialized: Metadata = borsh::from_slice(&old_bytes)?; +assert_eq!(deserialized.field_name, None); // Safe default +``` + +### Mixed Environments + +- **New collections:** Have `field_name` set +- **Old collections:** `field_name` is `None` +- **Schema inference:** Falls back to sequential matching for old data + +--- + +## Deterministic Collection IDs + +Collections created with `new_with_field_name()` get **deterministic IDs**: + +```rust +fn compute_collection_id(parent_id: Option, field_name: &str) -> Id { + let mut hasher = Sha256::new(); + if let Some(parent) = parent_id { + hasher.update(parent.as_bytes()); + } + hasher.update(field_name.as_bytes()); + Id::new(hasher.finalize().into()) +} +``` + +**Benefits:** +- Same collection gets same 
ID across all nodes +- Enables reliable sync without random IDs +- Predictable for testing and debugging + +--- + +## Collection Types with field_name + +All CRDT collections support `new_with_field_name()`: + +| Collection | Method | CRDT Type Stored | +|------------|--------|------------------| +| `UnorderedMap` | `new_with_field_name("items")` | `CrdtType::UnorderedMap` | +| `Vector` | `new_with_field_name("history")` | `CrdtType::Vector` | +| `UnorderedSet` | `new_with_field_name("tags")` | `CrdtType::UnorderedSet` | +| `Counter` | `new_with_field_name("count")` | `CrdtType::Counter` | +| `ReplicatedGrowableArray` | `new_with_field_name("text")` | `CrdtType::Rga` | +| `UserStorage` | `new_with_field_name("user_data")` | `CrdtType::UserStorage` | +| `FrozenStorage` | `new_with_field_name("frozen_data")` | `CrdtType::FrozenStorage` | + +--- + +## Advanced: Manual Field Names + +For advanced users who want custom field names: + +```rust +// Don't derive Default - implement manually +impl MyApp { + pub fn new() -> Self { + Self { + // Custom field name + items: UnorderedMap::new_with_field_name("custom_items_name"), + // Regular creation (no field_name) + temp_data: UnorderedMap::new(), + } + } +} +``` + +**Note:** When using manual implementation, ensure you call `new_with_field_name()` for collections you want to be discoverable by schema inference. + +--- + +## Limitations + +### 1. Type Parameters Not Inferred + +Schema inference knows `items` is an `UnorderedMap`, but cannot determine: +- Key type (`String`, `u64`, etc.) +- Value type (`LwwRegister`, custom struct, etc.) + +**Workaround:** Values are displayed as best-effort decoded data. + +### 2. Inline Types (LwwRegister) + +`LwwRegister` fields don't create separate `EntityIndex` entries - they're serialized inline with the parent. 
This means: +- `LwwRegister` fields won't appear in schema inference +- Their values are part of the parent entity's data + +--- + +## See Also + +- [Collections API](collections.md) - All collection types +- [Architecture](architecture.md) - How storage works internally +- [Migration Guide](migration.md) - Upgrading existing apps + +--- + +**Last Updated:** 2026-02-04 +**Version:** 0.12.0 diff --git a/crates/storage/src/action.rs b/crates/storage/src/action.rs index 02104f089b..fdb9d3d4f1 100644 --- a/crates/storage/src/action.rs +++ b/crates/storage/src/action.rs @@ -216,4 +216,8 @@ fn hash_metadata_for_payload(hasher: &mut Sha256, metadata: &Metadata) { hasher.update(borsh::to_vec(&partial_type).unwrap_or_default()); } } + + // Include crdt_type in hash to prevent tampering without invalidating signatures + // This is critical for User storage actions where crdt_type affects merge behavior + hasher.update(borsh::to_vec(&metadata.crdt_type).unwrap_or_default()); } diff --git a/crates/storage/src/collections.rs b/crates/storage/src/collections.rs index 9ccfba40e2..74f0870ba0 100644 --- a/crates/storage/src/collections.rs +++ b/crates/storage/src/collections.rs @@ -24,7 +24,9 @@ pub use rga::ReplicatedGrowableArray; pub mod lww_register; pub use lww_register::LwwRegister; pub mod crdt_meta; -pub use crdt_meta::{CrdtMeta, CrdtType, Decomposable, Mergeable, StorageStrategy}; +pub use crdt_meta::{CrdtMeta, Decomposable, Mergeable, StorageStrategy}; +// Re-export CrdtType from entities (canonical definition) +pub use crate::entities::CrdtType; pub mod composite_key; mod crdt_impls; mod decompose_impls; @@ -62,6 +64,17 @@ fn compute_id(parent: Id, key: &[u8]) -> Id { Id::new(hasher.finalize().into()) } +/// Compute a deterministic collection ID from parent ID and field name. +/// This ensures the same collection gets the same ID across all nodes. 
+fn compute_collection_id(parent_id: Option, field_name: &str) -> Id { + let mut hasher = Sha256::new(); + if let Some(parent) = parent_id { + hasher.update(parent.as_bytes()); + } + hasher.update(field_name.as_bytes()); + Id::new(hasher.finalize().into()) +} + #[derive(BorshSerialize, BorshDeserialize)] struct Collection { storage: Element, @@ -131,6 +144,41 @@ impl Collection { this } + /// Creates a new collection with a deterministic ID derived from parent ID and field name. + /// This ensures collections get the same ID across all nodes when created with the same + /// parent and field name. + /// + /// # Arguments + /// * `parent_id` - The ID of the parent collection (None for root-level collections) + /// * `field_name` - The name of the field containing this collection + /// * `crdt_type` - The CRDT type for this collection (e.g., UnorderedMap, Vector) + #[expect(clippy::expect_used, reason = "fatal error if it happens")] + pub(crate) fn new_with_field_name_and_crdt_type( + parent_id: Option, + field_name: &str, + crdt_type: CrdtType, + ) -> Self { + let id = compute_collection_id(parent_id, field_name); + + let mut this = Self { + children_ids: RefCell::new(None), + storage: Element::new_with_field_name_and_crdt_type( + Some(id), + Some(field_name.to_string()), + crdt_type, + ), + _priv: PhantomData, + }; + + if id.is_root() { + let _ignored = >::save(&mut this).expect("save"); + } else { + let _ = >::add_child_to(*ROOT_ID, &mut this).expect("add child"); + } + + this + } + /// Inserts an item into the collection. 
fn insert(&mut self, id: Option, item: T) -> StoreResult { self.insert_with_storage_type(id, item, StorageType::Public) diff --git a/crates/storage/src/collections/counter.rs b/crates/storage/src/collections/counter.rs index c0683e246d..ccbd683fb9 100644 --- a/crates/storage/src/collections/counter.rs +++ b/crates/storage/src/collections/counter.rs @@ -9,7 +9,7 @@ use borsh::io::{ErrorKind, Read, Result as BorshResult, Write}; use borsh::{BorshDeserialize, BorshSerialize}; -use super::{StorageAdaptor, UnorderedMap}; +use super::{CrdtType, StorageAdaptor, UnorderedMap}; use crate::collections::error::StoreError; use crate::interface::StorageError; use crate::store::MainStorage; @@ -184,6 +184,27 @@ impl Counter { pub fn new() -> Self { Self::new_internal() } + + /// Create a new counter with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. + /// + /// Note: Counter uses CrdtType::Counter on its primary (positive) map for schema + /// inference. The negative map is internal and unnamed. 
+ #[must_use] + pub fn new_with_field_name(field_name: &str) -> Self { + Self { + // Primary map gets the field_name and CrdtType::Counter + positive: UnorderedMap::new_with_field_name_and_crdt_type( + field_name, + CrdtType::Counter, + ), + // Negative map is internal - no field_name + negative: UnorderedMap::new_internal(), + } + } } impl Counter { diff --git a/crates/storage/src/collections/crdt_meta.rs b/crates/storage/src/collections/crdt_meta.rs index 9ade236828..59b9a3a156 100644 --- a/crates/storage/src/collections/crdt_meta.rs +++ b/crates/storage/src/collections/crdt_meta.rs @@ -12,24 +12,8 @@ use borsh::{BorshDeserialize, BorshSerialize}; -/// Identifies the specific CRDT type -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum CrdtType { - /// Last-Write-Wins Register - LwwRegister, - /// Grow-only Counter - Counter, - /// Replicated Growable Array (text CRDT) - Rga, - /// Unordered Map (add-wins set semantics for keys) - UnorderedMap, - /// Unordered Set (add-wins semantics) - UnorderedSet, - /// Vector (ordered list with operational transformation) - Vector, - /// Custom user-defined CRDT (with #[derive(CrdtState)]) - Custom(String), -} +// Re-export CrdtType from entities module (canonical definition) +pub use crate::entities::CrdtType; /// Storage strategy for a CRDT type #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/crates/storage/src/collections/frozen.rs b/crates/storage/src/collections/frozen.rs index 3d4fbf1be8..d75f59bfee 100644 --- a/crates/storage/src/collections/frozen.rs +++ b/crates/storage/src/collections/frozen.rs @@ -39,6 +39,25 @@ where storage: Element::new(None), } } + + /// Create a new FrozenStorage with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. 
+ pub fn new_with_field_name(field_name: &str) -> Self { + Self { + inner: UnorderedMap::new_with_field_name_and_crdt_type( + field_name, + CrdtType::FrozenStorage, + ), + storage: Element::new_with_field_name_and_crdt_type( + None, + Some(field_name.to_string()), + CrdtType::FrozenStorage, + ), + } + } } impl Default for FrozenStorage @@ -159,7 +178,7 @@ where S: StorageAdaptor, { fn crdt_type() -> CrdtType { - CrdtType::Custom("FrozenStorage".to_owned()) + CrdtType::FrozenStorage } fn storage_strategy() -> StorageStrategy { StorageStrategy::Structured diff --git a/crates/storage/src/collections/rga.rs b/crates/storage/src/collections/rga.rs index 5455da2a10..c1341662c1 100644 --- a/crates/storage/src/collections/rga.rs +++ b/crates/storage/src/collections/rga.rs @@ -26,7 +26,7 @@ use borsh::{BorshDeserialize, BorshSerialize}; -use super::UnorderedMap; +use super::{CrdtType, UnorderedMap}; use crate::collections::error::StoreError; use crate::env; use crate::store::{MainStorage, StorageAdaptor}; @@ -146,6 +146,18 @@ impl ReplicatedGrowableArray { pub fn new() -> Self { Self::new_internal() } + + /// Create a new RGA with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. 
+ #[must_use] + pub fn new_with_field_name(field_name: &str) -> Self { + Self { + chars: UnorderedMap::new_with_field_name_and_crdt_type(field_name, CrdtType::Rga), + } + } } impl Default for ReplicatedGrowableArray { diff --git a/crates/storage/src/collections/unordered_map.rs b/crates/storage/src/collections/unordered_map.rs index db757ad50b..bc2b62332c 100644 --- a/crates/storage/src/collections/unordered_map.rs +++ b/crates/storage/src/collections/unordered_map.rs @@ -9,7 +9,7 @@ use borsh::{BorshDeserialize, BorshSerialize}; use serde::ser::SerializeMap; use serde::Serialize; -use super::{compute_id, Collection, EntryMut, StorageAdaptor}; +use super::{compute_id, Collection, CrdtType, EntryMut, StorageAdaptor}; use crate::address::Id; use crate::collections::error::StoreError; use crate::entities::{ChildInfo, Data, Element, StorageType}; @@ -33,6 +33,31 @@ where pub fn new() -> Self { Self::new_internal() } + + /// Create a new map collection with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. + pub fn new_with_field_name(field_name: &str) -> Self { + Self { + inner: Collection::new_with_field_name_and_crdt_type( + None, + field_name, + CrdtType::UnorderedMap, + ), + } + } + + /// Create a new map collection with field name and custom CRDT type. + /// + /// This is used internally by composite types like Counter that want to + /// store their own CRDT type while using UnorderedMap for storage. 
+ pub(crate) fn new_with_field_name_and_crdt_type(field_name: &str, crdt_type: CrdtType) -> Self { + Self { + inner: Collection::new_with_field_name_and_crdt_type(None, field_name, crdt_type), + } + } } impl UnorderedMap diff --git a/crates/storage/src/collections/unordered_set.rs b/crates/storage/src/collections/unordered_set.rs index 5a032a7295..a314b16861 100644 --- a/crates/storage/src/collections/unordered_set.rs +++ b/crates/storage/src/collections/unordered_set.rs @@ -7,7 +7,7 @@ use borsh::{BorshDeserialize, BorshSerialize}; use serde::ser::SerializeSeq; use serde::Serialize; -use super::{compute_id, Collection}; +use super::{compute_id, Collection, CrdtType}; use crate::collections::error::StoreError; use crate::entities::Data; use crate::store::{MainStorage, StorageAdaptor}; @@ -27,6 +27,21 @@ where pub fn new() -> Self { Self::new_internal() } + + /// Create a new set collection with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. + pub fn new_with_field_name(field_name: &str) -> Self { + Self { + inner: Collection::new_with_field_name_and_crdt_type( + None, + field_name, + CrdtType::UnorderedSet, + ), + } + } } impl UnorderedSet diff --git a/crates/storage/src/collections/user.rs b/crates/storage/src/collections/user.rs index 5d35067938..29cbc3de01 100644 --- a/crates/storage/src/collections/user.rs +++ b/crates/storage/src/collections/user.rs @@ -37,6 +37,25 @@ where storage: Element::new(None), } } + + /// Create a new UserStorage with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. 
+ pub fn new_with_field_name(field_name: &str) -> Self { + Self { + inner: UnorderedMap::new_with_field_name_and_crdt_type( + field_name, + CrdtType::UserStorage, + ), + storage: Element::new_with_field_name_and_crdt_type( + None, + Some(field_name.to_string()), + CrdtType::UserStorage, + ), + } + } } impl Default for UserStorage @@ -170,7 +189,7 @@ where S: StorageAdaptor, { fn crdt_type() -> CrdtType { - CrdtType::Custom("UserStorage".to_owned()) + CrdtType::UserStorage } fn storage_strategy() -> StorageStrategy { StorageStrategy::Structured diff --git a/crates/storage/src/collections/vector.rs b/crates/storage/src/collections/vector.rs index f2315d2f33..4f6bc1c381 100644 --- a/crates/storage/src/collections/vector.rs +++ b/crates/storage/src/collections/vector.rs @@ -8,7 +8,7 @@ use borsh::{BorshDeserialize, BorshSerialize}; use serde::ser::SerializeSeq; use serde::Serialize; -use super::Collection; +use super::{Collection, CrdtType}; use crate::collections::error::StoreError; use crate::store::{MainStorage, StorageAdaptor}; @@ -46,6 +46,21 @@ where pub fn new() -> Self { Self::new_internal() } + + /// Create a new vector collection with field name for schema inference. + /// + /// This enables merodb and other tools to infer the schema from the database + /// without requiring an external schema file. The field name is used to + /// generate deterministic collection IDs. + pub fn new_with_field_name(field_name: &str) -> Self { + Self { + inner: Collection::new_with_field_name_and_crdt_type( + None, + field_name, + CrdtType::Vector, + ), + } + } } impl Vector diff --git a/crates/storage/src/entities.rs b/crates/storage/src/entities.rs index 3f6b19165d..d6846946e3 100644 --- a/crates/storage/src/entities.rs +++ b/crates/storage/src/entities.rs @@ -23,6 +23,38 @@ use borsh::{BorshDeserialize, BorshSerialize}; use crate::address::Id; use crate::env::time_now; +/// Identifies the specific CRDT type for entity metadata. 
+/// +/// Used to enable proper CRDT merge dispatch during state synchronization. +/// Without this, state sync falls back to Last-Write-Wins (LWW), which causes +/// data loss for concurrent updates on Counters, Maps, Sets, etc. +#[derive(BorshDeserialize, BorshSerialize, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum CrdtType { + /// Last-Write-Wins Register + LwwRegister, + /// Grow-only Counter + Counter, + /// Replicated Growable Array (text CRDT) + Rga, + /// Unordered Map (add-wins set semantics for keys) + UnorderedMap, + /// Unordered Set (add-wins semantics) + UnorderedSet, + /// Vector (ordered list with operational transformation) + Vector, + /// UserStorage - user-owned storage wrapper + UserStorage, + /// FrozenStorage - content-addressable immutable storage + FrozenStorage, + /// Record - struct/record type that merges field-by-field using children's merge functions + Record, + /// Custom user-defined CRDT (requires WASM callback for merge) + Custom { + /// Type name identifier for the custom CRDT + type_name: String, + }, +} + /// Marker trait for atomic, persistable entities. /// /// Implemented via `#[derive(AtomicUnit)]` macro. @@ -180,6 +212,23 @@ impl Element { /// Creates a new element (marked dirty, empty hash until saved). #[must_use] pub fn new(id: Option) -> Self { + Self::new_with_field_name(id, None) + } + + /// Creates a new element with optional field name for schema inference. + #[must_use] + pub fn new_with_field_name(id: Option, field_name: Option) -> Self { + Self::new_with_field_name_and_crdt_type(id, field_name, CrdtType::LwwRegister) + } + + /// Creates a new element with field name and specific CRDT type for schema inference. + /// This allows collections to specify their actual CRDT type (e.g., UnorderedMap, Vector). 
+ #[must_use] + pub fn new_with_field_name_and_crdt_type( + id: Option, + field_name: Option, + crdt_type: CrdtType, + ) -> Self { let timestamp = time_now(); let element_id = id.unwrap_or_else(Id::random); Self { @@ -189,12 +238,15 @@ impl Element { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Public, + crdt_type: Some(crdt_type), + field_name, }, merkle_hash: [0; 32], } } /// Creates the root element. + /// Root elements don't have a field name (they are the root of the state tree). #[must_use] pub fn root() -> Self { let timestamp = time_now(); @@ -205,6 +257,8 @@ impl Element { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Public, + crdt_type: Some(CrdtType::Record), + field_name: None, }, merkle_hash: [0; 32], } @@ -332,10 +386,7 @@ impl Default for StorageType { } /// System metadata (timestamps in u64 nanoseconds). -#[derive( - BorshDeserialize, BorshSerialize, Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd, -)] -#[non_exhaustive] +#[derive(BorshSerialize, Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] pub struct Metadata { /// Timestamp of creation time in u64 nanoseconds. pub created_at: u64, @@ -346,16 +397,48 @@ pub struct Metadata { /// different characteristics of handling in the node. /// See `StorageType`. pub storage_type: StorageType, + + /// CRDT type for merge dispatch during state synchronization. + /// + /// - Built-in types (Counter, Map, etc.) merge in storage layer + /// - Custom types dispatch to WASM for app-defined merge + /// - None indicates legacy data (falls back to LWW) + /// + /// See `CrdtType`. + pub crdt_type: Option, + + /// Field name for schema inference and migrations. 
+ /// + /// - Stored when entity is created via `new_with_field_name()` + /// - Enables schema inference from database without external schema file + /// - Critical for migrations: identifies which field an entity belongs to + /// - None for legacy data or entities created without field name + pub field_name: Option, } impl Metadata { /// Creates new metadata with the provided timestamps. + /// Defaults to LwwRegister CRDT type. #[must_use] pub fn new(created_at: u64, updated_at: u64) -> Self { Self { created_at, updated_at: updated_at.into(), storage_type: StorageType::default(), + crdt_type: Some(CrdtType::LwwRegister), + field_name: None, + } + } + + /// Creates new metadata with the provided timestamps and CRDT type. + #[must_use] + pub fn with_crdt_type(created_at: u64, updated_at: u64, crdt_type: CrdtType) -> Self { + Self { + created_at, + updated_at: updated_at.into(), + storage_type: StorageType::default(), + crdt_type: Some(crdt_type), + field_name: None, } } @@ -375,6 +458,126 @@ impl Metadata { pub fn updated_at(&self) -> u64 { *self.updated_at } + + /// Checks if the CRDT type is a built-in type (not Custom). 
+ #[must_use] + pub fn is_builtin_crdt(&self) -> bool { + matches!( + self.crdt_type, + Some(CrdtType::LwwRegister) + | Some(CrdtType::Counter) + | Some(CrdtType::Rga) + | Some(CrdtType::UnorderedMap) + | Some(CrdtType::UnorderedSet) + | Some(CrdtType::Vector) + | Some(CrdtType::UserStorage) + | Some(CrdtType::FrozenStorage) + | Some(CrdtType::Record) + ) + } +} + +// Custom BorshDeserialize implementation for backward compatibility +// Old Metadata didn't have crdt_type field, so we handle missing field gracefully +impl borsh::BorshDeserialize for Metadata { + fn deserialize_reader(reader: &mut R) -> Result { + use tracing::debug; + + let created_at = u64::deserialize_reader(reader)?; + let updated_at = UpdatedAt::deserialize_reader(reader)?; + let storage_type = StorageType::deserialize_reader(reader)?; + + // Try to deserialize crdt_type as Option + // If we run out of bytes (old format), default to None + // This handles backward compatibility with old Metadata that didn't have crdt_type + let crdt_type = match >::deserialize_reader(reader) { + Ok(ct) => { + debug!( + target: "storage::entities", + "Metadata deserialized with crdt_type: {:?}", + ct + ); + ct + } + Err(e) => { + // Check error kind first (most reliable) + use std::io::ErrorKind; + let is_eof = matches!(e.kind(), ErrorKind::UnexpectedEof); + + // Also check error message for Borsh-specific errors + let err_str = e.to_string(); + let is_borsh_eof = err_str.contains("UnexpectedEof") + || err_str.contains("Not all bytes read") + || err_str.contains("Unexpected length") + || err_str.contains("Unexpected end of input"); + + debug!( + target: "storage::entities", + "Metadata deserialization: crdt_type field missing (old format), error_kind={:?}, error_msg={}, is_eof={}, is_borsh_eof={}", + e.kind(), + err_str, + is_eof, + is_borsh_eof + ); + + if is_eof || is_borsh_eof { + // Old format without crdt_type - default to None + None + } else { + // Some other error - propagate it + debug!( + target: 
"storage::entities", + "Metadata deserialization: propagating non-EOF error: {}", + err_str + ); + return Err(e); + } + } + }; + + // Try to deserialize field_name as Option + // If we run out of bytes (old format), default to None + let field_name = match >::deserialize_reader(reader) { + Ok(fn_val) => { + debug!( + target: "storage::entities", + "Metadata deserialized with field_name: {:?}", + fn_val + ); + fn_val + } + Err(e) => { + use std::io::ErrorKind; + let is_eof = matches!(e.kind(), ErrorKind::UnexpectedEof); + let err_str = e.to_string(); + let is_borsh_eof = err_str.contains("UnexpectedEof") + || err_str.contains("Not all bytes read") + || err_str.contains("Unexpected length") + || err_str.contains("Unexpected end of input"); + + if is_eof || is_borsh_eof { + // Old format without field_name - default to None + None + } else { + // Some other error - propagate it + debug!( + target: "storage::entities", + "Metadata deserialization: field_name error (non-EOF): {}", + e + ); + return Err(e); + } + } + }; + + Ok(Metadata { + created_at, + updated_at, + storage_type, + crdt_type, + field_name, + }) + } } /// Update timestamp (PartialEq always true for CRDT semantics). diff --git a/crates/storage/src/error.rs b/crates/storage/src/error.rs index 1fdd53e56a..80a45a9774 100644 --- a/crates/storage/src/error.rs +++ b/crates/storage/src/error.rs @@ -59,6 +59,10 @@ pub enum StorageError { #[error("Serialization error: {0}")] SerializationError(IoError), + /// An error occurred during CRDT merge. + #[error("Merge error: {0}")] + MergeError(String), + /// An error from the Store. 
#[error("Store error: {0}")] StoreError(#[from] Report), @@ -85,6 +89,7 @@ impl Serialize for StorageError { )), Self::InvalidData(ref msg) => serializer.serialize_str(msg), Self::InvalidSignature => serializer.serialize_str("Invalid signature"), + Self::MergeError(ref msg) => serializer.serialize_str(msg), Self::NonceReplay(ref data) => { let (pk, nonce) = &**data; serializer.serialize_str(&format!("Nonce replay for {}: {}", pk, nonce)) diff --git a/crates/storage/src/index.rs b/crates/storage/src/index.rs index acb7815fc0..30bc49ba1c 100644 --- a/crates/storage/src/index.rs +++ b/crates/storage/src/index.rs @@ -77,6 +77,12 @@ impl Index { }); child_index.parent_id = Some(parent_id); child_index.own_hash = child.merkle_hash(); + // Always preserve field_name from child metadata if it exists + // This ensures field_name is stored even if EntityIndex already exists + // Critical for schema inference - field_name identifies which struct field this entity belongs to + if child.metadata.field_name.is_some() { + child_index.metadata.field_name = child.metadata.field_name.clone(); + } child_index.full_hash = Self::calculate_full_hash_for_children(child_index.own_hash, &child_index.children)?; Self::save_index(&child_index)?; diff --git a/crates/storage/src/interface.rs b/crates/storage/src/interface.rs index 621b002320..d989343481 100644 --- a/crates/storage/src/interface.rs +++ b/crates/storage/src/interface.rs @@ -46,10 +46,12 @@ use sha2::{Digest, Sha256}; use tracing::debug; use crate::address::Id; +use crate::collections::crdt_meta::CrdtType; use crate::constants; use crate::entities::{ChildInfo, Data, Metadata, SignatureData, StorageType}; use crate::env::time_now; use crate::index::Index; +use crate::merge::{try_merge_by_type_name, try_merge_registered, WasmMergeCallback}; use crate::store::{Key, MainStorage, StorageAdaptor}; // Re-export types for convenience @@ -705,6 +707,8 @@ impl Interface { /// - `IndexNotFound` if entity exists but has no index /// pub fn 
find_by_id(id: Id) -> Result, StorageError> { + use tracing::debug; + // Check if entity is deleted (tombstone) if >::is_deleted(id)? { return Ok(None); // Entity is deleted @@ -716,7 +720,27 @@ impl Interface { return Ok(None); }; - let mut item = from_slice::(&slice).map_err(StorageError::DeserializationError)?; + debug!( + target: "storage::interface", + "find_by_id: deserializing entity, id={}, data_len={}", + id, + slice.len() + ); + + let mut item = match from_slice::(&slice) { + Ok(item) => item, + Err(e) => { + debug!( + target: "storage::interface", + "find_by_id: deserialization failed, id={}, error={}, data_len={}, data_preview={:?}", + id, + e, + slice.len(), + if slice.len() > 100 { &slice[..100] } else { &slice } + ); + return Err(StorageError::DeserializationError(e)); + } + }; let (full_hash, _) = >::get_hashes_for(id)?.ok_or(StorageError::IndexNotFound(id))?; @@ -726,6 +750,13 @@ impl Interface { item.element_mut().metadata = >::get_metadata(id)?.ok_or(StorageError::IndexNotFound(id))?; + debug!( + target: "storage::interface", + "find_by_id: successfully deserialized entity, id={}, metadata_crdt_type={:?}", + id, + item.element().metadata.crdt_type + ); + Ok(Some(item)) } @@ -948,29 +979,57 @@ impl Interface { data: &[u8], metadata: Metadata, ) -> Result, StorageError> { - let incoming_created_at = metadata.created_at; - let incoming_updated_at = metadata.updated_at(); + let _incoming_created_at = metadata.created_at; + let _incoming_updated_at = metadata.updated_at(); let last_metadata = >::get_metadata(id)?; let final_data = if let Some(last_metadata) = &last_metadata { - if last_metadata.updated_at > metadata.updated_at { - return Ok(None); - } else if id.is_root() { - // Root entity (app state) - ALWAYS merge to preserve CRDTs like G-Counter - // Even if incoming is newer, we merge to avoid losing concurrent updates + // CRITICAL: Root entities with crdt_type ALWAYS merge, regardless of timestamps! 
+ // CRDT merge is idempotent and based on data, not timestamps. + // For backward compatibility, root entities WITHOUT crdt_type use LWW. + let has_crdt_type = metadata.crdt_type.is_some() || last_metadata.crdt_type.is_some(); + if id.is_root() && has_crdt_type { + // Root entity (app state) with CRDT type - ALWAYS merge to preserve CRDTs + // Even if incoming is older, we merge to avoid losing concurrent updates + // EXCEPT during initialization where merge fails - allow overwriting incompatible state if let Some(existing_data) = S::storage_read(Key::Entry(id)) { - Self::try_merge_data( + // Check if this appears to be initialization (created_at == updated_at or very close) + let is_init = metadata.created_at == metadata.updated_at() + || metadata.updated_at().saturating_sub(metadata.created_at) + < 1_000_000_000; // Within 1 second + match Self::try_merge_data( id, &existing_data, data, *last_metadata.updated_at, *metadata.updated_at, - )? + ) { + Ok(merged) => merged, + Err(e) if is_init => { + // During initialization, if merge fails (e.g., incompatible state from previous run), + // allow overwriting existing state instead of failing + // This handles cases where leftover state exists but can't be deserialized/merged + debug!( + %id, + error = %e, + created_at = metadata.created_at, + updated_at = %metadata.updated_at(), + "Merge failed during initialization, overwriting existing state" + ); + data.to_vec() + } + Err(e) => return Err(e), + } } else { data.to_vec() } - } else if last_metadata.updated_at == metadata.updated_at { + } else if *last_metadata.updated_at > *metadata.updated_at { + // Non-root or root without crdt_type: skip if existing is newer (LWW) + // Note: Use dereferenced comparison since UpdatedAt::PartialOrd may not be correct + return Ok(None); + } else if *last_metadata.updated_at == *metadata.updated_at { // Concurrent update (same timestamp) - try to merge + // Note: Use dereferenced comparison since UpdatedAt::PartialEq always returns 
true if let Some(existing_data) = S::storage_read(Key::Entry(id)) { Self::try_merge_data( id, @@ -1006,9 +1065,10 @@ impl Interface { /// Attempt to merge two versions of data using CRDT semantics. /// - /// Returns the merged data, falling back to LWW (newer data) on failure. + /// For root entities: MUST use registered merge function - never falls back to LWW. + /// For non-root entities: Falls back to LWW if merge fails. fn try_merge_data( - _id: Id, + id: Id, existing: &[u8], incoming: &[u8], existing_timestamp: u64, @@ -1016,11 +1076,49 @@ impl Interface { ) -> Result, StorageError> { use crate::merge::merge_root_state; + // For root entities, handle legacy Collection format (32 bytes = just Id) + // If existing state is Collection format, it means we're migrating from old format + // In this case, the incoming state (T format) should be used directly + // This is safe because: + // 1. The incoming state is the new format (T) + // 2. The existing state is the old format (Collection) + // 3. We can't merge them without knowing T at compile time + // 4. 
The incoming state is being saved now, so it's the current state + if id.is_root() { + // Try to deserialize as T first - if it fails with "Unexpected length" and existing is 32 bytes, + // it's likely Collection format + if existing.len() == 32 { + // Legacy Collection format - use incoming state as migration + debug!( + %id, + existing_len = existing.len(), + incoming_len = incoming.len(), + "Existing state is Collection format (legacy, 32 bytes), using incoming state as migration" + ); + return Ok(incoming.to_vec()); + } + // If existing is not 32 bytes but deserialization fails, log for debugging + debug!( + %id, + existing_len = existing.len(), + incoming_len = incoming.len(), + "Attempting to merge root state" + ); + } + // Attempt CRDT merge match merge_root_state(existing, incoming, existing_timestamp, incoming_timestamp) { Ok(merged) => Ok(merged), - Err(_) => { - // Merge failed - fall back to LWW + Err(e) => { + if id.is_root() { + // Root MUST use registered merge - never fall back to LWW + // This ensures UserStorage, FrozenStorage, and other CRDTs merge correctly + return Err(StorageError::MergeError(format!( + "Root state merge failed: {}. Root state requires registered merge function via register_crdt_merge().", + e + ))); + } + // For non-root entities, fall back to LWW if merge fails if incoming_timestamp >= existing_timestamp { Ok(incoming.to_vec()) } else { @@ -1030,6 +1128,251 @@ impl Interface { } } + /// Merge entities with optional WASM callback for custom types. + /// + /// This is the main entry point for CRDT merge during state synchronization. + /// Dispatches based on `local_metadata.crdt_type`: + /// - Built-in CRDTs (Counter, Map, etc.) 
→ merge directly in storage layer + /// - Custom types → dispatch to WASM callback + /// - None/unknown → fallback to LWW + /// + /// # Arguments + /// * `local_data` - Local entity data (bytes) + /// * `remote_data` - Remote entity data (bytes) + /// * `local_metadata` - Local entity metadata (includes crdt_type) + /// * `remote_metadata` - Remote entity metadata + /// * `callback` - Optional WASM callback for custom types + /// + /// # Returns + /// * `Ok(Some(merged))` - Merged data + /// * `Ok(None)` - Merge not applicable + /// * `Err(...)` - Merge failed + /// + /// # Errors + /// Returns `StorageError` if: + /// - Deserialization of local or remote data fails + /// - The CRDT merge operation fails + /// - Custom WASM callback fails for custom types + pub fn merge_by_crdt_type_with_callback( + local_data: &[u8], + remote_data: &[u8], + local_metadata: &Metadata, + remote_metadata: &Metadata, + callback: Option<&dyn WasmMergeCallback>, + ) -> Result>, StorageError> { + #[allow(unused_imports)] + use crate::collections::{LwwRegister, Mergeable}; + + let crdt_type = local_metadata.crdt_type.as_ref(); + + match crdt_type { + // ════════════════════════════════════════════════════════ + // BUILT-IN CRDTs: Merge in storage layer (fast, no WASM) + // Includes: LwwRegister, Counter, UnorderedMap, UnorderedSet, + // Vector, RGA, UserStorage, FrozenStorage, Record + // ════════════════════════════════════════════════════════ + Some(CrdtType::LwwRegister) => { + // LWW uses timestamps for deterministic resolution + // Note: For typed LwwRegister, the merge just compares timestamps + // Here we're working with raw bytes, so compare metadata timestamps + let winner = if remote_metadata.updated_at() >= local_metadata.updated_at() { + remote_data + } else { + local_data + }; + Ok(Some(winner.to_vec())) + } + + Some(CrdtType::Counter) => { + // Counter merges by summing per-node counts + // Requires deserializing the Counter struct + // For now, fallback to registry or LWW 
since Counter has complex internal structure + Self::try_merge_via_registry_or_lww( + local_data, + remote_data, + local_metadata, + remote_metadata, + ) + } + + Some(CrdtType::UnorderedMap) + | Some(CrdtType::UnorderedSet) + | Some(CrdtType::Vector) => { + // Collections are merged at the entry level via their child IDs + // The collection container itself uses LWW for its metadata + let winner = if remote_metadata.updated_at() >= local_metadata.updated_at() { + remote_data + } else { + local_data + }; + Ok(Some(winner.to_vec())) + } + + Some(CrdtType::Rga) => { + // RGA is built on UnorderedMap, merge happens at character level + let winner = if remote_metadata.updated_at() >= local_metadata.updated_at() { + remote_data + } else { + local_data + }; + Ok(Some(winner.to_vec())) + } + + Some(CrdtType::UserStorage) | Some(CrdtType::FrozenStorage) => { + // UserStorage and FrozenStorage are wrappers around UnorderedMap + // They implement Mergeable and merge at the entry level via their child IDs + // Use registry merge to properly merge the underlying UnorderedMap + Self::try_merge_via_registry_or_lww( + local_data, + remote_data, + local_metadata, + remote_metadata, + ) + } + + Some(CrdtType::Record) => { + // Record types merge field-by-field using registered merge functions + Self::try_merge_via_registry_or_lww( + local_data, + remote_data, + local_metadata, + remote_metadata, + ) + } + + // ════════════════════════════════════════════════════════ + // CUSTOM TYPES: Use WASM callback, registry, or LWW fallback + // ════════════════════════════════════════════════════════ + Some(CrdtType::Custom { type_name }) => { + // Custom types need WASM callback for proper merge + Self::try_merge_custom_with_registry( + type_name.as_str(), + local_data, + remote_data, + local_metadata, + remote_metadata, + callback, + ) + } + + // ════════════════════════════════════════════════════════ + // LEGACY: No type info, use LWW + // 
════════════════════════════════════════════════════════ + None => { + // Legacy data - fallback to LWW + let winner = if remote_metadata.updated_at() >= local_metadata.updated_at() { + remote_data + } else { + local_data + }; + Ok(Some(winner.to_vec())) + } + } + } + + /// Try merge via registry, fallback to LWW if not registered. + fn try_merge_via_registry_or_lww( + local_data: &[u8], + remote_data: &[u8], + local_metadata: &Metadata, + remote_metadata: &Metadata, + ) -> Result>, StorageError> { + // Try registered merge functions + if let Some(result) = try_merge_registered( + local_data, + remote_data, + local_metadata.updated_at(), + remote_metadata.updated_at(), + ) { + match result { + Ok(merged) => return Ok(Some(merged)), + Err(_) => {} // Fall through to LWW + } + } + + // Fallback to LWW + let winner = if remote_metadata.updated_at() >= local_metadata.updated_at() { + remote_data + } else { + local_data + }; + Ok(Some(winner.to_vec())) + } + + /// Merge custom type using WASM callback, registry, or LWW fallback. + /// + /// Priority: + /// 1. WASM callback (if provided) - for runtime-managed WASM merge + /// 2. Type-name registry - for types registered via `register_crdt_merge` + /// 3. Brute-force registry - legacy fallback + /// 4. LWW fallback + fn try_merge_custom_with_registry( + type_name: &str, + local_data: &[u8], + remote_data: &[u8], + local_metadata: &Metadata, + remote_metadata: &Metadata, + callback: Option<&dyn WasmMergeCallback>, + ) -> Result>, StorageError> { + // 1. Try WASM callback first (production path) + if let Some(cb) = callback { + match cb.merge_custom( + type_name, + local_data, + remote_data, + local_metadata.updated_at(), + remote_metadata.updated_at(), + ) { + Ok(merged) => return Ok(Some(merged)), + Err(e) => { + debug!("WASM merge failed for {}: {}, falling back", type_name, e); + // Fall through to registry/LWW + } + } + } + + // 2. 
Try type-name registry (efficient lookup) + if let Some(result) = try_merge_by_type_name( + type_name, + local_data, + remote_data, + local_metadata.updated_at(), + remote_metadata.updated_at(), + ) { + match result { + Ok(merged) => return Ok(Some(merged)), + Err(e) => { + debug!( + "Type-name merge failed for {}: {}, falling back", + type_name, e + ); + // Fall through to brute-force/LWW + } + } + } + + // 3. Try brute-force registry (legacy fallback) + if let Some(result) = try_merge_registered( + local_data, + remote_data, + local_metadata.updated_at(), + remote_metadata.updated_at(), + ) { + match result { + Ok(merged) => return Ok(Some(merged)), + Err(_) => {} // Fall through to LWW + } + } + + // 4. Fallback to LWW + let winner = if remote_metadata.updated_at() >= local_metadata.updated_at() { + remote_data + } else { + local_data + }; + Ok(Some(winner.to_vec())) + } + /// Saves raw serialized data with orphan checking. /// /// # Errors diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index 94ce6f0b4a..54c1822a90 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -85,13 +85,15 @@ pub mod exports { pub use calimero_storage_macros::{AtomicUnit, Collection}; // Re-export commonly used types -pub use entities::{Data, Element}; +pub use entities::{CrdtType, Data, Element, Metadata}; pub use error::StorageError; pub use interface::Interface; /// Shared test functionality. #[cfg(test)] pub mod tests { + /// Collection serialization/deserialization tests. + pub mod collection_serialization; /// CRDT collections (UnorderedMap, Vector, Counter) tests. pub mod collections; /// Common test utilities and data structures. diff --git a/crates/storage/src/merge.rs b/crates/storage/src/merge.rs index 131c8961c8..884b9b5bc1 100644 --- a/crates/storage/src/merge.rs +++ b/crates/storage/src/merge.rs @@ -4,14 +4,18 @@ //! multiple nodes update the same data concurrently. 
pub mod registry; -pub use registry::{register_crdt_merge, try_merge_registered}; +pub use registry::{register_crdt_merge, try_merge_by_type_name, try_merge_registered}; #[cfg(test)] pub use registry::clear_merge_registry; use borsh::{BorshDeserialize, BorshSerialize}; -/// Attempts to merge two Borsh-serialized app state blobs using CRDT semantics. +/// Merges root state as a Record CRDT. +/// +/// Root is a Record CRDT that merges field-by-field using each field's merge function. +/// This is automatically handled by the registered merge function (from #[app::state] macro), +/// which calls Mergeable::merge() that recursively merges all CRDT fields. /// /// # When is This Called? /// @@ -32,8 +36,17 @@ use borsh::{BorshDeserialize, BorshSerialize}; /// /// # Strategy /// -/// 1. **Try registered merge:** If app called `register_crdt_merge()`, use type-specific merge -/// 2. **Fallback to LWW:** If no registered merge, use Last-Write-Wins +/// 1. **Try registered merge:** Uses the merge function registered via `register_crdt_merge()` +/// - This function deserializes both states +/// - Calls `Mergeable::merge()` which merges field-by-field +/// - Each field's merge function is called recursively (UserStorage, FrozenStorage, etc.) +/// 2. **Error if not registered:** Root MUST have a registered merge function +/// +/// # Why Record? +/// +/// Root is conceptually a Record CRDT type - it's a struct/record that contains +/// multiple CRDT fields. The Record merges by calling each field's merge function, +/// which is exactly what the auto-generated Mergeable implementation does. 
/// /// # Arguments /// * `existing` - The currently stored state (Borsh-serialized) @@ -45,33 +58,28 @@ use borsh::{BorshDeserialize, BorshSerialize}; /// Merged state as Borsh-serialized bytes /// /// # Errors -/// Returns error if merge fails (falls back to LWW in that case) +/// Returns error if merge fails (root requires registered merge function) pub fn merge_root_state( existing: &[u8], incoming: &[u8], existing_ts: u64, incoming_ts: u64, ) -> Result, Box> { - // Try registered CRDT merge functions first - // This enables automatic nested CRDT merging when apps use #[app::state] - if let Some(result) = try_merge_registered(existing, incoming, existing_ts, incoming_ts) { - return result; - } - - // NOTE: We can't blindly deserialize without knowing the type. - // The collections (UnorderedMap, Vector, Counter, etc.) already handle - // CRDT merging through their own element IDs and storage mechanisms. - // - // For root entities, concurrent updates should be rare since most operations - // target nested entities (RGA characters, Map entries, etc.) which have their - // own IDs and merge independently. - // - // Fallback: use LWW if no registered merge function - // This is safe for simple apps or backward compatibility - if incoming_ts >= existing_ts { - Ok(incoming.to_vec()) - } else { - Ok(existing.to_vec()) + // Root is a Record CRDT - it merges field-by-field using children's merge functions + // The registered merge function (from #[app::state] macro) implements this: + // 1. Deserializes both states + // 2. Calls Mergeable::merge() which merges each CRDT field + // 3. Each field's merge function is called recursively (UserStorage, FrozenStorage, etc.) + match try_merge_registered(existing, incoming, existing_ts, incoming_ts) { + Some(Ok(merged)) => Ok(merged), + Some(Err(e)) => { + // Merge function was registered but failed (e.g., deserialization error) + Err(format!("Root state merge failed: {}. 
Root state is a Record CRDT that merges using its children's merge functions. Apps using #[app::state] must call register_crdt_merge() (auto-generated as __calimero_register_merge).", e).into()) + } + None => { + // No registered merge function found + Err("Root state is a Record CRDT that merges using its children's merge functions. Apps using #[app::state] must call register_crdt_merge() (auto-generated as __calimero_register_merge).".into()) + } } } @@ -102,3 +110,141 @@ pub trait CrdtMerge: BorshSerialize + BorshDeserialize { /// Merge another instance into self using CRDT semantics. fn crdt_merge(&mut self, other: &Self); } + +// ════════════════════════════════════════════════════════════════════════════ +// WASM Merge Callback +// ════════════════════════════════════════════════════════════════════════════ + +/// Error type for WASM merge operations. +#[derive(Debug)] +pub enum WasmMergeError { + /// The type name is not recognized by the WASM module. + UnknownType(String), + /// The WASM merge function returned an error. + MergeFailed(String), + /// Failed to serialize/deserialize data for WASM boundary. + SerializationError(String), +} + +impl std::fmt::Display for WasmMergeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnknownType(name) => write!(f, "Unknown type for WASM merge: {}", name), + Self::MergeFailed(msg) => write!(f, "WASM merge failed: {}", msg), + Self::SerializationError(msg) => write!(f, "Serialization error: {}", msg), + } + } +} + +impl std::error::Error for WasmMergeError {} + +/// Trait for WASM merge callbacks used during state synchronization. +/// +/// This trait allows the runtime layer to provide custom merge logic +/// for `CrdtType::Custom` types via WASM callbacks. 
+/// +/// # Example +/// +/// ```ignore +/// // In runtime layer: +/// struct RuntimeMergeCallback { +/// wasm_module: WasmModule, +/// } +/// +/// impl WasmMergeCallback for RuntimeMergeCallback { +/// fn merge_custom( +/// &self, +/// type_name: &str, +/// local_data: &[u8], +/// remote_data: &[u8], +/// local_ts: u64, +/// remote_ts: u64, +/// ) -> Result, WasmMergeError> { +/// // Call WASM merge function +/// self.wasm_module.call_merge(type_name, local_data, remote_data) +/// } +/// } +/// ``` +pub trait WasmMergeCallback: Send + Sync { + /// Merge two instances of a custom type using WASM merge logic. + /// + /// # Arguments + /// * `type_name` - The name of the custom type (from `CrdtType::Custom`) + /// * `local_data` - Borsh-serialized local data + /// * `remote_data` - Borsh-serialized remote data + /// * `local_ts` - Timestamp of local data + /// * `remote_ts` - Timestamp of remote data + /// + /// # Returns + /// Borsh-serialized merged result, or error if merge fails. + /// + /// # Errors + /// Returns `WasmMergeError` if the WASM merge callback fails or the type is not registered. + fn merge_custom( + &self, + type_name: &str, + local_data: &[u8], + remote_data: &[u8], + local_ts: u64, + remote_ts: u64, + ) -> Result, WasmMergeError>; +} + +/// A no-op callback that falls back to LWW for custom types. +/// +/// Used when no WASM callback is available (e.g., tests, non-WASM contexts). +#[derive(Debug, Default, Clone, Copy)] +pub struct NoopMergeCallback; + +impl WasmMergeCallback for NoopMergeCallback { + fn merge_custom( + &self, + _type_name: &str, + local_data: &[u8], + remote_data: &[u8], + local_ts: u64, + remote_ts: u64, + ) -> Result, WasmMergeError> { + // Fallback to LWW + if remote_ts >= local_ts { + Ok(remote_data.to_vec()) + } else { + Ok(local_data.to_vec()) + } + } +} + +/// A callback that uses the in-process merge registry (global). 
+/// +/// This is useful when the WASM module has already registered its merge +/// function via `register_crdt_merge`. The runtime calls this after WASM +/// initialization to use the registered merge functions. +/// +/// # Example +/// +/// ```ignore +/// // After WASM module loads and calls __calimero_register_merge: +/// let callback = RegistryMergeCallback; +/// +/// // During sync: +/// compare_trees_with_callback(data, index, Some(&callback)); +/// ``` +#[derive(Debug, Default, Clone, Copy)] +pub struct RegistryMergeCallback; + +impl WasmMergeCallback for RegistryMergeCallback { + fn merge_custom( + &self, + type_name: &str, + local_data: &[u8], + remote_data: &[u8], + local_ts: u64, + remote_ts: u64, + ) -> Result, WasmMergeError> { + match try_merge_by_type_name(type_name, local_data, remote_data, local_ts, remote_ts) { + Some(Ok(merged)) => Ok(merged), + Some(Err(e)) => Err(WasmMergeError::MergeFailed(e.to_string())), + None => Err(WasmMergeError::UnknownType(type_name.to_owned())), + } + } +} diff --git a/crates/storage/src/merge/registry.rs b/crates/storage/src/merge/registry.rs index a687f4beee..b310333d28 100644 --- a/crates/storage/src/merge/registry.rs +++ b/crates/storage/src/merge/registry.rs @@ -108,13 +108,46 @@ pub fn try_merge_registered( // Try each registered merge function (brute force for Phase 2) let registry = MERGE_REGISTRY.read().ok()?; + if registry.is_empty() { + return None; + } + + let mut last_error: Option> = None; for (_type_id, merge_fn) in registry.iter() { - if let Ok(merged) = merge_fn(existing, incoming, existing_ts, incoming_ts) { - return Some(Ok(merged)); + match merge_fn(existing, incoming, existing_ts, incoming_ts) { + Ok(merged) => return Some(Ok(merged)), + Err(e) => { + // Store the last error to provide better diagnostics + last_error = Some(e); + } } } - None + // All registered merge functions failed - return the last error for better diagnostics + if let Some(err) = last_error { + Some(Err(err)) + } else { + 
None + } +} + +/// Try to merge using type name (for CrdtType::Custom dispatch). +/// +/// This function attempts to match the type name against registered types. +/// Since we don't have a type-name registry yet, this falls back to +/// trying all registered merge functions (same as `try_merge_registered`). +/// +/// In the future, this can be optimized with a type-name-to-TypeId mapping. +pub fn try_merge_by_type_name( + _type_name: &str, + existing: &[u8], + incoming: &[u8], + existing_ts: u64, + incoming_ts: u64, +) -> Option, Box>> { + // For now, fall back to brute-force registry lookup + // TODO: Add type-name-to-TypeId mapping for efficient lookup + try_merge_registered(existing, incoming, existing_ts, incoming_ts) } #[cfg(test)] diff --git a/crates/storage/src/tests/collection_serialization.rs b/crates/storage/src/tests/collection_serialization.rs new file mode 100644 index 0000000000..699971d4c7 --- /dev/null +++ b/crates/storage/src/tests/collection_serialization.rs @@ -0,0 +1,207 @@ +//! Unit tests for Collection serialization/deserialization +//! +//! These tests verify: +//! 1. Collection serializes only Element.id (not metadata) +//! 2. Collection deserialization works correctly +//! 3. Backward compatibility with old formats +//! 4. 
"Not all bytes read" error scenarios + +use crate::address::Id; +use crate::collections::{Root, UnorderedMap}; +use crate::entities::{CrdtType, Element, Metadata, StorageType}; +use borsh::{BorshDeserialize, BorshSerialize}; + +// We need to access the internal Collection struct for testing +// Since it's private, we'll test through the public API where possible +// and create a test-only version if needed + +#[test] +fn test_element_serialization_only_id() { + // Element should only serialize `id`, not metadata, is_dirty, or merkle_hash + let element = Element::new(Some(Id::random())); + + let serialized = borsh::to_vec(&element).unwrap(); + + // Element should serialize only the 32-byte ID + assert_eq!( + serialized.len(), + 32, + "Element should serialize only 32 bytes (Id)" + ); + + // Verify we can deserialize it back + let deserialized: Element = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(deserialized.id(), element.id()); +} + +#[test] +fn test_element_deserialization_with_old_format() { + // Simulate old Element format (just ID, no extra fields) + let id = Id::random(); + let old_format_bytes = borsh::to_vec(&id).unwrap(); + + // Should deserialize correctly (Element only reads ID) + let deserialized: Element = BorshDeserialize::try_from_slice(&old_format_bytes).unwrap(); + assert_eq!(deserialized.id(), id); +} + +#[test] +fn test_metadata_serialization_with_crdt_type() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + let serialized = borsh::to_vec(&metadata).unwrap(); + + // Should serialize: created_at (8) + updated_at (8) + storage_type (1) + crdt_type Option (1 + variant) + // Let's verify it deserializes correctly + let deserialized: Metadata = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(deserialized.crdt_type, Some(CrdtType::Counter)); +} + +#[test] +fn test_metadata_deserialization_without_crdt_type() { + // Create metadata without crdt_type (old format) + // We'll 
manually create bytes for old format: created_at + updated_at + storage_type + use crate::entities::UpdatedAt; + + let created_at = 1000u64; + let updated_at = UpdatedAt::from(2000u64); + let storage_type = StorageType::Public; + + // Serialize old format manually (without crdt_type) + let mut old_format_bytes = Vec::new(); + old_format_bytes.extend_from_slice(&created_at.to_le_bytes()); + old_format_bytes.extend_from_slice(&updated_at.to_le_bytes()); + // Serialize storage_type using Borsh + let storage_type_bytes = borsh::to_vec(&storage_type).unwrap(); + old_format_bytes.extend_from_slice(&storage_type_bytes); + // Note: old format doesn't have crdt_type field + + // Should deserialize with crdt_type = None (backward compatibility) + let deserialized: Metadata = BorshDeserialize::try_from_slice(&old_format_bytes).unwrap(); + assert_eq!(deserialized.created_at, created_at); + assert_eq!(deserialized.updated_at(), 2000); + assert_eq!( + deserialized.crdt_type, None, + "Old format should have crdt_type = None" + ); +} + +#[test] +fn test_metadata_deserialization_with_extra_bytes() { + // Create metadata with crdt_type + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + let mut serialized = borsh::to_vec(&metadata).unwrap(); + + // Add extra bytes (simulating "Not all bytes read" scenario) + serialized.push(0x42); + serialized.push(0x43); + + // This should fail with "Not all bytes read" + let result: Result = BorshDeserialize::try_from_slice(&serialized); + assert!(result.is_err()); + + let err = result.unwrap_err(); + let err_str = err.to_string(); + assert!( + err_str.contains("Not all bytes read") || err_str.contains("Unexpected length"), + "Should fail with 'Not all bytes read' error, got: {}", + err_str + ); +} + +#[test] +fn test_collection_serialization_size() { + // Create a root collection + let root: Root> = Root::new(|| UnorderedMap::new()); + + // Get the inner collection + // We can't directly access the inner Collection, but we 
can test through Root + // The Collection should serialize only Element.id (32 bytes) + + // Serialize the root's inner collection by committing and reading back + root.commit(); + + // The Collection struct should only serialize Element.id + // Element.id is 32 bytes + // So Collection serialization should be exactly 32 bytes + let element = Element::new(Some(Id::root())); + let element_bytes = borsh::to_vec(&element).unwrap(); + assert_eq!( + element_bytes.len(), + 32, + "Element serialization should be 32 bytes" + ); +} + +#[test] +fn test_collection_deserialization_with_extra_bytes() { + // Create a minimal Collection-like structure + // Collection serializes as: Element (which is just Id = 32 bytes) + let id = Id::root(); + let mut collection_bytes = borsh::to_vec(&id).unwrap(); + + // Add extra bytes (simulating old format or corruption) + collection_bytes.push(0x01); + collection_bytes.push(0x02); + collection_bytes.push(0x03); + + // Try to deserialize as Element (what Collection contains) + let result: Result = BorshDeserialize::try_from_slice(&collection_bytes); + + // This should fail with "Not all bytes read" because we added extra bytes + assert!(result.is_err()); + let err = result.unwrap_err(); + let err_str = err.to_string(); + assert!( + err_str.contains("Not all bytes read") || err_str.contains("Unexpected length"), + "Should fail with 'Not all bytes read' error when extra bytes present, got: {}", + err_str + ); +} + +#[test] +fn test_collection_round_trip() { + // Create a root collection + let mut root: Root> = Root::new(|| UnorderedMap::new()); + + // Insert something + root.insert("key1".to_string(), "value1".to_string()) + .unwrap(); + + // Commit + root.commit(); + + // Fetch should work (this is what's failing in the workflow) + let fetched = Root::>::fetch(); + assert!( + fetched.is_some(), + "Root::fetch() should succeed after commit" + ); + + let fetched_root = fetched.unwrap(); + let value = fetched_root.get("key1").unwrap(); + 
assert_eq!(value, Some("value1".to_string())); +} + +#[test] +fn test_element_id_only_serialization() { + // Verify Element only serializes id field + let id1 = Id::random(); + let id2 = Id::random(); + + let element1 = Element::new(Some(id1)); + let element2 = Element::new(Some(id2)); + + let bytes1 = borsh::to_vec(&element1).unwrap(); + let bytes2 = borsh::to_vec(&element2).unwrap(); + + // Both should be exactly 32 bytes (just the ID) + assert_eq!(bytes1.len(), 32); + assert_eq!(bytes2.len(), 32); + + // They should be different (different IDs) + assert_ne!(bytes1, bytes2); + + // But should match the ID bytes + assert_eq!(bytes1, id1.as_bytes()); + assert_eq!(bytes2, id2.as_bytes()); +} diff --git a/crates/storage/src/tests/common.rs b/crates/storage/src/tests/common.rs index 9da9907098..fe47ce0e62 100644 --- a/crates/storage/src/tests/common.rs +++ b/crates/storage/src/tests/common.rs @@ -187,6 +187,8 @@ pub fn create_signed_user_add_action( nonce, }), }, + crdt_type: None, + field_name: None, }; // Create action for signing @@ -237,6 +239,8 @@ pub fn create_signed_user_update_action( nonce, }), }, + crdt_type: None, + field_name: None, }; let mut action = Action::Update { diff --git a/crates/storage/src/tests/entities.rs b/crates/storage/src/tests/entities.rs index 9230ccf0e5..fa5c7fd87f 100644 --- a/crates/storage/src/tests/entities.rs +++ b/crates/storage/src/tests/entities.rs @@ -277,10 +277,253 @@ mod element__traits { #[cfg(test)] mod metadata__constructor { + use super::*; #[test] - #[ignore] fn new() { - todo!() + let metadata = Metadata::new(1000, 2000); + assert_eq!(metadata.created_at, 1000); + assert_eq!(*metadata.updated_at, 2000); + // Metadata::new() now defaults to LwwRegister CRDT type + assert_eq!(metadata.crdt_type, Some(CrdtType::LwwRegister)); + } + + #[test] + fn with_crdt_type() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + assert_eq!(metadata.created_at, 1000); + assert_eq!(*metadata.updated_at, 2000); + 
assert_eq!(metadata.crdt_type, Some(CrdtType::Counter)); + } +} + +#[cfg(test)] +mod metadata__crdt_type { + use super::*; + + #[test] + fn is_builtin_crdt__counter() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + assert!(metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__lww_register() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::LwwRegister); + assert!(metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__rga() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Rga); + assert!(metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__unordered_map() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::UnorderedMap); + assert!(metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__unordered_set() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::UnorderedSet); + assert!(metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__vector() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Vector); + assert!(metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__custom() { + let metadata = Metadata::with_crdt_type( + 1000, + 2000, + CrdtType::Custom { + type_name: "MyCRDT".to_string(), + }, + ); + assert!(!metadata.is_builtin_crdt()); + } + + #[test] + fn is_builtin_crdt__none() { + let mut metadata = Metadata::new(1000, 2000); + metadata.crdt_type = None; // Explicitly set to None for this test + assert!(!metadata.is_builtin_crdt()); + } +} + +#[cfg(test)] +mod metadata__serialization { + use super::*; + use borsh::{BorshDeserialize, BorshSerialize}; + + #[test] + fn serialize_deserialize__with_crdt_type() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + let serialized = borsh::to_vec(&metadata).unwrap(); + let deserialized: Metadata = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(metadata.created_at, deserialized.created_at); + 
assert_eq!(metadata.updated_at, deserialized.updated_at); + assert_eq!(metadata.crdt_type, deserialized.crdt_type); + assert_eq!(deserialized.crdt_type, Some(CrdtType::Counter)); + } + + #[test] + fn serialize_deserialize__without_crdt_type() { + let mut metadata = Metadata::new(1000, 2000); + metadata.crdt_type = None; // Explicitly set to None for this test + let serialized = borsh::to_vec(&metadata).unwrap(); + let deserialized: Metadata = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(metadata.created_at, deserialized.created_at); + assert_eq!(metadata.updated_at, deserialized.updated_at); + assert_eq!(deserialized.crdt_type, None); + } + + #[test] + fn serialize_deserialize__custom_crdt() { + let metadata = Metadata::with_crdt_type( + 1000, + 2000, + CrdtType::Custom { + type_name: "MyCustomCRDT".to_string(), + }, + ); + let serialized = borsh::to_vec(&metadata).unwrap(); + let deserialized: Metadata = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(metadata.crdt_type, deserialized.crdt_type); + match deserialized.crdt_type { + Some(CrdtType::Custom { type_name }) => { + assert_eq!(type_name, "MyCustomCRDT"); + } + _ => panic!("Expected Custom CRDT type"), + } + } + + #[test] + fn default__has_none_crdt_type() { + let metadata = Metadata::default(); + assert_eq!(metadata.crdt_type, None); + } + + #[test] + fn serialize_deserialize__with_field_name() { + let mut metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::UnorderedMap); + metadata.field_name = Some("items".to_string()); + let serialized = borsh::to_vec(&metadata).unwrap(); + let deserialized: Metadata = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(deserialized.field_name, Some("items".to_string())); + assert_eq!(deserialized.crdt_type, Some(CrdtType::UnorderedMap)); + } + + #[test] + fn serialize_deserialize__without_field_name() { + let metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + // field_name is None by 
default + assert_eq!(metadata.field_name, None); + let serialized = borsh::to_vec(&metadata).unwrap(); + let deserialized: Metadata = BorshDeserialize::try_from_slice(&serialized).unwrap(); + assert_eq!(deserialized.field_name, None); + assert_eq!(deserialized.crdt_type, Some(CrdtType::Counter)); + } +} + +#[cfg(test)] +mod element__new_with_field_name { + use super::*; + + #[test] + fn creates_element_with_field_name() { + let element = Element::new_with_field_name(None, Some("my_field".to_string())); + assert_eq!(element.metadata.field_name, Some("my_field".to_string())); + // Default CRDT type for new_with_field_name is LwwRegister + assert_eq!(element.metadata.crdt_type, Some(CrdtType::LwwRegister)); + } + + #[test] + fn creates_element_without_field_name() { + let element = Element::new_with_field_name(None, None); + assert_eq!(element.metadata.field_name, None); + } + + #[test] + fn creates_element_with_field_name_and_crdt_type() { + let element = Element::new_with_field_name_and_crdt_type( + None, + Some("items".to_string()), + CrdtType::UnorderedMap, + ); + assert_eq!(element.metadata.field_name, Some("items".to_string())); + assert_eq!(element.metadata.crdt_type, Some(CrdtType::UnorderedMap)); + } + + #[test] + fn new_defaults_to_no_field_name() { + let element = Element::new(None); + assert_eq!(element.metadata.field_name, None); + } +} + +#[cfg(test)] +mod metadata__backward_compatibility { + use super::*; + use borsh::BorshDeserialize; + + /// Test that old Metadata format (without crdt_type and field_name) deserializes correctly. + /// This simulates data written before crdt_type and field_name were added. 
+ #[test] + fn deserialize_old_format_without_crdt_type_and_field_name() { + // Manually construct old-format Metadata bytes: + // created_at: u64 (8 bytes) + // updated_at: u64 (8 bytes) + // storage_type: Public variant (1 byte for enum discriminant) + let mut old_bytes = Vec::new(); + old_bytes.extend_from_slice(&1000u64.to_le_bytes()); // created_at + old_bytes.extend_from_slice(&2000u64.to_le_bytes()); // updated_at + old_bytes.push(0u8); // StorageType::Public enum discriminant + + // Deserialize - should succeed with None for crdt_type and field_name + let deserialized: Metadata = BorshDeserialize::try_from_slice(&old_bytes).unwrap(); + assert_eq!(deserialized.created_at, 1000); + assert_eq!(*deserialized.updated_at, 2000); + assert!(matches!(deserialized.storage_type, StorageType::Public)); + assert_eq!(deserialized.crdt_type, None); + assert_eq!(deserialized.field_name, None); + } + + /// Test that Metadata with crdt_type but without field_name deserializes correctly. + /// This simulates data written after crdt_type was added but before field_name. + #[test] + fn deserialize_format_with_crdt_type_without_field_name() { + // Construct Metadata with crdt_type but let field_name be missing + let metadata_with_crdt = Metadata::with_crdt_type(1000, 2000, CrdtType::Counter); + let mut bytes_with_crdt = borsh::to_vec(&metadata_with_crdt).unwrap(); + + // Remove the field_name bytes (last few bytes after crdt_type) + // Since field_name is Option serialized as None (0 byte), we can test + // by ensuring current format works correctly + let deserialized: Metadata = BorshDeserialize::try_from_slice(&bytes_with_crdt).unwrap(); + assert_eq!(deserialized.crdt_type, Some(CrdtType::Counter)); + // field_name should be None (default when not set) + assert_eq!(deserialized.field_name, None); + } + + /// Test that current format with all fields deserializes correctly. 
+ #[test] + fn deserialize_current_format_with_all_fields() { + let mut metadata = Metadata::with_crdt_type(1000, 2000, CrdtType::UnorderedMap); + metadata.field_name = Some("test_field".to_string()); + + let bytes = borsh::to_vec(&metadata).unwrap(); + let deserialized: Metadata = BorshDeserialize::try_from_slice(&bytes).unwrap(); + + assert_eq!(deserialized.created_at, 1000); + assert_eq!(*deserialized.updated_at, 2000); + assert_eq!(deserialized.crdt_type, Some(CrdtType::UnorderedMap)); + assert_eq!(deserialized.field_name, Some("test_field".to_string())); } } diff --git a/crates/storage/src/tests/index.rs b/crates/storage/src/tests/index.rs index 46ebaeff81..d2e2b085df 100644 --- a/crates/storage/src/tests/index.rs +++ b/crates/storage/src/tests/index.rs @@ -20,6 +20,8 @@ mod index__public_methods { created_at: 1, updated_at: 1.into(), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, }; @@ -33,12 +35,16 @@ mod index__public_methods { created_at: 43, updated_at: 22.into(), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, )], metadata: Metadata { created_at: 1, updated_at: 1.into(), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, }; diff --git a/crates/storage/src/tests/interface.rs b/crates/storage/src/tests/interface.rs index 2622d6a667..79282b7fd9 100644 --- a/crates/storage/src/tests/interface.rs +++ b/crates/storage/src/tests/interface.rs @@ -90,16 +90,27 @@ mod interface__public_methods { #[test] fn save__too_old() { - let element1 = Element::root(); + // Use a non-root element because root entities always merge (CRDT behavior) + // Non-root entities use LWW: older timestamps are rejected + let element1 = Element::new(None); let mut page1 = Page::new_from_element("Node", element1); let mut page2 = page1.clone(); - assert!(MainInterface::save(&mut page1).unwrap()); + // First, create a parent so the non-root entity can be saved + let root_element = Element::root(); + let mut 
root_page = Page::new_from_element("Root", root_element); + assert!(MainInterface::save(&mut root_page).unwrap()); + + // Add page1 as child of root + assert!(MainInterface::add_child_to(root_page.id(), &mut page1).unwrap()); + + // Now test too_old behavior for non-root entity page2.element_mut().update(); sleep(Duration::from_millis(2)); page1.element_mut().update(); - assert!(MainInterface::save(&mut page1).unwrap()); - assert!(!MainInterface::save(&mut page2).unwrap()); + assert!(MainInterface::add_child_to(root_page.id(), &mut page1).unwrap()); + // page2 has older timestamp, should be rejected (LWW) + assert!(!MainInterface::add_child_to(root_page.id(), &mut page2).unwrap()); } #[test] @@ -779,6 +790,8 @@ mod user_storage_signature_verification { owner, signature_data: None, // No signature! }, + crdt_type: None, + field_name: None, }, }; @@ -845,7 +858,9 @@ mod user_storage_signature_verification { let (signing_key, owner) = create_test_keypair(); // First, create the entity + // Use root element but clear crdt_type to avoid CRDT merge path in tests let mut element = Element::root(); + element.metadata.crdt_type = None; element.set_user_domain(owner); let page = Page::new_from_element("Original Title", element); let serialized = to_vec(&page).unwrap(); @@ -986,7 +1001,9 @@ mod user_storage_replay_protection { let (signing_key, owner) = create_test_keypair(); + // Use root element but clear crdt_type to avoid CRDT merge path in tests let mut element = Element::root(); + element.metadata.crdt_type = None; element.set_user_domain(owner); let mut page = Page::new_from_element("Version 1", element); let serialized = to_vec(&page).unwrap(); @@ -1121,6 +1138,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; @@ -1149,6 +1168,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: timestamp.into(), storage_type: 
StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; @@ -1185,6 +1206,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; assert!(MainInterface::apply_action(add_action).is_ok()); @@ -1204,6 +1227,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: new_timestamp.into(), storage_type: StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; @@ -1240,6 +1265,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; assert!(MainInterface::apply_action(add_action).is_ok()); @@ -1286,6 +1313,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; @@ -1326,6 +1355,8 @@ mod frozen_storage_verification { created_at: timestamp, updated_at: timestamp.into(), storage_type: StorageType::Frozen, + crdt_type: None, + field_name: None, }, }; @@ -1362,6 +1393,8 @@ mod timestamp_drift_protection { created_at: future_timestamp, updated_at: future_timestamp.into(), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, }; @@ -1394,6 +1427,8 @@ mod timestamp_drift_protection { created_at: future_timestamp, updated_at: future_timestamp.into(), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, }; @@ -1420,6 +1455,8 @@ mod timestamp_drift_protection { created_at: past_timestamp, updated_at: past_timestamp.into(), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, }; @@ -1490,6 +1527,8 @@ mod storage_type_edge_cases { nonce, }), }, + crdt_type: None, + field_name: None, }; let mut action = Action::DeleteRef { @@ -1654,6 +1693,8 @@ mod storage_type_edge_cases { owner, signature_data: None, // No signature! 
}, + crdt_type: None, + field_name: None, }, }; @@ -1742,6 +1783,8 @@ mod storage_type_edge_cases { created_at: page.element().created_at(), updated_at: timestamp.into(), storage_type: StorageType::Public, // Changed to Public! + crdt_type: None, + field_name: None, }, }; @@ -1766,7 +1809,9 @@ mod storage_type_edge_cases { let (signing_key, owner) = create_test_keypair(); // Create user-owned entity + // Use root element but clear crdt_type to avoid CRDT merge path in tests let mut element = Element::root(); + element.metadata.crdt_type = None; element.set_user_domain(owner); let page = Page::new_from_element("Page", element); let serialized = to_vec(&page).unwrap(); diff --git a/tools/merodb/src/abi.rs b/tools/merodb/src/abi.rs index f2bc23228e..de9768fe3b 100644 --- a/tools/merodb/src/abi.rs +++ b/tools/merodb/src/abi.rs @@ -67,3 +67,188 @@ pub fn load_state_schema_from_json(schema_path: &Path) -> Result { load_state_schema_from_json_value(&schema_value) } + +/// Infer state schema from database by reading field names and CRDT types from metadata +/// +/// This function scans the State column for EntityIndex entries and builds a schema +/// based on field_name and crdt_type found in metadata. This enables schema-free +/// database inspection when field names are stored in metadata. +/// +/// # Arguments +/// * `db` - The database to scan +/// * `context_id` - Optional context ID to filter by. 
If None, scans all contexts (may find fields from multiple contexts) +pub fn infer_schema_from_database( + db: &rocksdb::DBWithThreadMode, + context_id: Option<&[u8]>, +) -> Result { + use calimero_wasm_abi::schema::{ + CollectionType, CrdtCollectionType, Field, ScalarType, TypeDef, TypeRef, + }; + use std::collections::BTreeMap; + + let state_cf = db + .cf_handle("State") + .ok_or_else(|| eyre::eyre!("State column family not found"))?; + + let mut fields = Vec::new(); + let mut seen_field_names = std::collections::HashSet::new(); + + // Root ID depends on context: + // - If context_id is provided, root ID is that context_id (Id::root() returns context_id()) + // - If no context_id, we can't determine root fields reliably, so use all zeros as fallback + let root_id_bytes: [u8; 32] = match context_id { + Some(ctx_id) => ctx_id.try_into().map_err(|_| { + eyre::eyre!( + "context_id must be exactly 32 bytes, got {} bytes", + ctx_id.len() + ) + })?, + None => [0u8; 32], + }; + + // Scan State column for EntityIndex entries + let iter = db.iterator_cf(&state_cf, rocksdb::IteratorMode::Start); + for item in iter { + let (key, value) = item?; + + // Filter by context_id if provided (key format: context_id (32 bytes) + state_key (32 bytes)) + if let Some(expected_context_id) = context_id { + if key.len() < 32 || &key[..32] != expected_context_id { + continue; + } + } + + // Try to deserialize as EntityIndex + if let Ok(index) = borsh::from_slice::(&value) { + // Check if this is a root-level field (parent_id is None or equals root/context_id) + let is_root_field = index.parent_id.is_none() + || index + .parent_id + .as_ref() + .map(|id| id.as_bytes() == &root_id_bytes) + .unwrap_or(false); + + if is_root_field { + // Check if we have field_name in metadata + if let Some(ref field_name) = index.metadata.field_name { + if !seen_field_names.contains(field_name) { + seen_field_names.insert(field_name.clone()); + + // Infer type from crdt_type + let type_ref = if let 
Some(crdt_type) = index.metadata.crdt_type { + match crdt_type { + crate::export::CrdtType::UnorderedMap => { + // Default to Map - can be refined later + TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + } + } + crate::export::CrdtType::Vector => TypeRef::Collection { + collection: CollectionType::List { + items: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::Vector), + inner_type: None, + }, + crate::export::CrdtType::UnorderedSet => TypeRef::Collection { + collection: CollectionType::List { + items: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedSet), + inner_type: None, + }, + crate::export::CrdtType::Counter => TypeRef::Collection { + // Counter is stored as Map internally + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::Scalar(ScalarType::U64)), + }, + crdt_type: Some(CrdtCollectionType::Counter), + inner_type: None, + }, + crate::export::CrdtType::Rga => TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::ReplicatedGrowableArray), + inner_type: None, + }, + crate::export::CrdtType::LwwRegister => TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::LwwRegister), + inner_type: Some(Box::new(TypeRef::string())), + }, + crate::export::CrdtType::UserStorage => TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + }, + crate::export::CrdtType::FrozenStorage => TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: 
Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + }, + crate::export::CrdtType::Record => { + // Record type - would need to inspect children to infer fields + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: None, + inner_type: None, + } + } + crate::export::CrdtType::Custom { type_name: _ } => { + // Custom type - can't infer without schema + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: None, + inner_type: None, + } + } + } + } else { + // No CRDT type - default to LWW register + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::LwwRegister), + inner_type: Some(Box::new(TypeRef::string())), + } + }; + + fields.push(Field { + name: field_name.clone(), + type_: type_ref, + nullable: None, + }); + } + } + } + } + } + + // Create a record type with all inferred fields + let state_root_type = "InferredStateRoot".to_string(); + let mut types = BTreeMap::new(); + types.insert( + state_root_type.clone(), + TypeDef::Record { + fields: fields.clone(), + }, + ); + + Ok(Manifest { + schema_version: "wasm-abi/1".to_string(), + types, + methods: Vec::new(), + events: Vec::new(), + state_root: Some(state_root_type), + }) +} diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index 05d9b1a512..c93ba0ace0 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -88,6 +88,47 @@ struct MapField { value_type: TypeRef, } +/// Try to decode entry data with a specific field definition +fn try_decode_with_field( + entry_bytes: &[u8], + field: &Field, + index: &EntityIndex, + manifest: &Manifest, +) -> Option { + match &field.type_ { + TypeRef::Collection { + collection: CollectionType::Map { key, value }, + .. 
+ } => { + let map_field = MapField { + name: field.name.clone(), + key_type: (**key).clone(), + value_type: (**value).clone(), + }; + decode_map_entry(entry_bytes, &map_field, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten() + } + TypeRef::Collection { + collection: CollectionType::List { items }, + .. + } => decode_list_entry(entry_bytes, field, items, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten(), + TypeRef::Collection { + collection: CollectionType::Record { .. }, + crdt_type, + inner_type, + } => decode_record_entry(entry_bytes, field, crdt_type, inner_type, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten(), + _ => None, + } +} + /// Try to decode a collection entry by looking up the actual entry data from an EntityIndex /// Supports Map entries (Entry<(K, V)>) and List entries (Entry) fn try_decode_collection_entry_from_index( @@ -170,6 +211,27 @@ fn try_decode_collection_entry_from_index( record_fields.len() ); + // First, try to match by field_name if available (most direct and efficient) + if let Some(ref field_name) = index.metadata.field_name { + eprintln!( + "[try_decode_collection_entry_from_index] Using field_name from metadata: {}", + field_name + ); + if let Some(field) = record_fields.iter().find(|f| f.name == *field_name) { + eprintln!( + "[try_decode_collection_entry_from_index] Found matching field by name: {}", + field_name + ); + // Try to decode with this specific field + return try_decode_with_field(&entry_bytes, field, index, manifest); + } else { + eprintln!( + "[try_decode_collection_entry_from_index] Field name '{}' not found in schema, falling back to all fields", + field_name + ); + } + } + // If we have a parent_id, try to find the collection field that matches it // Otherwise, try all collection fields let fields_to_try: Vec<&Field> = if let Some(parent_id) = &index.parent_id { @@ -627,6 +689,7 @@ fn decode_state_entry( 
"own_hash": hex::encode(index.own_hash), "created_at": index.metadata.created_at, "updated_at": *index.metadata.updated_at, + "field_name": index.metadata.field_name, "deleted_at": index.deleted_at })); } else { @@ -934,24 +997,24 @@ fn decode_scalar_entry(bytes: &[u8], field: &Field, manifest: &Manifest) -> Resu } // EntityIndex structure for decoding -#[derive(borsh::BorshDeserialize)] -struct EntityIndex { - id: Id, - parent_id: Option, - children: Option>, - full_hash: [u8; 32], - own_hash: [u8; 32], - metadata: Metadata, - deleted_at: Option, +#[derive(borsh::BorshDeserialize, Clone)] +pub(crate) struct EntityIndex { + pub(crate) id: Id, + pub(crate) parent_id: Option, + pub(crate) children: Option>, + pub(crate) full_hash: [u8; 32], + pub(crate) own_hash: [u8; 32], + pub(crate) metadata: Metadata, + pub(crate) deleted_at: Option, } -#[derive(borsh::BorshDeserialize)] -struct Id { +#[derive(borsh::BorshDeserialize, Clone)] +pub(crate) struct Id { bytes: [u8; 32], } impl Id { - const fn as_bytes(&self) -> &[u8; 32] { + pub(crate) const fn as_bytes(&self) -> &[u8; 32] { &self.bytes } } @@ -1334,6 +1397,8 @@ fn try_manual_entity_index_decode( created_at, updated_at: UpdatedAt(updated_at_val), storage_type, + crdt_type: None, + field_name: None, }; let child_info = ChildInfo { @@ -1408,12 +1473,14 @@ fn try_manual_entity_index_decode( created_at: 0, updated_at: UpdatedAt(0), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, deleted_at: None, }) } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" @@ -1424,18 +1491,78 @@ struct ChildInfo { metadata: Metadata, } -#[derive(borsh::BorshDeserialize)] +#[derive(Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" )] -struct Metadata { - created_at: u64, - updated_at: UpdatedAt, - storage_type: StorageType, +pub(crate) struct Metadata { + 
pub(crate) created_at: u64,
+    pub(crate) updated_at: UpdatedAt,
+    pub(crate) storage_type: StorageType,
+    pub(crate) crdt_type: Option<CrdtType>,
+    pub(crate) field_name: Option<String>,
 }
 
-#[derive(borsh::BorshDeserialize)]
+// Custom BorshDeserialize for backward compatibility with old Metadata that lacks crdt_type and/or field_name
+impl borsh::BorshDeserialize for Metadata {
+    fn deserialize_reader<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> {
+        let created_at = u64::deserialize_reader(reader)?;
+        let updated_at = UpdatedAt::deserialize_reader(reader)?;
+        let storage_type = StorageType::deserialize_reader(reader)?;
+
+        // Try to deserialize crdt_type (may not exist in old format)
+        let crdt_type = match <Option<CrdtType>>::deserialize_reader(reader) {
+            Ok(ct) => ct,
+            Err(e) => {
+                if matches!(e.kind(), std::io::ErrorKind::UnexpectedEof) {
+                    None
+                } else {
+                    return Err(e);
+                }
+            }
+        };
+
+        // Try to deserialize field_name (may not exist in old format)
+        let field_name = match <Option<String>>::deserialize_reader(reader) {
+            Ok(fn_val) => fn_val,
+            Err(e) => {
+                if matches!(e.kind(), std::io::ErrorKind::UnexpectedEof) {
+                    None
+                } else {
+                    return Err(e);
+                }
+            }
+        };
+
+        Ok(Metadata {
+            created_at,
+            updated_at,
+            storage_type,
+            crdt_type,
+            field_name,
+        })
+    }
+}
+
+/// CRDT type identifier for entity metadata.
+/// Must match the definition in calimero-storage.
+#[derive(borsh::BorshDeserialize, Debug, Clone, PartialEq, Eq)] +#[allow(dead_code)] +pub(crate) enum CrdtType { + LwwRegister, + Counter, + Rga, + UnorderedMap, + UnorderedSet, + Vector, + UserStorage, + FrozenStorage, + Record, + Custom { type_name: String }, +} + +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Variants required for Borsh deserialization structure" @@ -1449,7 +1576,7 @@ enum StorageType { Frozen, } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" @@ -1459,7 +1586,7 @@ struct SignatureData { nonce: u64, } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] struct UpdatedAt(u64); impl Deref for UpdatedAt { @@ -2059,8 +2186,44 @@ fn decode_state_root_bfs( fields.len() ); + // PRE-FILTER: Build a mapping from field_name to (state_key, EntityIndex) for children that have field_name + // This allows direct field matching instead of sequential iteration + let mut field_name_to_child: std::collections::HashMap = + std::collections::HashMap::new(); + for child_info in &root_children { + let child_element_id = hex::encode(child_info.id.as_bytes()); + if let Some(state_key) = element_to_state.get(&child_element_id) { + let child_key_bytes = match hex::decode(state_key) { + Ok(bytes) => bytes, + Err(_) => continue, + }; + let mut child_key = Vec::with_capacity(64); + child_key.extend_from_slice(context_id); + child_key.extend_from_slice(&child_key_bytes); + + if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + if let Ok(child_index) = borsh::from_slice::(&child_value) { + if let Some(ref field_name) = child_index.metadata.field_name { + eprintln!( + "[decode_state_root_bfs] Found collection root with field_name='{}': id={}, {} children", + field_name, + child_element_id, + child_index.children.as_ref().map(|c| c.len()).unwrap_or(0) + ); + field_name_to_child + 
.insert(field_name.clone(), (state_key.clone(), child_index)); + } + } + } + } + } + eprintln!( + "[decode_state_root_bfs] Pre-filtered {} collection roots with field_name", + field_name_to_child.len() + ); + // For each field in the state root schema, find and decode its children using BFS - // Match children to fields by iterating through root's children + // Match children to fields by field_name first, then fall back to sequential matching let mut used_children = std::collections::HashSet::new(); for field in fields { eprintln!("[decode_state_root_bfs] Decoding field: {}", field.name); @@ -2079,52 +2242,93 @@ fn decode_state_root_bfs( }; let field_value = if field_value { - // Find an unused child that is a collection root + // FIRST: Try to find by field_name (direct match) let mut matched_child = None; - for child_info in &root_children { - let child_element_id = hex::encode(child_info.id.as_bytes()); - if used_children.contains(&child_element_id) { - continue; + if let Some((state_key, child_index)) = field_name_to_child.get(&field.name) { + let child_element_id = hex::encode(child_index.id.as_bytes()); + if !used_children.contains(&child_element_id) { + eprintln!( + "[decode_state_root_bfs] Direct field_name match for '{}': {} children", + field.name, + child_index.children.as_ref().map(|c| c.len()).unwrap_or(0) + ); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); } + } - // Check if this child is a collection root by loading its EntityIndex - if let Some(state_key) = element_to_state.get(&child_element_id) { - let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { - format!("Failed to decode child_state_key: {}", state_key) - })?; - let mut child_key = Vec::with_capacity(64); - child_key.extend_from_slice(context_id); - child_key.extend_from_slice(&child_key_bytes); + // FALLBACK: If no direct match, try sequential matching (for legacy data) + if matched_child.is_none() { + eprintln!( + 
"[decode_state_root_bfs] No direct field_name match for '{}', trying sequential", + field.name + ); + for child_info in &root_children { + let child_element_id = hex::encode(child_info.id.as_bytes()); + if used_children.contains(&child_element_id) { + continue; + } - if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { - // Try standard Borsh deserialization first - let child_index = match borsh::from_slice::(&child_value) { - Ok(index) => { - eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex for field {}: {} children", field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0)); - index - } - Err(e) => { - // Try manual deserialization as fallback - eprintln!("[decode_state_root_bfs] Failed to decode collection root EntityIndex for field {} using Borsh: {}. Attempting manual decode...", field.name, e); - match try_manual_entity_index_decode(&child_value, context_id) { - Ok(index) => { - eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex manually for field {}: {} children", field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0)); - index - } - Err(manual_err) => { - eprintln!("[decode_state_root_bfs] Manual decode also failed for collection root: {}", manual_err); - continue; // Skip this child + // Check if this child is a collection root by loading its EntityIndex + if let Some(state_key) = element_to_state.get(&child_element_id) { + let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { + format!("Failed to decode child_state_key: {}", state_key) + })?; + let mut child_key = Vec::with_capacity(64); + child_key.extend_from_slice(context_id); + child_key.extend_from_slice(&child_key_bytes); + + if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + // Try standard Borsh deserialization first + let child_index = match borsh::from_slice::(&child_value) { + Ok(index) => { + eprintln!("[decode_state_root_bfs] Successfully decoded collection root 
EntityIndex for field {}: {} children, field_name={:?}", + field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0), index.metadata.field_name); + index + } + Err(e) => { + // Try manual deserialization as fallback + eprintln!("[decode_state_root_bfs] Failed to decode collection root EntityIndex for field {} using Borsh: {}. Attempting manual decode...", field.name, e); + match try_manual_entity_index_decode(&child_value, context_id) { + Ok(index) => { + eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex manually for field {}: {} children, field_name={:?}", + field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0), index.metadata.field_name); + index + } + Err(manual_err) => { + eprintln!("[decode_state_root_bfs] Manual decode also failed for collection root: {}", manual_err); + continue; // Skip this child + } } } + }; + + // Match by field_name if available, otherwise fall back to sequential matching + let field_name_matches = child_index + .metadata + .field_name + .as_ref() + .map(|fn_| fn_ == &field.name) + .unwrap_or(false); + + if field_name_matches { + // This child's field_name matches the schema field + eprintln!("[decode_state_root_bfs] Found matching child for field {} by field_name", field.name); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); + break; + } else if child_index.metadata.field_name.is_none() { + // Legacy data without field_name - use sequential matching as fallback + eprintln!("[decode_state_root_bfs] Child has no field_name, using sequential match for field {}", field.name); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); + break; } - }; - // This is a collection root - it matches this collection field - matched_child = Some((state_key.clone(), child_index)); - used_children.insert(child_element_id); - break; + // If field_name exists but doesn't match, continue to 
next child + } } } - } + } // end fallback if let Some((collection_root_key, collection_root_index)) = matched_child { // Decode this collection field using the found collection root @@ -2152,7 +2356,7 @@ fn decode_state_root_bfs( } } else { // Non-collection field - could be a Record (Counter, etc.) or scalar - // Try to find a child that matches this field + // Try to find a child that matches this field by field_name // For Record types like Counter, they're stored as children of the root let mut matched_child = None; for child_info in &root_children { @@ -2161,7 +2365,7 @@ fn decode_state_root_bfs( continue; } - // Check if this child matches the field by trying to decode it + // Check if this child matches the field by field_name first if let Some(state_key) = element_to_state.get(&child_element_id) { let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { format!("Failed to decode child_state_key: {}", state_key) @@ -2171,6 +2375,21 @@ fn decode_state_root_bfs( child_key.extend_from_slice(&child_key_bytes); if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + // First try to decode as EntityIndex to check field_name + if let Ok(child_index) = borsh::from_slice::(&child_value) { + // Check if field_name matches + if let Some(ref child_field_name) = child_index.metadata.field_name { + if child_field_name != &field.name { + // This child's field_name doesn't match - skip to next child + eprintln!("[decode_state_root_bfs] Skipping child {} for field {} - field_name is '{}'", + child_element_id, field.name, child_field_name); + continue; + } + eprintln!("[decode_state_root_bfs] Found matching child {} for field {} by field_name", + child_element_id, field.name); + } + } + eprintln!("[decode_state_root_bfs] Attempting to decode child {} for field {} (value length: {})", child_element_id, field.name, child_value.len()); // First, try to decode directly as the field's type (for Counter, etc.) 
// This handles cases where the value is stored as Entry where T is the field type diff --git a/tools/merodb/src/export/cli.rs b/tools/merodb/src/export/cli.rs index eb3bb39e31..ca90e9a532 100644 --- a/tools/merodb/src/export/cli.rs +++ b/tools/merodb/src/export/cli.rs @@ -31,6 +31,7 @@ pub struct ExportArgs { /// State schema JSON file (extracted using `calimero-abi state`) /// /// This includes the state root type and its dependencies, sufficient for state deserialization. + /// If not provided, schema will be inferred from database metadata (field_name and crdt_type). #[arg(long, value_name = "SCHEMA_FILE")] pub state_schema_file: Option, @@ -68,7 +69,25 @@ pub fn run_export(args: ExportArgs) -> Result<()> { Err(e) => eyre::bail!("Failed to load state schema: {e}"), } } else { - eyre::bail!("--state-schema-file is required when exporting data"); + // Infer schema from database metadata + println!("No schema file provided, inferring schema from database metadata..."); + println!("(This requires field_name to be stored in entity metadata)"); + match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + println!("Schema inferred successfully"); + if let Some(ref root) = manifest.state_root { + println!("State root: {root}"); + } + if let Some(ref root_name) = manifest.state_root { + if let Some(calimero_wasm_abi::schema::TypeDef::Record { fields }) = manifest.types.get(root_name) { + println!("Fields: {}", fields.len()); + } + } + println!("Note: Inferred schema may have simplified types. For full type information, provide --state-schema-file"); + manifest + } + Err(e) => eyre::bail!("Failed to infer schema from database: {e}. Try providing --state-schema-file instead."), + } }; let columns = if args.all { diff --git a/tools/merodb/src/gui/index.html b/tools/merodb/src/gui/index.html index f666377010..2cca605e45 100644 --- a/tools/merodb/src/gui/index.html +++ b/tools/merodb/src/gui/index.html @@ -35,7 +35,7 @@

MeroDB Inspector

📊

Load Database

-

Specify the database path and optionally upload a state schema file for state decoding

+

Specify the database path. The schema file is optional — if not provided, the schema will be inferred from database metadata

@@ -63,7 +63,7 @@

Load Database

> No file chosen
- Required for state deserialization + Optional — if not provided, the schema will be inferred from database metadata
diff --git a/tools/merodb/src/gui/server.rs b/tools/merodb/src/gui/server.rs index a726f49c1b..241fe3af24 100644 --- a/tools/merodb/src/gui/server.rs +++ b/tools/merodb/src/gui/server.rs @@ -14,6 +14,7 @@ use tower_http::{services::ServeDir, set_header::SetResponseHeaderLayer}; use crate::{abi, dag, export, types::Column}; use calimero_wasm_abi::schema::Manifest; +use hex; #[derive(Debug, Serialize)] struct ErrorResponse { @@ -187,11 +188,11 @@ async fn handle_export(mut multipart: Multipart) -> impl IntoResponse { } } } else { - eprintln!("No state schema file provided - state values will not be decoded"); + // Will infer schema after opening database None }; - // Open database + // Open database (needed for both schema inference and export) let db = match open_database(&db_path) { Ok(db) => db, Err(e) => { @@ -205,6 +206,30 @@ async fn handle_export(mut multipart: Multipart) -> impl IntoResponse { } }; + // Infer schema if not provided (no context_id for global export) + let schema = if schema.is_none() { + eprintln!("No state schema file provided - inferring schema from database..."); + match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + eprintln!("Schema inferred successfully"); + info_message = Some( + "No schema file provided - schema inferred from database metadata. State values will be decoded using inferred schema.".to_string() + ); + Some(manifest) + } + Err(e) => { + let warning = format!( + "Failed to infer schema from database: {e}. State values will not be decoded." 
+ ); + eprintln!("Warning: {warning}"); + warning_message = Some(warning); + None + } + } + } else { + schema + }; + // Export all columns let columns = Column::all().to_vec(); let data = if let Some(schema) = schema { @@ -299,7 +324,7 @@ async fn handle_state_tree(mut multipart: Multipart) -> impl IntoResponse { return (StatusCode::BAD_REQUEST, Json(ErrorResponse { error: e })).into_response(); } - // State schema is required for state tree extraction + // State schema is optional - infer from database if not provided let schema = if let Some(schema_text) = state_schema_text { match serde_json::from_str::(&schema_text) { Ok(schema_value) => match abi::load_state_schema_from_json_value(&schema_value) { @@ -325,13 +350,34 @@ async fn handle_state_tree(mut multipart: Multipart) -> impl IntoResponse { } } } else { - return ( - StatusCode::BAD_REQUEST, - Json(ErrorResponse { - error: "State schema file is required for state tree extraction".to_owned(), - }), - ) - .into_response(); + // Infer schema from database + eprintln!("[server] No schema file provided, inferring from database..."); + match open_database(&db_path) { + Ok(db) => match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + eprintln!("[server] Schema inferred successfully"); + manifest + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to infer schema from database: {e}"), + }), + ) + .into_response(); + } + }, + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to open database for schema inference: {e}"), + }), + ) + .into_response(); + } + } }; // Open database @@ -544,7 +590,7 @@ async fn handle_context_tree(mut multipart: Multipart) -> impl IntoResponse { return (StatusCode::BAD_REQUEST, Json(ErrorResponse { error: e })).into_response(); } - // State schema is required for state tree extraction + // State schema is optional - infer from database if not provided 
let schema = if let Some(schema_text) = state_schema_text { match serde_json::from_str::(&schema_text) { Ok(schema_value) => match abi::load_state_schema_from_json_value(&schema_value) { @@ -570,13 +616,67 @@ async fn handle_context_tree(mut multipart: Multipart) -> impl IntoResponse { } } } else { - return ( - StatusCode::BAD_REQUEST, - Json(ErrorResponse { - error: "State schema file is required for state tree extraction".to_owned(), - }), - ) - .into_response(); + // Infer schema from database for this specific context + eprintln!( + "[server] No schema file provided, inferring from database for context {}...", + context_id + ); + match open_database(&db_path) { + Ok(db) => { + // Decode context_id from hex string + let context_id_bytes = match hex::decode(&context_id) { + Ok(bytes) if bytes.len() == 32 => bytes, + _ => { + return ( + StatusCode::BAD_REQUEST, + Json(ErrorResponse { + error: format!("Invalid context_id format: {}", context_id), + }), + ) + .into_response(); + } + }; + match abi::infer_schema_from_database(&db, Some(&context_id_bytes)) { + Ok(manifest) => { + let field_count = manifest + .state_root + .as_ref() + .and_then(|root| manifest.types.get(root)) + .and_then(|ty| { + if let calimero_wasm_abi::schema::TypeDef::Record { fields } = ty { + Some(fields.len()) + } else { + None + } + }) + .unwrap_or(0); + eprintln!( + "[server] Schema inferred successfully for context {}: {} fields found", + context_id, field_count + ); + manifest + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to infer schema from database: {e}"), + }), + ) + .into_response(); + } + } + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to open database for schema inference: {e}"), + }), + ) + .into_response(); + } + } }; // Open database diff --git a/tools/merodb/src/gui/static/css/visualization.css b/tools/merodb/src/gui/static/css/visualization.css 
index 95bcc37a9d..8027beaf37 100644 --- a/tools/merodb/src/gui/static/css/visualization.css +++ b/tools/merodb/src/gui/static/css/visualization.css @@ -121,6 +121,14 @@ pointer-events: none; } +/* Field Type Colors */ +.field-type-unordered_map { color: #61afef !important; } +.field-type-unordered_set { color: #c678dd !important; } +.field-type-vector { color: #e5c07b !important; } +.field-type-counter { color: #98c379 !important; } +.field-type-rga { color: #d19a66 !important; } +.field-type-lww_register { color: #56b6c2 !important; } + /* State Tree Links */ .state-link { stroke: var(--color-link-default); diff --git a/tools/merodb/src/gui/static/js/api-service.js b/tools/merodb/src/gui/static/js/api-service.js index 7381048b48..3c1c1e342b 100644 --- a/tools/merodb/src/gui/static/js/api-service.js +++ b/tools/merodb/src/gui/static/js/api-service.js @@ -172,26 +172,23 @@ export class ApiService { throw new Error(`Failed to read state schema file: ${err.message}. The file may have already been consumed.`); } } else { - console.error('[ApiService.loadContextTree] ERROR: No state schema file or cached content available!'); - console.error('[ApiService.loadContextTree] State:', { - currentStateSchemaFile: window.app?.state?.currentStateSchemaFile?.name || 'null', - hasCachedContent: !!window.app?.state?.currentStateSchemaFileContent, - hasLocalStorageContent: !!localStorage.getItem('merodb_schema_content'), - stateSchemaFileProvided: !!stateSchemaFile - }); - throw new Error('State schema file is required for state tree extraction'); + // Schema is optional - backend will infer it if not provided + console.log('[ApiService.loadContextTree] No state schema file - backend will infer schema from database'); + text = null; // Don't send schema file } } catch (err) { - if (err.message.includes('State schema file is required')) { - throw err; - } - console.error('[ApiService.loadContextTree] Error accessing local storage:', err); - throw new Error('State schema file is 
required for state tree extraction'); + // Schema is optional - backend will infer it if not provided + console.log('[ApiService.loadContextTree] No state schema file - backend will infer schema from database'); + text = null; // Don't send schema file } } - console.log('[ApiService.loadContextTree] Appending state_schema_file to formData, length:', text.length); - formData.append('state_schema_file', text); + if (text) { + console.log('[ApiService.loadContextTree] Appending state_schema_file to formData, length:', text.length); + formData.append('state_schema_file', text); + } else { + console.log('[ApiService.loadContextTree] No schema file - will use schema inference'); + } const response = await fetch('/api/context-tree', { method: 'POST', diff --git a/tools/merodb/src/gui/static/js/app.js b/tools/merodb/src/gui/static/js/app.js index 3a6555bac1..d6e829cab0 100644 --- a/tools/merodb/src/gui/static/js/app.js +++ b/tools/merodb/src/gui/static/js/app.js @@ -214,9 +214,9 @@ export class App { this.state.currentStateSchemaFile = stateSchemaInput.files[0]; } - if (!this.state.currentStateSchemaFile) { - UIManager.showMessage('warning-message', 'No state schema file found. 
Please select a file first.'); - return; + // Schema file is optional - can use schema inference + if (!this.state.currentStateSchemaFile && !this.state.currentStateSchemaFileContent) { + console.log('[App] No schema file - will use schema inference'); } } await this.loadDatabase(); diff --git a/tools/merodb/src/gui/static/js/state-tree-visualizer.js b/tools/merodb/src/gui/static/js/state-tree-visualizer.js index 20e36b26ac..73a7329f03 100644 --- a/tools/merodb/src/gui/static/js/state-tree-visualizer.js +++ b/tools/merodb/src/gui/static/js/state-tree-visualizer.js @@ -37,6 +37,7 @@ export class StateTreeVisualizer { */ async load() { // Check if we have schema content (from file or local storage) + // Schema is optional - if not provided, backend will infer it from database if (!this.state.currentStateSchemaFile && !this.state.currentStateSchemaFileContent) { // Try to load from local storage try { @@ -45,10 +46,10 @@ export class StateTreeVisualizer { this.state.currentStateSchemaFileContent = savedContent; console.log('[StateTreeVisualizer] Loaded schema from local storage'); } else { - throw new Error('State schema file is required for state tree visualization'); + console.log('[StateTreeVisualizer] No schema file provided - will use schema inference'); } } catch (err) { - throw new Error('State schema file is required for state tree visualization'); + console.log('[StateTreeVisualizer] No schema file provided - will use schema inference'); } } @@ -371,17 +372,69 @@ export class StateTreeVisualizer { return ''; }); - // Add node ID labels + // Add node labels - show field name for Field nodes, truncated ID otherwise nodeEnter.append('text') .attr('dy', '0.31em') .attr('x', d => (d.children || d._children) ? -10 : 10) .attr('text-anchor', d => (d.children || d._children) ? 
'end' : 'start') .text(d => { + // For Field nodes, show the field name + if (d.data.type === 'Field' && d.data.field) { + return d.data.field; + } + // For StateRoot, show "Root" + if (d.data.type === 'StateRoot') { + return 'Root'; + } + // For Entry nodes, show meaningful data + if (d.data.type === 'Entry' && d.data.data) { + // Counter entries: show value (the count) instead of key (executor ID) + // Counter has both key (hash) and value (number) + if (d.data.data.key && d.data.data.value) { + const val = d.data.data.value.parsed ?? d.data.data.value; + // If value is a number (Counter), show "count: N" + if (typeof val === 'number') { + return `count: ${val}`; + } + // Otherwise show "key → value" for regular maps + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + const valStr = typeof val === 'string' ? val : JSON.stringify(val); + const display = `${keyStr} → ${valStr}`; + return display.length > 30 ? display.substring(0, 27) + '...' : display; + } + // Map entries with only key: show key + if (d.data.data.key) { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return keyStr.length > 25 ? keyStr.substring(0, 22) + '...' : keyStr; + } + // Vector entries: show item value + if (d.data.data.item) { + const item = d.data.data.item.parsed || d.data.data.item; + const itemStr = typeof item === 'string' ? item : JSON.stringify(item); + return itemStr.length > 35 ? itemStr.substring(0, 32) + '...' : itemStr; + } + // Set entries or other: show value + if (d.data.data.value) { + const val = d.data.data.value.parsed || d.data.data.value; + const valStr = typeof val === 'string' ? val : JSON.stringify(val); + return valStr.length > 25 ? valStr.substring(0, 22) + '...' : valStr; + } + } + // Fallback to truncated ID const id = d.data.id || 'N/A'; return id !== 'N/A' ? 
`${id.substring(0, 8)}...` : 'N/A'; }) - .style('font-size', '10px') - .style('fill', '#bbb') + .style('font-size', '11px') + .style('fill', d => { + // Color code by type + if (d.data.type === 'StateRoot') return '#ffa500'; // Orange for root + if (d.data.type === 'Field') return '#61afef'; // Blue for fields + if (d.data.type === 'Entry') return '#98c379'; // Green for entries + return '#bbb'; + }) + .style('font-weight', d => d.data.type === 'Field' ? 'bold' : 'normal') .style('pointer-events', 'none'); // Transition nodes to their new position @@ -612,9 +665,11 @@ export class StateTreeVisualizer { html += ` Type:`; html += ` ${data.type || 'N/A'}`; html += ``; + // Calculate children count from actual tree structure + const childrenCount = (node.children?.length || 0) + (node._children?.length || 0); html += `
`; html += ` Children:`; - html += ` ${data.children_count || 0}`; + html += ` ${childrenCount}`; html += `
`; html += ``; @@ -712,47 +767,64 @@ export class StateTreeVisualizer { html += ``; } - html += '
'; - html += `
Hashes
`; - html += `
`; - html += ` ID:`; - html += ` ${TooltipManager.formatHash(data.id, 'ID')}`; - html += `
`; - html += `
`; - html += ` Full Hash:`; - html += ` ${TooltipManager.formatHash(data.full_hash, 'Full Hash')}`; - html += `
`; - html += `
`; - html += ` Own Hash:`; - html += ` ${TooltipManager.formatHash(data.own_hash, 'Own Hash')}`; - html += `
`; - // Use the parent node's ID from the D3 hierarchy instead of data.parent_id - // This ensures the displayed parent ID matches what's shown in the tree - if (node.parent) { - html += `
`; - html += ` Parent ID:`; - html += ` ${TooltipManager.formatHash(node.parent.data.id, 'Parent ID')}`; + // Hashes section - only show if we have hash data + const hasHashData = data.id || data.full_hash || data.own_hash || node.parent; + if (hasHashData) { + html += '
'; + html += `
Hashes
`; + if (data.id) { + html += `
`; + html += ` ID:`; + html += ` ${TooltipManager.formatHash(data.id, 'ID')}`; + html += `
`; + } + if (data.full_hash) { + html += `
`; + html += ` Full Hash:`; + html += ` ${TooltipManager.formatHash(data.full_hash, 'Full Hash')}`; + html += `
`; + } + if (data.own_hash) { + html += `
`; + html += ` Own Hash:`; + html += ` ${TooltipManager.formatHash(data.own_hash, 'Own Hash')}`; + html += `
`; + } + // Use the parent node's ID from the D3 hierarchy + if (node.parent) { + html += `
`; + html += ` Parent ID:`; + html += ` ${TooltipManager.formatHash(node.parent.data.id, 'Parent ID')}`; + html += `
`; + } html += `
`; } - html += `
`; - html += '
'; - html += `
Timestamps
`; - html += `
`; - html += ` Created:`; - html += ` ${TooltipManager.formatTimestamp(data.created_at)}`; - html += `
`; - html += `
`; - html += ` Updated:`; - html += ` ${TooltipManager.formatTimestamp(data.updated_at)}`; - html += `
`; - if (data.deleted_at) { - html += `
`; - html += ` Deleted:`; - html += ` ${TooltipManager.formatTimestamp(data.deleted_at)}`; + // Timestamps section - only show if we have timestamp data + const hasTimestampData = data.created_at || data.updated_at || data.deleted_at; + if (hasTimestampData) { + html += '
'; + html += `
Timestamps
`; + if (data.created_at) { + html += `
`; + html += ` Created:`; + html += ` ${TooltipManager.formatTimestamp(data.created_at)}`; + html += `
`; + } + if (data.updated_at) { + html += `
`; + html += ` Updated:`; + html += ` ${TooltipManager.formatTimestamp(data.updated_at)}`; + html += `
`; + } + if (data.deleted_at) { + html += `
`; + html += ` Deleted:`; + html += ` ${TooltipManager.formatTimestamp(data.deleted_at)}`; + html += `
`; + } html += `
`; } - html += `
`; return html; } @@ -895,13 +967,29 @@ export class StateTreeVisualizer { // Check if item is deleted const isDeleted = data.deleted_at !== null && data.deleted_at !== undefined; + // Determine fill color based on type + let textFill = isDeleted ? '#888' : '#d4d4d4'; + if (!isDeleted && d._typeClass) { + // Use CSS class color for typed fields + const typeColorMap = { + 'field-type-unordered_map': '#61afef', + 'field-type-unordered_set': '#c678dd', + 'field-type-vector': '#e5c07b', + 'field-type-counter': '#98c379', + 'field-type-rga': '#d19a66', + 'field-type-lww_register': '#56b6c2' + }; + textFill = typeColorMap[d._typeClass] || textFill; + } + // Create text element that can wrap const text = g.append('text') .attr('x', (!d.children && !d._children) ? 8 : 0) // Offset for leaf nodes with circles .attr('y', nodeHeight / 2) .attr('dy', '0.35em') - .attr('font-size', '11px') - .attr('fill', isDeleted ? '#888' : '#d4d4d4') // Grayed out for deleted + .attr('font-size', '12px') + .attr('font-weight', data.type === 'Field' ? '500' : '400') + .attr('fill', textFill) .attr('opacity', isDeleted ? 0.6 : 1.0); // Reduced opacity for deleted let labelText = ''; @@ -931,6 +1019,22 @@ export class StateTreeVisualizer { } } + // Icon mapping for field types + const typeIcons = { + 'UnorderedMap': '🗺️', + 'UnorderedSet': '📦', + 'Vector': '📋', + 'LwwRegister': '📝', + 'Counter': '🔢', + 'Rga': '📜', + 'unordered_map': '🗺️', + 'unordered_set': '📦', + 'vector': '📋', + 'lww_register': '📝', + 'counter': '🔢', + 'rga': '📜' + }; + // Format type info nicely if (typeInfo) { // Convert common type names to readable format @@ -943,45 +1047,95 @@ export class StateTreeVisualizer { 'Rga': 'rga' }; const readableType = typeMap[typeInfo] || typeInfo.toLowerCase(); + const icon = typeIcons[typeInfo] || typeIcons[readableType] || '📁'; + + // Add child count for collections + const childCount = d._children ? d._children.length : (d.children ? 
d.children.length : 0); + const countStr = childCount > 0 ? ` [${childCount}]` : ''; + if (counterValue !== null) { - labelText = `${fieldName} (${readableType}) = ${counterValue}`; + labelText = `${icon} ${fieldName}: ${readableType}${countStr} = ${counterValue}`; } else { - labelText = `${fieldName} (${readableType})`; + labelText = `${icon} ${fieldName}: ${readableType}${countStr}`; } + + // Store type info for styling + d._typeClass = `field-type-${readableType}`; } else { if (counterValue !== null) { - labelText = `${fieldName} = ${counterValue}`; + labelText = `📁 ${fieldName} = ${counterValue}`; } else { - labelText = fieldName; + labelText = `📁 ${fieldName}`; } } } - // For Entry types, show key: value format + // For Entry types, show meaningful data else if (data.type === 'Entry') { if (data.data) { const stateData = data.data; let keyStr = ''; let valueStr = ''; + let itemStr = ''; - // Get key + // Get key (for Map entries) if (stateData.key && stateData.key.parsed !== undefined) { - keyStr = JSON.stringify(stateData.key.parsed, null, 0); + const key = stateData.key.parsed; + if (typeof key === 'string') { + keyStr = `"${key}"`; + } else { + keyStr = JSON.stringify(key, null, 0); + } } else if (stateData.key) { keyStr = String(stateData.key); } - // Get value + // Get value (for Map/Counter entries) if (stateData.value && stateData.value.parsed !== undefined) { - valueStr = JSON.stringify(stateData.value.parsed, null, 0); + const val = stateData.value.parsed; + // Handle LwwRegister values (show inner value) + if (val && typeof val === 'object' && val.value !== undefined && val.clock !== undefined) { + valueStr = typeof val.value === 'string' ? 
`"${val.value}"` : JSON.stringify(val.value, null, 0); + } else if (typeof val === 'string') { + valueStr = `"${val}"`; + } else if (typeof val === 'number') { + valueStr = String(val); + } else { + valueStr = JSON.stringify(val, null, 0); + } } else if (stateData.value) { valueStr = String(stateData.value); } - // Format as "key: value" + // Get item (for Vector/Set entries) + if (stateData.item && stateData.item.parsed !== undefined) { + const item = stateData.item.parsed; + // Handle LwwRegister wrapped items + if (item && typeof item === 'object' && item.value !== undefined && item.clock !== undefined) { + itemStr = typeof item.value === 'string' ? `"${item.value}"` : JSON.stringify(item.value, null, 0); + } else if (typeof item === 'string') { + itemStr = `"${item}"`; + } else { + itemStr = JSON.stringify(item, null, 0); + } + } else if (stateData.item) { + itemStr = String(stateData.item); + } + + // Truncate long values + const maxLen = 60; + if (valueStr.length > maxLen) valueStr = valueStr.substring(0, maxLen) + '...'; + if (itemStr.length > maxLen) itemStr = itemStr.substring(0, maxLen) + '...'; + + // Determine display format based on what data is available if (keyStr && valueStr) { - labelText = `${keyStr}: ${valueStr}`; + // Counter: if value is a number, show "key → value" + // Map: show "key → value" + labelText = `${keyStr} → ${valueStr}`; + } else if (itemStr) { + // Vector/Set entry: just show the item value + labelText = itemStr; } else if (keyStr) { - labelText = `Key: ${keyStr}`; + labelText = keyStr; } else if (valueStr) { labelText = valueStr; } else {