diff --git a/apps/kv-store-init/src/lib.rs b/apps/kv-store-init/src/lib.rs index 79c2bc5d9..284ef9f47 100644 --- a/apps/kv-store-init/src/lib.rs +++ b/apps/kv-store-init/src/lib.rs @@ -38,7 +38,7 @@ impl KvStoreInit { app::log!("Initializing KvStoreInit with default items"); let mut store = KvStoreInit { - items: UnorderedMap::new(), + items: UnorderedMap::new_with_field_name("items"), }; // Add some initial data during initialization diff --git a/apps/kv-store/src/lib.rs b/apps/kv-store/src/lib.rs index 6a6547b06..60dcdf905 100644 --- a/apps/kv-store/src/lib.rs +++ b/apps/kv-store/src/lib.rs @@ -37,7 +37,7 @@ impl KvStore { #[app::init] pub fn init() -> KvStore { KvStore { - items: UnorderedMap::new(), + items: UnorderedMap::new_with_field_name("items"), } } diff --git a/apps/nested-crdt-test/src/lib.rs b/apps/nested-crdt-test/src/lib.rs index fe09beb9c..c73bb0a10 100644 --- a/apps/nested-crdt-test/src/lib.rs +++ b/apps/nested-crdt-test/src/lib.rs @@ -68,11 +68,11 @@ impl NestedCrdtTest { #[app::init] pub fn init() -> NestedCrdtTest { NestedCrdtTest { - counters: UnorderedMap::new(), - registers: UnorderedMap::new(), - metadata: UnorderedMap::new(), - metrics: Vector::new(), - tags: UnorderedMap::new(), + counters: UnorderedMap::new_with_field_name("counters"), + registers: UnorderedMap::new_with_field_name("registers"), + metadata: UnorderedMap::new_with_field_name("metadata"), + metrics: Vector::new_with_field_name("metrics"), + tags: UnorderedMap::new_with_field_name("tags"), } } diff --git a/crates/storage/src/collections/counter.rs b/crates/storage/src/collections/counter.rs index 053850ddb..79852ca8a 100644 --- a/crates/storage/src/collections/counter.rs +++ b/crates/storage/src/collections/counter.rs @@ -250,18 +250,20 @@ impl Counter /// all top-level collections have deterministic IDs regardless of how they were /// created in `init()`. 
/// + /// This method also migrates all internal map entries to use the new parent IDs, + /// ensuring that increments during `init()` remain accessible. + /// /// # Arguments /// * `field_name` - The name of the struct field containing this counter pub fn reassign_deterministic_id(&mut self, field_name: &str) { - // Counter has two internal maps - both need deterministic IDs + // Counter has two internal maps - both need deterministic IDs and entry migration. + // We use the UnorderedMap's reassign which handles entry migration. self.positive - .inner - .reassign_deterministic_id_with_crdt_type( - &format!("__counter_internal_{field_name}_positive"), - CrdtType::Counter, - ); + .reassign_deterministic_id(&format!("__counter_internal_{field_name}_positive")); + // Update CRDT type after reassignment + self.positive.inner.storage.metadata.crdt_type = Some(CrdtType::Counter); + self.negative - .inner .reassign_deterministic_id(&format!("__counter_internal_{field_name}_negative")); } diff --git a/crates/storage/src/collections/frozen.rs b/crates/storage/src/collections/frozen.rs index 5a713e5aa..94d8eae84 100644 --- a/crates/storage/src/collections/frozen.rs +++ b/crates/storage/src/collections/frozen.rs @@ -68,6 +68,9 @@ where /// all top-level collections have deterministic IDs regardless of how they were /// created in `init()`. /// + /// This method also migrates all existing entries to use the new parent ID, + /// ensuring that entries inserted during `init()` remain accessible. 
+ /// /// # Arguments /// * `field_name` - The name of the struct field containing this FrozenStorage pub fn reassign_deterministic_id(&mut self, field_name: &str) { diff --git a/crates/storage/src/collections/unordered_map.rs b/crates/storage/src/collections/unordered_map.rs index f21f31274..ba5990b53 100644 --- a/crates/storage/src/collections/unordered_map.rs +++ b/crates/storage/src/collections/unordered_map.rs @@ -101,11 +101,44 @@ where /// all top-level collections have deterministic IDs regardless of how they were /// created in `init()`. /// + /// This method also migrates all existing entries to use the new parent ID, + /// ensuring that entries inserted during `init()` remain accessible. + /// /// # Arguments /// * `field_name` - The name of the struct field containing this map - pub fn reassign_deterministic_id(&mut self, field_name: &str) { + #[expect(clippy::expect_used, reason = "fatal error if migration fails")] + pub fn reassign_deterministic_id(&mut self, field_name: &str) + where + K: AsRef<[u8]> + PartialEq, + { + use super::compute_collection_id; + + let new_id = compute_collection_id(None, field_name); + let old_id = self.inner.id(); + + // If already has the correct ID, nothing to do + if old_id == new_id { + return; + } + + // Collect all entries before migration (must do this before clearing) + let entries: Vec<(K, V)> = self + .entries() + .expect("failed to read entries for migration") + .collect(); + + // Clear the collection (removes old entries with old IDs) + self.inner.clear().expect("failed to clear for migration"); + + // Now reassign the collection's ID self.inner .reassign_deterministic_id_with_crdt_type(field_name, CrdtType::UnorderedMap); + + // Re-insert all entries (they will get new IDs based on new parent ID) + for (key, value) in entries { + self.insert(key, value) + .expect("failed to re-insert entry during migration"); + } } /// Insert a key-value pair into the map. 
diff --git a/crates/storage/src/collections/unordered_set.rs b/crates/storage/src/collections/unordered_set.rs index 13dc88323..1a986d65b 100644 --- a/crates/storage/src/collections/unordered_set.rs +++ b/crates/storage/src/collections/unordered_set.rs @@ -83,11 +83,44 @@ where /// all top-level collections have deterministic IDs regardless of how they were /// created in `init()`. /// + /// This method also migrates all existing elements to use the new parent ID, + /// ensuring that elements inserted during `init()` remain accessible. + /// /// # Arguments /// * `field_name` - The name of the struct field containing this set - pub fn reassign_deterministic_id(&mut self, field_name: &str) { + #[expect(clippy::expect_used, reason = "fatal error if migration fails")] + pub fn reassign_deterministic_id(&mut self, field_name: &str) + where + V: AsRef<[u8]> + PartialEq, + { + use super::compute_collection_id; + + let new_id = compute_collection_id(None, field_name); + let old_id = self.inner.id(); + + // If already has the correct ID, nothing to do + if old_id == new_id { + return; + } + + // Collect all elements before migration (must do this before clearing) + let elements: Vec<V> = self + .iter() + .expect("failed to read elements for migration") + .collect(); + + // Clear the collection (removes old entries with old IDs) + self.inner.clear().expect("failed to clear for migration"); + + // Now reassign the collection's ID self.inner .reassign_deterministic_id_with_crdt_type(field_name, CrdtType::UnorderedSet); + + // Re-insert all elements (they will get new IDs based on new parent ID) + for value in elements { + self.insert(value) + .expect("failed to re-insert element during migration"); + } } /// Insert a value pair into the set collection if the element does not already exist. 
diff --git a/crates/storage/src/collections/user.rs b/crates/storage/src/collections/user.rs index dac849798..a0ea233f3 100644 --- a/crates/storage/src/collections/user.rs +++ b/crates/storage/src/collections/user.rs @@ -66,6 +66,9 @@ where /// all top-level collections have deterministic IDs regardless of how they were /// created in `init()`. /// + /// This method also migrates all existing entries to use the new parent ID, + /// ensuring that entries inserted during `init()` remain accessible. + /// /// # Arguments /// * `field_name` - The name of the struct field containing this UserStorage pub fn reassign_deterministic_id(&mut self, field_name: &str) { diff --git a/tools/merodb/src/abi.rs b/tools/merodb/src/abi.rs index f2bc23228..9db9a804a 100644 --- a/tools/merodb/src/abi.rs +++ b/tools/merodb/src/abi.rs @@ -1,6 +1,7 @@ use std::fs; use std::path::Path; +use calimero_storage::collections::CrdtType; use calimero_wasm_abi::schema::Manifest; use eyre::Result; @@ -67,3 +68,196 @@ pub fn load_state_schema_from_json(schema_path: &Path) -> Result<Manifest> { load_state_schema_from_json_value(&schema_value) } + +/// Infer state schema from database by reading field names and CRDT types from metadata +/// +/// This function scans the State column for EntityIndex entries and builds a schema +/// based on field_name and crdt_type found in metadata. This enables schema-free +/// database inspection when field names are stored in metadata. +/// +/// # Arguments +/// * `db` - The database to scan +/// * `context_id` - Optional context ID to filter by. 
If None, scans all contexts (may find fields from multiple contexts) +pub fn infer_schema_from_database( + db: &rocksdb::DBWithThreadMode<rocksdb::SingleThreaded>, + context_id: Option<&[u8]>, +) -> Result<Manifest> { + use calimero_wasm_abi::schema::{ + CollectionType, CrdtCollectionType, Field, ScalarType, TypeDef, TypeRef, + }; + use std::collections::BTreeMap; + + let state_cf = db + .cf_handle("State") + .ok_or_else(|| eyre::eyre!("State column family not found"))?; + + let mut fields = Vec::new(); + let mut seen_field_names = std::collections::HashSet::new(); + + // Root ID depends on context: + // - If context_id is provided, root ID is that context_id (Id::root() returns context_id()) + // - If no context_id, we can't determine root fields reliably, so use all zeros as fallback + let root_id_bytes: [u8; 32] = match context_id { + Some(ctx_id) => ctx_id.try_into().map_err(|_| { + eyre::eyre!( + "context_id must be exactly 32 bytes, got {} bytes", + ctx_id.len() + ) + })?, + None => { + eprintln!( + "[WARNING] No context_id provided for schema inference. \ + Using [0; 32] as fallback root ID. This may produce incorrect or incomplete \ + schema if the database contains multiple contexts. Consider providing a \ + specific context_id for accurate schema inference." 
+ ); + [0u8; 32] + } + }; + + // Scan State column for EntityIndex entries + let iter = db.iterator_cf(&state_cf, rocksdb::IteratorMode::Start); + for item in iter { + let (key, value) = item?; + + // Filter by context_id if provided (key format: context_id (32 bytes) + state_key (32 bytes)) + if let Some(expected_context_id) = context_id { + if key.len() < 32 || &key[..32] != expected_context_id { + continue; + } + } + + // Try to deserialize as EntityIndex + if let Ok(index) = borsh::from_slice::(&value) { + // Check if this is a root-level field (parent_id is None or equals root/context_id) + let is_root_field = index.parent_id.is_none() + || index + .parent_id + .as_ref() + .map(|id| id.as_bytes() == &root_id_bytes) + .unwrap_or(false); + + if is_root_field { + // Check if we have field_name in metadata + if let Some(ref field_name) = index.metadata.field_name { + if !seen_field_names.contains(field_name) { + seen_field_names.insert(field_name.clone()); + + // Infer type from crdt_type + let type_ref = if let Some(crdt_type) = index.metadata.crdt_type { + match crdt_type { + CrdtType::UnorderedMap => { + // Default to Map - can be refined later + TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + } + } + CrdtType::Vector => TypeRef::Collection { + collection: CollectionType::List { + items: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::Vector), + inner_type: None, + }, + CrdtType::UnorderedSet => TypeRef::Collection { + collection: CollectionType::List { + items: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedSet), + inner_type: None, + }, + CrdtType::Counter => TypeRef::Collection { + // Counter is stored as Map internally + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: 
Box::new(TypeRef::Scalar(ScalarType::U64)), + }, + crdt_type: Some(CrdtCollectionType::Counter), + inner_type: None, + }, + CrdtType::Rga => TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::ReplicatedGrowableArray), + inner_type: None, + }, + CrdtType::LwwRegister => TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::LwwRegister), + inner_type: Some(Box::new(TypeRef::string())), + }, + CrdtType::UserStorage => TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + }, + CrdtType::FrozenStorage => TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + }, + CrdtType::Record => { + // Record type - would need to inspect children to infer fields + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: None, + inner_type: None, + } + } + CrdtType::Custom(_) => { + // Custom type - can't infer without schema + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: None, + inner_type: None, + } + } + } + } else { + // No CRDT type - default to LWW register + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::LwwRegister), + inner_type: Some(Box::new(TypeRef::string())), + } + }; + + fields.push(Field { + name: field_name.clone(), + type_: type_ref, + nullable: None, + }); + } + } + } + } + } + + // Create a record type with all inferred fields + let state_root_type = "InferredStateRoot".to_string(); + let mut types = BTreeMap::new(); + types.insert( + state_root_type.clone(), 
+ TypeDef::Record { + fields: fields.clone(), + }, + ); + + Ok(Manifest { + schema_version: "wasm-abi/1".to_string(), + types, + methods: Vec::new(), + events: Vec::new(), + state_root: Some(state_root_type), + }) +} diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index 05d9b1a51..02bb1bb19 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -1,6 +1,7 @@ pub mod cli; use borsh::BorshDeserialize; +use calimero_storage::collections::CrdtType; use calimero_store::types::ContextDagDelta as StoreContextDagDelta; use calimero_wasm_abi::schema::{ CollectionType, CrdtCollectionType, Field, Manifest, ScalarType, TypeDef, TypeRef, @@ -88,6 +89,47 @@ struct MapField { value_type: TypeRef, } +/// Try to decode entry data with a specific field definition +fn try_decode_with_field( + entry_bytes: &[u8], + field: &Field, + index: &EntityIndex, + manifest: &Manifest, +) -> Option { + match &field.type_ { + TypeRef::Collection { + collection: CollectionType::Map { key, value }, + .. + } => { + let map_field = MapField { + name: field.name.clone(), + key_type: (**key).clone(), + value_type: (**value).clone(), + }; + decode_map_entry(entry_bytes, &map_field, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten() + } + TypeRef::Collection { + collection: CollectionType::List { items }, + .. + } => decode_list_entry(entry_bytes, field, items, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten(), + TypeRef::Collection { + collection: CollectionType::Record { .. 
}, + crdt_type, + inner_type, + } => decode_record_entry(entry_bytes, field, crdt_type, inner_type, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten(), + _ => None, + } +} + /// Try to decode a collection entry by looking up the actual entry data from an EntityIndex /// Supports Map entries (Entry<(K, V)>) and List entries (Entry) fn try_decode_collection_entry_from_index( @@ -170,6 +212,27 @@ fn try_decode_collection_entry_from_index( record_fields.len() ); + // First, try to match by field_name if available (most direct and efficient) + if let Some(ref field_name) = index.metadata.field_name { + eprintln!( + "[try_decode_collection_entry_from_index] Using field_name from metadata: {}", + field_name + ); + if let Some(field) = record_fields.iter().find(|f| f.name == *field_name) { + eprintln!( + "[try_decode_collection_entry_from_index] Found matching field by name: {}", + field_name + ); + // Try to decode with this specific field + return try_decode_with_field(&entry_bytes, field, index, manifest); + } else { + eprintln!( + "[try_decode_collection_entry_from_index] Field name '{}' not found in schema, falling back to all fields", + field_name + ); + } + } + // If we have a parent_id, try to find the collection field that matches it // Otherwise, try all collection fields let fields_to_try: Vec<&Field> = if let Some(parent_id) = &index.parent_id { @@ -627,6 +690,8 @@ fn decode_state_entry( "own_hash": hex::encode(index.own_hash), "created_at": index.metadata.created_at, "updated_at": *index.metadata.updated_at, + "field_name": index.metadata.field_name, + "crdt_type": index.metadata.crdt_type.as_ref().map(|c| format!("{:?}", c)), "deleted_at": index.deleted_at })); } else { @@ -934,24 +999,24 @@ fn decode_scalar_entry(bytes: &[u8], field: &Field, manifest: &Manifest) -> Resu } // EntityIndex structure for decoding -#[derive(borsh::BorshDeserialize)] -struct EntityIndex { - id: Id, - parent_id: Option, - children: Option>, - 
full_hash: [u8; 32], - own_hash: [u8; 32], - metadata: Metadata, - deleted_at: Option<u64>, +#[derive(borsh::BorshDeserialize, Clone)] +pub(crate) struct EntityIndex { + pub(crate) id: Id, + pub(crate) parent_id: Option<Id>, + pub(crate) children: Option<Vec<ChildInfo>>, + pub(crate) full_hash: [u8; 32], + pub(crate) own_hash: [u8; 32], + pub(crate) metadata: Metadata, + pub(crate) deleted_at: Option<u64>, } -#[derive(borsh::BorshDeserialize)] -struct Id { +#[derive(borsh::BorshDeserialize, Clone)] +pub(crate) struct Id { bytes: [u8; 32], } impl Id { - const fn as_bytes(&self) -> &[u8; 32] { + pub(crate) const fn as_bytes(&self) -> &[u8; 32] { &self.bytes } } @@ -1334,6 +1399,8 @@ fn try_manual_entity_index_decode( created_at, updated_at: UpdatedAt(updated_at_val), storage_type, + crdt_type: None, + field_name: None, }; let child_info = ChildInfo { @@ -1408,12 +1475,14 @@ fn try_manual_entity_index_decode( created_at: 0, updated_at: UpdatedAt(0), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, deleted_at: None, }) } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" )] @@ -1424,18 +1493,64 @@ struct ChildInfo { metadata: Metadata, } -#[derive(borsh::BorshDeserialize)] +#[derive(Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" )] -struct Metadata { - created_at: u64, - updated_at: UpdatedAt, - storage_type: StorageType, +pub(crate) struct Metadata { + pub(crate) created_at: u64, + pub(crate) updated_at: UpdatedAt, + pub(crate) storage_type: StorageType, + pub(crate) crdt_type: Option<CrdtType>, + pub(crate) field_name: Option<String>, +} + +// Custom BorshDeserialize for backward compatibility with old Metadata that doesn't have field_name +impl borsh::BorshDeserialize for Metadata { + fn deserialize_reader<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> { + let created_at = u64::deserialize_reader(reader)?; + let updated_at = 
UpdatedAt::deserialize_reader(reader)?; + let storage_type = StorageType::deserialize_reader(reader)?; + + // Try to deserialize crdt_type (may not exist in old format) + let crdt_type = match <Option<CrdtType>>::deserialize_reader(reader) { + Ok(ct) => ct, + Err(e) => { + if matches!(e.kind(), std::io::ErrorKind::UnexpectedEof) { + None + } else { + return Err(e); + } + } + }; + + // Try to deserialize field_name (may not exist in old format) + let field_name = match <Option<String>>::deserialize_reader(reader) { + Ok(fn_val) => fn_val, + Err(e) => { + if matches!(e.kind(), std::io::ErrorKind::UnexpectedEof) { + None + } else { + return Err(e); + } + } + }; + + Ok(Metadata { + created_at, + updated_at, + storage_type, + crdt_type, + field_name, + }) + } } -#[derive(borsh::BorshDeserialize)] +// CrdtType is now imported from calimero_storage::collections::CrdtType +// to ensure Borsh serialization compatibility with the storage layer. + +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Variants required for Borsh deserialization structure" )] @@ -1449,7 +1564,7 @@ enum StorageType { Frozen, } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" )] @@ -1459,7 +1574,7 @@ struct SignatureData { nonce: u64, } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] struct UpdatedAt(u64); impl Deref for UpdatedAt { @@ -2059,8 +2174,44 @@ fn decode_state_root_bfs( fields.len() ); + // PRE-FILTER: Build a mapping from field_name to (state_key, EntityIndex) for children that have field_name + // This allows direct field matching instead of sequential iteration + let mut field_name_to_child: std::collections::HashMap<String, (String, EntityIndex)> = + std::collections::HashMap::new(); + for child_info in &root_children { + let child_element_id = hex::encode(child_info.id.as_bytes()); + if let Some(state_key) = element_to_state.get(&child_element_id) { + let child_key_bytes = match 
hex::decode(state_key) { + Ok(bytes) => bytes, + Err(_) => continue, + }; + let mut child_key = Vec::with_capacity(64); + child_key.extend_from_slice(context_id); + child_key.extend_from_slice(&child_key_bytes); + + if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + if let Ok(child_index) = borsh::from_slice::(&child_value) { + if let Some(ref field_name) = child_index.metadata.field_name { + eprintln!( + "[decode_state_root_bfs] Found collection root with field_name='{}': id={}, {} children", + field_name, + child_element_id, + child_index.children.as_ref().map(|c| c.len()).unwrap_or(0) + ); + field_name_to_child + .insert(field_name.clone(), (state_key.clone(), child_index)); + } + } + } + } + } + eprintln!( + "[decode_state_root_bfs] Pre-filtered {} collection roots with field_name", + field_name_to_child.len() + ); + // For each field in the state root schema, find and decode its children using BFS - // Match children to fields by iterating through root's children + // Match children to fields by field_name first, then fall back to sequential matching let mut used_children = std::collections::HashSet::new(); for field in fields { eprintln!("[decode_state_root_bfs] Decoding field: {}", field.name); @@ -2079,52 +2230,93 @@ fn decode_state_root_bfs( }; let field_value = if field_value { - // Find an unused child that is a collection root + // FIRST: Try to find by field_name (direct match) let mut matched_child = None; - for child_info in &root_children { - let child_element_id = hex::encode(child_info.id.as_bytes()); - if used_children.contains(&child_element_id) { - continue; + if let Some((state_key, child_index)) = field_name_to_child.get(&field.name) { + let child_element_id = hex::encode(child_index.id.as_bytes()); + if !used_children.contains(&child_element_id) { + eprintln!( + "[decode_state_root_bfs] Direct field_name match for '{}': {} children", + field.name, + child_index.children.as_ref().map(|c| c.len()).unwrap_or(0) + ); + 
matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); } + } - // Check if this child is a collection root by loading its EntityIndex - if let Some(state_key) = element_to_state.get(&child_element_id) { - let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { - format!("Failed to decode child_state_key: {}", state_key) - })?; - let mut child_key = Vec::with_capacity(64); - child_key.extend_from_slice(context_id); - child_key.extend_from_slice(&child_key_bytes); + // FALLBACK: If no direct match, try sequential matching (for legacy data) + if matched_child.is_none() { + eprintln!( + "[decode_state_root_bfs] No direct field_name match for '{}', trying sequential", + field.name + ); + for child_info in &root_children { + let child_element_id = hex::encode(child_info.id.as_bytes()); + if used_children.contains(&child_element_id) { + continue; + } - if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { - // Try standard Borsh deserialization first - let child_index = match borsh::from_slice::(&child_value) { - Ok(index) => { - eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex for field {}: {} children", field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0)); - index - } - Err(e) => { - // Try manual deserialization as fallback - eprintln!("[decode_state_root_bfs] Failed to decode collection root EntityIndex for field {} using Borsh: {}. 
Attempting manual decode...", field.name, e); - match try_manual_entity_index_decode(&child_value, context_id) { - Ok(index) => { - eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex manually for field {}: {} children", field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0)); - index - } - Err(manual_err) => { - eprintln!("[decode_state_root_bfs] Manual decode also failed for collection root: {}", manual_err); - continue; // Skip this child + // Check if this child is a collection root by loading its EntityIndex + if let Some(state_key) = element_to_state.get(&child_element_id) { + let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { + format!("Failed to decode child_state_key: {}", state_key) + })?; + let mut child_key = Vec::with_capacity(64); + child_key.extend_from_slice(context_id); + child_key.extend_from_slice(&child_key_bytes); + + if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + // Try standard Borsh deserialization first + let child_index = match borsh::from_slice::(&child_value) { + Ok(index) => { + eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex for field {}: {} children, field_name={:?}", + field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0), index.metadata.field_name); + index + } + Err(e) => { + // Try manual deserialization as fallback + eprintln!("[decode_state_root_bfs] Failed to decode collection root EntityIndex for field {} using Borsh: {}. 
Attempting manual decode...", field.name, e); + match try_manual_entity_index_decode(&child_value, context_id) { + Ok(index) => { + eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex manually for field {}: {} children, field_name={:?}", + field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0), index.metadata.field_name); + index + } + Err(manual_err) => { + eprintln!("[decode_state_root_bfs] Manual decode also failed for collection root: {}", manual_err); + continue; // Skip this child + } } } + }; + + // Match by field_name if available, otherwise fall back to sequential matching + let field_name_matches = child_index + .metadata + .field_name + .as_ref() + .map(|fn_| fn_ == &field.name) + .unwrap_or(false); + + if field_name_matches { + // This child's field_name matches the schema field + eprintln!("[decode_state_root_bfs] Found matching child for field {} by field_name", field.name); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); + break; + } else if child_index.metadata.field_name.is_none() { + // Legacy data without field_name - use sequential matching as fallback + eprintln!("[decode_state_root_bfs] Child has no field_name, using sequential match for field {}", field.name); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); + break; } - }; - // This is a collection root - it matches this collection field - matched_child = Some((state_key.clone(), child_index)); - used_children.insert(child_element_id); - break; + // If field_name exists but doesn't match, continue to next child + } } } - } + } // end fallback if let Some((collection_root_key, collection_root_index)) = matched_child { // Decode this collection field using the found collection root @@ -2152,7 +2344,7 @@ fn decode_state_root_bfs( } } else { // Non-collection field - could be a Record (Counter, etc.) 
or scalar - // Try to find a child that matches this field + // Try to find a child that matches this field by field_name // For Record types like Counter, they're stored as children of the root let mut matched_child = None; for child_info in &root_children { @@ -2161,7 +2353,7 @@ fn decode_state_root_bfs( continue; } - // Check if this child matches the field by trying to decode it + // Check if this child matches the field by field_name first if let Some(state_key) = element_to_state.get(&child_element_id) { let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { format!("Failed to decode child_state_key: {}", state_key) @@ -2171,6 +2363,21 @@ fn decode_state_root_bfs( child_key.extend_from_slice(&child_key_bytes); if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + // First try to decode as EntityIndex to check field_name + if let Ok(child_index) = borsh::from_slice::(&child_value) { + // Check if field_name matches + if let Some(ref child_field_name) = child_index.metadata.field_name { + if child_field_name != &field.name { + // This child's field_name doesn't match - skip to next child + eprintln!("[decode_state_root_bfs] Skipping child {} for field {} - field_name is '{}'", + child_element_id, field.name, child_field_name); + continue; + } + eprintln!("[decode_state_root_bfs] Found matching child {} for field {} by field_name", + child_element_id, field.name); + } + } + eprintln!("[decode_state_root_bfs] Attempting to decode child {} for field {} (value length: {})", child_element_id, field.name, child_value.len()); // First, try to decode directly as the field's type (for Counter, etc.) 
// This handles cases where the value is stored as Entry where T is the field type @@ -2839,6 +3046,188 @@ fn decode_collection_field_with_root( } } +/// Read the actual value of a Counter by summing entries in its positive and negative maps +#[cfg(feature = "gui")] +fn read_counter_value( + db: &DBWithThreadMode, + state_cf: &rocksdb::ColumnFamily, + context_id: &[u8], + positive_id: &[u8], + negative_id: &[u8], +) -> i64 { + use sha2::{Digest, Sha256}; + + let mut total: i64 = 0; + + // Helper to sum values from a counter map (positive or negative) + let sum_map_values = |map_id: &[u8]| -> i64 { + let mut sum: i64 = 0; + + // Find the EntityIndex for this map to get its children + let mut key_bytes_for_hash = Vec::with_capacity(33); + key_bytes_for_hash.push(0u8); // Key::Index variant + key_bytes_for_hash.extend_from_slice(map_id); + let map_state_key = Sha256::digest(&key_bytes_for_hash); + + let mut full_key = Vec::with_capacity(64); + full_key.extend_from_slice(context_id); + full_key.extend_from_slice(&map_state_key); + + if let Ok(Some(map_value)) = db.get_cf(state_cf, &full_key) { + if let Ok(map_index) = borsh::from_slice::(&map_value) { + // For each child in the map, read its Entry to get the count value + if let Some(children) = &map_index.children { + for child_info in children { + // Calculate Key::Entry for this child + let mut entry_key_bytes = Vec::with_capacity(33); + entry_key_bytes.push(1u8); // Key::Entry variant + entry_key_bytes.extend_from_slice(child_info.id.as_bytes()); + let entry_state_key = Sha256::digest(&entry_key_bytes); + + let mut entry_full_key = Vec::with_capacity(64); + entry_full_key.extend_from_slice(context_id); + entry_full_key.extend_from_slice(&entry_state_key); + + if let Ok(Some(entry_value)) = db.get_cf(state_cf, &entry_full_key) { + // Entry format: (key: String, value: u64, element_id: Id) + // Parse key length, skip key, read u64 value + if entry_value.len() >= 12 { + // minimum: 4 (len) + 0 (key) + 8 (u64) + let 
key_len = u32::from_le_bytes([ + entry_value[0], + entry_value[1], + entry_value[2], + entry_value[3], + ]) as usize; + + let value_offset = 4 + key_len; + if entry_value.len() >= value_offset + 8 { + let count = u64::from_le_bytes([ + entry_value[value_offset], + entry_value[value_offset + 1], + entry_value[value_offset + 2], + entry_value[value_offset + 3], + entry_value[value_offset + 4], + entry_value[value_offset + 5], + entry_value[value_offset + 6], + entry_value[value_offset + 7], + ]); + // Use saturating conversion to avoid overflow + // u64 values > i64::MAX will be clamped to i64::MAX + let count_i64 = i64::try_from(count).unwrap_or(i64::MAX); + sum = sum.saturating_add(count_i64); + } + } + } + } + } + } + } + sum + }; + + total = total.saturating_add(sum_map_values(positive_id)); + total = total.saturating_sub(sum_map_values(negative_id)); + + total +} + +/// Read children of a nested collection and return their entries as JSON +#[cfg(feature = "gui")] +fn read_nested_collection_entries( + db: &DBWithThreadMode, + state_cf: &rocksdb::ColumnFamily, + context_id: &[u8], + collection_id: &[u8], +) -> Vec { + use sha2::{Digest, Sha256}; + + let mut results = Vec::new(); + + // Look up the EntityIndex for this collection + let mut key_bytes = Vec::with_capacity(33); + key_bytes.push(0u8); // Key::Index variant + key_bytes.extend_from_slice(collection_id); + let state_key = Sha256::digest(&key_bytes); + + let mut full_key = Vec::with_capacity(64); + full_key.extend_from_slice(context_id); + full_key.extend_from_slice(&state_key); + + let Ok(Some(index_bytes)) = db.get_cf(state_cf, &full_key) else { + return results; + }; + + let Ok(index) = borsh::from_slice::(&index_bytes) else { + return results; + }; + + // Read each child entry + if let Some(children) = &index.children { + for child_info in children { + // Get Key::Entry for this child + let mut entry_key_bytes = Vec::with_capacity(33); + entry_key_bytes.push(1u8); // Key::Entry variant + 
entry_key_bytes.extend_from_slice(child_info.id.as_bytes()); + let entry_state_key = Sha256::digest(&entry_key_bytes); + + let mut entry_full_key = Vec::with_capacity(64); + entry_full_key.extend_from_slice(context_id); + entry_full_key.extend_from_slice(&entry_state_key); + + let Ok(Some(entry_value)) = db.get_cf(state_cf, &entry_full_key) else { + continue; + }; + + // Try to parse the entry as (key: String, value) + if entry_value.len() >= 4 { + let key_len = u32::from_le_bytes([ + entry_value[0], + entry_value[1], + entry_value[2], + entry_value[3], + ]) as usize; + + if key_len > 0 && key_len < 1000 && entry_value.len() >= 4 + key_len { + if let Ok(key_str) = std::str::from_utf8(&entry_value[4..4 + key_len]) { + let value_bytes = &entry_value[4 + key_len..]; + + // Try to parse value as a string (LwwRegister) + if value_bytes.len() >= 4 { + let val_len = u32::from_le_bytes([ + value_bytes[0], + value_bytes[1], + value_bytes[2], + value_bytes[3], + ]) as usize; + + if val_len > 0 && val_len < 10000 && value_bytes.len() >= 4 + val_len { + if let Ok(val_str) = + std::str::from_utf8(&value_bytes[4..4 + val_len]) + { + results.push(json!({ + "key": key_str, + "value": val_str, + })); + continue; + } + } + } + + // Fallback: show key with hex value + results.push(json!({ + "key": key_str, + "value_hex": hex::encode(value_bytes), + })); + } + } + } + } + } + + results +} + #[cfg(feature = "gui")] fn decode_collection_entries_bfs( db: &DBWithThreadMode, @@ -2958,7 +3347,224 @@ fn decode_collection_entries_bfs( .wrap_err("Failed to query entry")? .ok_or_else(|| eyre::eyre!("Entry not found"))?; - // Decode the entry according to collection type + // FIRST: Try to decode as EntityIndex to check if it's a nested collection + // This allows us to detect nested CRDTs (Counter, nested Map, Set, etc.) 
+ // by reading their crdt_type from metadata instead of relying on schema + match borsh::from_slice::(&entry_value) { + Ok(entry_index) => { + // It's an EntityIndex - check its crdt_type + let child_crdt_type = entry_index.metadata.crdt_type.as_ref(); + let child_field_name = entry_index.metadata.field_name.clone(); + let child_count = entry_index.children.as_ref().map(|c| c.len()).unwrap_or(0); + + eprintln!( + "[decode_collection_entries_bfs] Entry {} is EntityIndex with crdt_type={:?}, field_name={:?}, children={}", + entry_state_key, child_crdt_type, child_field_name, child_count + ); + + // If it has a crdt_type, treat it as a nested collection + if let Some(crdt) = child_crdt_type { + let crdt_name = format!("{:?}", crdt); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "NestedCollection", + "crdt_type": crdt_name, + "field_name": child_field_name, + "children_count": child_count, + "id": hex::encode(entry_index.id.as_bytes()), + } + })); + continue; + } + + // EntityIndex with no crdt_type might be a Vector entry + // Look for the actual data under Key::Entry for this ID + use sha2::{Digest, Sha256}; + let mut entry_key_bytes = Vec::with_capacity(33); + entry_key_bytes.push(1u8); // Key::Entry variant + entry_key_bytes.extend_from_slice(entry_index.id.as_bytes()); + let entry_data_state_key = Sha256::digest(&entry_key_bytes); + + let mut entry_data_full_key = Vec::with_capacity(64); + entry_data_full_key.extend_from_slice(context_id); + entry_data_full_key.extend_from_slice(&entry_data_state_key); + + if let Ok(Some(entry_data)) = db.get_cf(state_cf, &entry_data_full_key) { + // Found the entry data - check if it's a Counter (64 bytes = two IDs) + if entry_data.len() == 64 { + let positive_id = &entry_data[..32]; + let negative_id = &entry_data[32..]; + let counter_value = read_counter_value( + db, + state_cf, + context_id, + positive_id, + negative_id, + ); + + // Note: For legacy data without crdt_type metadata, we attempt 
to read + // the value as a Counter. New data created after PR #1864 will have + // crdt_type set in metadata and won't need this heuristic. + // A successful read (non-zero or non-error sum) indicates Counter data. + eprintln!( + "[decode_collection_entries_bfs] Entry {} (Vector item, no crdt_type metadata): Counter value read = {}", + entry_state_key, counter_value + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "VectorEntry", + "index": entries.len(), + "value": { + "type": "Counter", + "parsed": counter_value, + } + } + })); + continue; + } + } + } + Err(_) => { + // Not an EntityIndex - try to parse as raw (key, value) entry + // First try to extract the key (Borsh string: u32 length + bytes) + if entry_value.len() >= 4 { + let key_len = u32::from_le_bytes([ + entry_value[0], + entry_value[1], + entry_value[2], + entry_value[3], + ]) as usize; + + if key_len > 0 && key_len < 1000 && entry_value.len() >= 4 + key_len { + if let Ok(key_str) = std::str::from_utf8(&entry_value[4..4 + key_len]) { + let value_bytes = &entry_value[4 + key_len..]; + + // Check if value looks like a Counter (64 bytes = two 32-byte IDs) + // This handles raw entry data without EntityIndex metadata. + // For new data created after PR #1864, EntityIndex will have crdt_type + // and be handled in the Ok(entry_index) branch above. + if value_bytes.len() == 64 { + let positive_id = &value_bytes[..32]; + let negative_id = &value_bytes[32..]; + + // Try to read the actual counter value from the internal maps + let counter_value = read_counter_value( + db, + state_cf, + context_id, + positive_id, + negative_id, + ); + + // Also try to read the first collection as nested children + let nested_children = read_nested_collection_entries( + db, + state_cf, + context_id, + positive_id, + ); + + // Heuristic: If nested_children is non-empty, it's likely a nested collection. + // If counter_value is non-zero and children are empty, it's likely a Counter. 
+ // This is a fallback for legacy data without crdt_type metadata. + if !nested_children.is_empty() { + eprintln!( + "[decode_collection_entries_bfs] Entry {} is NestedCollection (64 bytes, has children): key='{}', children={}", + entry_state_key, key_str, nested_children.len() + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "Entry", + "key": { "parsed": key_str, "type": "scalar::String" }, + "value": { + "type": "NestedCollection", + "crdt_type": "UnorderedMap", + "children": nested_children, + "children_count": nested_children.len(), + } + } + })); + continue; + } else { + // No children found - treat as Counter (its internal maps may have entries) + eprintln!( + "[decode_collection_entries_bfs] Entry {} is Counter (64 bytes, no nested children): key='{}', value={}", + entry_state_key, key_str, counter_value + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "Entry", + "key": { "parsed": key_str, "type": "scalar::String" }, + "value": { + "type": "Counter", + "parsed": counter_value, + "display": format!("πŸ”’ {}", counter_value), + } + } + })); + continue; + } + } + + // Check if value is another nested ID (32 bytes = single collection reference) + if value_bytes.len() == 32 { + eprintln!( + "[decode_collection_entries_bfs] Entry {} looks like Map: key='{}', value=1 ID (32 bytes)", + entry_state_key, key_str + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "Entry", + "key": { "parsed": key_str, "type": "scalar::String" }, + "value": { + "type": "NestedCollection", + "crdt_type": "UnorderedMap", + "collection_id": hex::encode(value_bytes), + } + } + })); + continue; + } + } + } + } + + // Try Vector entry format: value directly (no key prefix) + // Vector entries are just 64 bytes (two 32-byte IDs) + if entry_value.len() == 64 { + let positive_id = &entry_value[..32]; + let negative_id = &entry_value[32..]; + let counter_value = + read_counter_value(db, state_cf, 
context_id, positive_id, negative_id); + + if counter_value.abs() < 1_000_000 { + eprintln!( + "[decode_collection_entries_bfs] Entry {} is Vector item: value={}", + entry_state_key, counter_value + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "VectorEntry", + "index": entries.len(), + "value": { + "type": "Counter", + "parsed": counter_value, + } + } + })); + continue; + } + } + } + } + + // FALLBACK: Decode the entry according to collection type from schema match decode_collection_entry( &entry_value, field, @@ -2980,7 +3586,7 @@ fn decode_collection_entries_bfs( } Err(e) => { eprintln!( - "[decode_collection_entries_bfs] Failed to decode entry {}: {}", + "[decode_collection_entries_bfs] Failed to decode entry {} with schema: {}", entry_state_key, e ); } diff --git a/tools/merodb/src/export/cli.rs b/tools/merodb/src/export/cli.rs index eb3bb39e3..ca90e9a53 100644 --- a/tools/merodb/src/export/cli.rs +++ b/tools/merodb/src/export/cli.rs @@ -31,6 +31,7 @@ pub struct ExportArgs { /// State schema JSON file (extracted using `calimero-abi state`) /// /// This includes the state root type and its dependencies, sufficient for state deserialization. + /// If not provided, schema will be inferred from database metadata (field_name and crdt_type). 
#[arg(long, value_name = "SCHEMA_FILE")] pub state_schema_file: Option, @@ -68,7 +69,25 @@ pub fn run_export(args: ExportArgs) -> Result<()> { Err(e) => eyre::bail!("Failed to load state schema: {e}"), } } else { - eyre::bail!("--state-schema-file is required when exporting data"); + // Infer schema from database metadata + println!("No schema file provided, inferring schema from database metadata..."); + println!("(This requires field_name to be stored in entity metadata)"); + match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + println!("Schema inferred successfully"); + if let Some(ref root) = manifest.state_root { + println!("State root: {root}"); + } + if let Some(ref root_name) = manifest.state_root { + if let Some(calimero_wasm_abi::schema::TypeDef::Record { fields }) = manifest.types.get(root_name) { + println!("Fields: {}", fields.len()); + } + } + println!("Note: Inferred schema may have simplified types. For full type information, provide --state-schema-file"); + manifest + } + Err(e) => eyre::bail!("Failed to infer schema from database: {e}. Try providing --state-schema-file instead."), + } }; let columns = if args.all { diff --git a/tools/merodb/src/gui/index.html b/tools/merodb/src/gui/index.html index f66637701..2cca605e4 100644 --- a/tools/merodb/src/gui/index.html +++ b/tools/merodb/src/gui/index.html @@ -35,7 +35,7 @@

MeroDB Inspector

πŸ“Š

Load Database

-

Specify the database path and optionally upload a state schema file for state decoding

+

Specify the database path. Schema file is optional - if not provided, schema will be inferred from database metadata

@@ -63,7 +63,7 @@

Load Database

> No file chosen
- Required for state deserialization + Optional - schema will be inferred from database if not provided
diff --git a/tools/merodb/src/gui/server.rs b/tools/merodb/src/gui/server.rs index a726f49c1..241fe3af2 100644 --- a/tools/merodb/src/gui/server.rs +++ b/tools/merodb/src/gui/server.rs @@ -14,6 +14,7 @@ use tower_http::{services::ServeDir, set_header::SetResponseHeaderLayer}; use crate::{abi, dag, export, types::Column}; use calimero_wasm_abi::schema::Manifest; +use hex; #[derive(Debug, Serialize)] struct ErrorResponse { @@ -187,11 +188,11 @@ async fn handle_export(mut multipart: Multipart) -> impl IntoResponse { } } } else { - eprintln!("No state schema file provided - state values will not be decoded"); + // Will infer schema after opening database None }; - // Open database + // Open database (needed for both schema inference and export) let db = match open_database(&db_path) { Ok(db) => db, Err(e) => { @@ -205,6 +206,30 @@ async fn handle_export(mut multipart: Multipart) -> impl IntoResponse { } }; + // Infer schema if not provided (no context_id for global export) + let schema = if schema.is_none() { + eprintln!("No state schema file provided - inferring schema from database..."); + match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + eprintln!("Schema inferred successfully"); + info_message = Some( + "No schema file provided - schema inferred from database metadata. State values will be decoded using inferred schema.".to_string() + ); + Some(manifest) + } + Err(e) => { + let warning = format!( + "Failed to infer schema from database: {e}. State values will not be decoded." 
+ ); + eprintln!("Warning: {warning}"); + warning_message = Some(warning); + None + } + } + } else { + schema + }; + // Export all columns let columns = Column::all().to_vec(); let data = if let Some(schema) = schema { @@ -299,7 +324,7 @@ async fn handle_state_tree(mut multipart: Multipart) -> impl IntoResponse { return (StatusCode::BAD_REQUEST, Json(ErrorResponse { error: e })).into_response(); } - // State schema is required for state tree extraction + // State schema is optional - infer from database if not provided let schema = if let Some(schema_text) = state_schema_text { match serde_json::from_str::(&schema_text) { Ok(schema_value) => match abi::load_state_schema_from_json_value(&schema_value) { @@ -325,13 +350,34 @@ async fn handle_state_tree(mut multipart: Multipart) -> impl IntoResponse { } } } else { - return ( - StatusCode::BAD_REQUEST, - Json(ErrorResponse { - error: "State schema file is required for state tree extraction".to_owned(), - }), - ) - .into_response(); + // Infer schema from database + eprintln!("[server] No schema file provided, inferring from database..."); + match open_database(&db_path) { + Ok(db) => match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + eprintln!("[server] Schema inferred successfully"); + manifest + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to infer schema from database: {e}"), + }), + ) + .into_response(); + } + }, + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to open database for schema inference: {e}"), + }), + ) + .into_response(); + } + } }; // Open database @@ -544,7 +590,7 @@ async fn handle_context_tree(mut multipart: Multipart) -> impl IntoResponse { return (StatusCode::BAD_REQUEST, Json(ErrorResponse { error: e })).into_response(); } - // State schema is required for state tree extraction + // State schema is optional - infer from database if not provided 
let schema = if let Some(schema_text) = state_schema_text { match serde_json::from_str::(&schema_text) { Ok(schema_value) => match abi::load_state_schema_from_json_value(&schema_value) { @@ -570,13 +616,67 @@ async fn handle_context_tree(mut multipart: Multipart) -> impl IntoResponse { } } } else { - return ( - StatusCode::BAD_REQUEST, - Json(ErrorResponse { - error: "State schema file is required for state tree extraction".to_owned(), - }), - ) - .into_response(); + // Infer schema from database for this specific context + eprintln!( + "[server] No schema file provided, inferring from database for context {}...", + context_id + ); + match open_database(&db_path) { + Ok(db) => { + // Decode context_id from hex string + let context_id_bytes = match hex::decode(&context_id) { + Ok(bytes) if bytes.len() == 32 => bytes, + _ => { + return ( + StatusCode::BAD_REQUEST, + Json(ErrorResponse { + error: format!("Invalid context_id format: {}", context_id), + }), + ) + .into_response(); + } + }; + match abi::infer_schema_from_database(&db, Some(&context_id_bytes)) { + Ok(manifest) => { + let field_count = manifest + .state_root + .as_ref() + .and_then(|root| manifest.types.get(root)) + .and_then(|ty| { + if let calimero_wasm_abi::schema::TypeDef::Record { fields } = ty { + Some(fields.len()) + } else { + None + } + }) + .unwrap_or(0); + eprintln!( + "[server] Schema inferred successfully for context {}: {} fields found", + context_id, field_count + ); + manifest + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to infer schema from database: {e}"), + }), + ) + .into_response(); + } + } + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to open database for schema inference: {e}"), + }), + ) + .into_response(); + } + } }; // Open database diff --git a/tools/merodb/src/gui/static/css/visualization.css b/tools/merodb/src/gui/static/css/visualization.css 
index 95bcc37a9..8027beaf3 100644 --- a/tools/merodb/src/gui/static/css/visualization.css +++ b/tools/merodb/src/gui/static/css/visualization.css @@ -121,6 +121,14 @@ pointer-events: none; } +/* Field Type Colors */ +.field-type-unordered_map { color: #61afef !important; } +.field-type-unordered_set { color: #c678dd !important; } +.field-type-vector { color: #e5c07b !important; } +.field-type-counter { color: #98c379 !important; } +.field-type-rga { color: #d19a66 !important; } +.field-type-lww_register { color: #56b6c2 !important; } + /* State Tree Links */ .state-link { stroke: var(--color-link-default); diff --git a/tools/merodb/src/gui/static/js/api-service.js b/tools/merodb/src/gui/static/js/api-service.js index 7381048b4..3c1c1e342 100644 --- a/tools/merodb/src/gui/static/js/api-service.js +++ b/tools/merodb/src/gui/static/js/api-service.js @@ -172,26 +172,23 @@ export class ApiService { throw new Error(`Failed to read state schema file: ${err.message}. The file may have already been consumed.`); } } else { - console.error('[ApiService.loadContextTree] ERROR: No state schema file or cached content available!'); - console.error('[ApiService.loadContextTree] State:', { - currentStateSchemaFile: window.app?.state?.currentStateSchemaFile?.name || 'null', - hasCachedContent: !!window.app?.state?.currentStateSchemaFileContent, - hasLocalStorageContent: !!localStorage.getItem('merodb_schema_content'), - stateSchemaFileProvided: !!stateSchemaFile - }); - throw new Error('State schema file is required for state tree extraction'); + // Schema is optional - backend will infer it if not provided + console.log('[ApiService.loadContextTree] No state schema file - backend will infer schema from database'); + text = null; // Don't send schema file } } catch (err) { - if (err.message.includes('State schema file is required')) { - throw err; - } - console.error('[ApiService.loadContextTree] Error accessing local storage:', err); - throw new Error('State schema file is required 
for state tree extraction'); + // Schema is optional - backend will infer it if not provided + console.log('[ApiService.loadContextTree] No state schema file - backend will infer schema from database'); + text = null; // Don't send schema file } } - console.log('[ApiService.loadContextTree] Appending state_schema_file to formData, length:', text.length); - formData.append('state_schema_file', text); + if (text) { + console.log('[ApiService.loadContextTree] Appending state_schema_file to formData, length:', text.length); + formData.append('state_schema_file', text); + } else { + console.log('[ApiService.loadContextTree] No schema file - will use schema inference'); + } const response = await fetch('/api/context-tree', { method: 'POST', diff --git a/tools/merodb/src/gui/static/js/app.js b/tools/merodb/src/gui/static/js/app.js index 3a6555bac..d6e829cab 100644 --- a/tools/merodb/src/gui/static/js/app.js +++ b/tools/merodb/src/gui/static/js/app.js @@ -214,9 +214,9 @@ export class App { this.state.currentStateSchemaFile = stateSchemaInput.files[0]; } - if (!this.state.currentStateSchemaFile) { - UIManager.showMessage('warning-message', 'No state schema file found. 
Please select a file first.'); - return; + // Schema file is optional - can use schema inference + if (!this.state.currentStateSchemaFile && !this.state.currentStateSchemaFileContent) { + console.log('[App] No schema file - will use schema inference'); } } await this.loadDatabase(); diff --git a/tools/merodb/src/gui/static/js/state-tree-visualizer.js b/tools/merodb/src/gui/static/js/state-tree-visualizer.js index 20e36b26a..2098bf66b 100644 --- a/tools/merodb/src/gui/static/js/state-tree-visualizer.js +++ b/tools/merodb/src/gui/static/js/state-tree-visualizer.js @@ -37,6 +37,7 @@ export class StateTreeVisualizer { */ async load() { // Check if we have schema content (from file or local storage) + // Schema is optional - if not provided, backend will infer it from database if (!this.state.currentStateSchemaFile && !this.state.currentStateSchemaFileContent) { // Try to load from local storage try { @@ -45,10 +46,10 @@ export class StateTreeVisualizer { this.state.currentStateSchemaFileContent = savedContent; console.log('[StateTreeVisualizer] Loaded schema from local storage'); } else { - throw new Error('State schema file is required for state tree visualization'); + console.log('[StateTreeVisualizer] No schema file provided - will use schema inference'); } } catch (err) { - throw new Error('State schema file is required for state tree visualization'); + console.log('[StateTreeVisualizer] No schema file provided - will use schema inference'); } } @@ -371,17 +372,109 @@ export class StateTreeVisualizer { return ''; }); - // Add node ID labels + // Add node labels - show field name for Field nodes, truncated ID otherwise nodeEnter.append('text') .attr('dy', '0.31em') .attr('x', d => (d.children || d._children) ? -10 : 10) .attr('text-anchor', d => (d.children || d._children) ? 
'end' : 'start') .text(d => { + // For Field nodes, show the field name + if (d.data.type === 'Field' && d.data.field) { + return d.data.field; + } + // For StateRoot, show "Root" + if (d.data.type === 'StateRoot') { + return 'Root'; + } + // For Entry nodes, show meaningful data + if ((d.data.type === 'Entry' || d.data.type === 'VectorEntry') && d.data.data) { + // Counter entries: show value (the count) with icon + if (d.data.data.key && d.data.data.value) { + const val = d.data.data.value.parsed ?? d.data.data.value; + const valType = d.data.data.value?.type; + // If value is a Counter, show "key: πŸ”’ N" + if (valType === 'Counter' && typeof val === 'number') { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return `${keyStr}: πŸ”’ ${val}`; + } + // If value is a number (legacy Counter format), show "key: πŸ”’ N" + if (typeof val === 'number') { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return `${keyStr}: πŸ”’ ${val}`; + } + // If value is a NestedCollection, show "key: type" with children count + if (valType === 'NestedCollection') { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + const crdtType = d.data.data.value.crdt_type || 'Collection'; + const nestedChildren = d.data.data.value.children || []; + const childCount = d.data.data.value.children_count || nestedChildren.length; + + // If we have nested children, show them inline + if (nestedChildren.length > 0 && nestedChildren.length <= 4) { + // For UnorderedSet, keys ARE the values (show as set items) + // For UnorderedMap, show key=value pairs + const isSet = crdtType === 'UnorderedSet' || nestedChildren.every(c => c.value_hex); + if (isSet) { + const items = nestedChildren.map(c => c.key).join(', '); + return `${keyStr}: {${items.length > 30 ? 
items.substring(0, 27) + '...' : items}}`; + } else { + const preview = nestedChildren.map(c => `${c.key}=${c.value || '?'}`).join(', '); + return `${keyStr}: {${preview.length > 30 ? preview.substring(0, 27) + '...' : preview}}`; + } + } + return `${keyStr}: πŸ“¦ ${crdtType} [${childCount}]`; + } + // Otherwise show "key β†’ value" for regular maps + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + const valStr = typeof val === 'string' ? val : JSON.stringify(val); + const display = `${keyStr} β†’ ${valStr}`; + return display.length > 30 ? display.substring(0, 27) + '...' : display; + } + // VectorEntry without key: show value directly + if (d.data.data.value && d.data.type === 'VectorEntry') { + const val = d.data.data.value.parsed ?? d.data.data.value; + const valType = d.data.data.value?.type; + if (valType === 'Counter' && typeof val === 'number') { + return `πŸ”’ ${val}`; + } + return typeof val === 'number' ? `πŸ”’ ${val}` : JSON.stringify(val); + } + // Map entries with only key: show key + if (d.data.data.key) { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return keyStr.length > 25 ? keyStr.substring(0, 22) + '...' : keyStr; + } + // Vector entries: show item value + if (d.data.data.item) { + const item = d.data.data.item.parsed || d.data.data.item; + const itemStr = typeof item === 'string' ? item : JSON.stringify(item); + return itemStr.length > 35 ? itemStr.substring(0, 32) + '...' : itemStr; + } + // Set entries or other: show value + if (d.data.data.value) { + const val = d.data.data.value.parsed || d.data.data.value; + const valStr = typeof val === 'string' ? val : JSON.stringify(val); + return valStr.length > 25 ? valStr.substring(0, 22) + '...' : valStr; + } + } + // Fallback to truncated ID const id = d.data.id || 'N/A'; return id !== 'N/A' ? 
`${id.substring(0, 8)}...` : 'N/A'; }) - .style('font-size', '10px') - .style('fill', '#bbb') + .style('font-size', '11px') + .style('fill', d => { + // Color code by type + if (d.data.type === 'StateRoot') return '#ffa500'; // Orange for root + if (d.data.type === 'Field') return '#61afef'; // Blue for fields + if (d.data.type === 'Entry') return '#98c379'; // Green for entries + return '#bbb'; + }) + .style('font-weight', d => d.data.type === 'Field' ? 'bold' : 'normal') .style('pointer-events', 'none'); // Transition nodes to their new position @@ -612,9 +705,11 @@ export class StateTreeVisualizer { html += ` Type:`; html += ` ${data.type || 'N/A'}`; html += ``; + // Calculate children count from actual tree structure + const childrenCount = (node.children?.length || 0) + (node._children?.length || 0); html += `
`; html += ` Children:`; - html += ` ${data.children_count || 0}`; + html += ` ${childrenCount}`; html += `
`; html += ``; @@ -712,47 +807,64 @@ export class StateTreeVisualizer { html += ``; } - html += '
'; - html += `
Hashes
`; - html += `
`; - html += ` ID:`; - html += ` ${TooltipManager.formatHash(data.id, 'ID')}`; - html += `
`; - html += `
`; - html += ` Full Hash:`; - html += ` ${TooltipManager.formatHash(data.full_hash, 'Full Hash')}`; - html += `
`; - html += `
`; - html += ` Own Hash:`; - html += ` ${TooltipManager.formatHash(data.own_hash, 'Own Hash')}`; - html += `
`; - // Use the parent node's ID from the D3 hierarchy instead of data.parent_id - // This ensures the displayed parent ID matches what's shown in the tree - if (node.parent) { - html += `
`; - html += ` Parent ID:`; - html += ` ${TooltipManager.formatHash(node.parent.data.id, 'Parent ID')}`; + // Hashes section - only show if we have hash data + const hasHashData = data.id || data.full_hash || data.own_hash || node.parent; + if (hasHashData) { + html += '
'; + html += `
Hashes
`; + if (data.id) { + html += `
`; + html += ` ID:`; + html += ` ${TooltipManager.formatHash(data.id, 'ID')}`; + html += `
`; + } + if (data.full_hash) { + html += `
`; + html += ` Full Hash:`; + html += ` ${TooltipManager.formatHash(data.full_hash, 'Full Hash')}`; + html += `
`; + } + if (data.own_hash) { + html += `
`; + html += ` Own Hash:`; + html += ` ${TooltipManager.formatHash(data.own_hash, 'Own Hash')}`; + html += `
`; + } + // Use the parent node's ID from the D3 hierarchy + if (node.parent) { + html += `
`; + html += ` Parent ID:`; + html += ` ${TooltipManager.formatHash(node.parent.data.id, 'Parent ID')}`; + html += `
`; + } html += `
`; } - html += `
`; - html += '
'; - html += `
Timestamps
`; - html += `
`; - html += ` Created:`; - html += ` ${TooltipManager.formatTimestamp(data.created_at)}`; - html += `
`; - html += `
`; - html += ` Updated:`; - html += ` ${TooltipManager.formatTimestamp(data.updated_at)}`; - html += `
`; - if (data.deleted_at) { - html += `
`; - html += ` Deleted:`; - html += ` ${TooltipManager.formatTimestamp(data.deleted_at)}`; + // Timestamps section - only show if we have timestamp data + const hasTimestampData = data.created_at || data.updated_at || data.deleted_at; + if (hasTimestampData) { + html += '
'; + html += `
Timestamps
`; + if (data.created_at) { + html += `
`; + html += ` Created:`; + html += ` ${TooltipManager.formatTimestamp(data.created_at)}`; + html += `
`; + } + if (data.updated_at) { + html += `
`; + html += ` Updated:`; + html += ` ${TooltipManager.formatTimestamp(data.updated_at)}`; + html += `
`; + } + if (data.deleted_at) { + html += `
`; + html += ` Deleted:`; + html += ` ${TooltipManager.formatTimestamp(data.deleted_at)}`; + html += `
`; + } html += `
`; } - html += `
`; return html; } @@ -895,13 +1007,29 @@ export class StateTreeVisualizer { // Check if item is deleted const isDeleted = data.deleted_at !== null && data.deleted_at !== undefined; + // Determine fill color based on type + let textFill = isDeleted ? '#888' : '#d4d4d4'; + if (!isDeleted && d._typeClass) { + // Use CSS class color for typed fields + const typeColorMap = { + 'field-type-unordered_map': '#61afef', + 'field-type-unordered_set': '#c678dd', + 'field-type-vector': '#e5c07b', + 'field-type-counter': '#98c379', + 'field-type-rga': '#d19a66', + 'field-type-lww_register': '#56b6c2' + }; + textFill = typeColorMap[d._typeClass] || textFill; + } + // Create text element that can wrap const text = g.append('text') .attr('x', (!d.children && !d._children) ? 8 : 0) // Offset for leaf nodes with circles .attr('y', nodeHeight / 2) .attr('dy', '0.35em') - .attr('font-size', '11px') - .attr('fill', isDeleted ? '#888' : '#d4d4d4') // Grayed out for deleted + .attr('font-size', '12px') + .attr('font-weight', data.type === 'Field' ? '500' : '400') + .attr('fill', textFill) .attr('opacity', isDeleted ? 0.6 : 1.0); // Reduced opacity for deleted let labelText = ''; @@ -931,6 +1059,22 @@ export class StateTreeVisualizer { } } + // Icon mapping for field types + const typeIcons = { + 'UnorderedMap': 'πŸ—ΊοΈ', + 'UnorderedSet': 'πŸ“¦', + 'Vector': 'πŸ“‹', + 'LwwRegister': 'πŸ“', + 'Counter': 'πŸ”’', + 'Rga': 'πŸ“œ', + 'unordered_map': 'πŸ—ΊοΈ', + 'unordered_set': 'πŸ“¦', + 'vector': 'πŸ“‹', + 'lww_register': 'πŸ“', + 'counter': 'πŸ”’', + 'rga': 'πŸ“œ' + }; + // Format type info nicely if (typeInfo) { // Convert common type names to readable format @@ -943,45 +1087,95 @@ export class StateTreeVisualizer { 'Rga': 'rga' }; const readableType = typeMap[typeInfo] || typeInfo.toLowerCase(); + const icon = typeIcons[typeInfo] || typeIcons[readableType] || 'πŸ“'; + + // Add child count for collections + const childCount = d._children ? d._children.length : (d.children ? 
d.children.length : 0); + const countStr = childCount > 0 ? ` [${childCount}]` : ''; + if (counterValue !== null) { - labelText = `${fieldName} (${readableType}) = ${counterValue}`; + labelText = `${icon} ${fieldName}: ${readableType}${countStr} = ${counterValue}`; } else { - labelText = `${fieldName} (${readableType})`; + labelText = `${icon} ${fieldName}: ${readableType}${countStr}`; } + + // Store type info for styling + d._typeClass = `field-type-${readableType}`; } else { if (counterValue !== null) { - labelText = `${fieldName} = ${counterValue}`; + labelText = `πŸ“ ${fieldName} = ${counterValue}`; } else { - labelText = fieldName; + labelText = `πŸ“ ${fieldName}`; } } } - // For Entry types, show key: value format + // For Entry types, show meaningful data else if (data.type === 'Entry') { if (data.data) { const stateData = data.data; let keyStr = ''; let valueStr = ''; + let itemStr = ''; - // Get key + // Get key (for Map entries) if (stateData.key && stateData.key.parsed !== undefined) { - keyStr = JSON.stringify(stateData.key.parsed, null, 0); + const key = stateData.key.parsed; + if (typeof key === 'string') { + keyStr = `"${key}"`; + } else { + keyStr = JSON.stringify(key, null, 0); + } } else if (stateData.key) { keyStr = String(stateData.key); } - // Get value + // Get value (for Map/Counter entries) if (stateData.value && stateData.value.parsed !== undefined) { - valueStr = JSON.stringify(stateData.value.parsed, null, 0); + const val = stateData.value.parsed; + // Handle LwwRegister values (show inner value) + if (val && typeof val === 'object' && val.value !== undefined && val.clock !== undefined) { + valueStr = typeof val.value === 'string' ? 
`"${val.value}"` : JSON.stringify(val.value, null, 0); + } else if (typeof val === 'string') { + valueStr = `"${val}"`; + } else if (typeof val === 'number') { + valueStr = String(val); + } else { + valueStr = JSON.stringify(val, null, 0); + } } else if (stateData.value) { valueStr = String(stateData.value); } - // Format as "key: value" + // Get item (for Vector/Set entries) + if (stateData.item && stateData.item.parsed !== undefined) { + const item = stateData.item.parsed; + // Handle LwwRegister wrapped items + if (item && typeof item === 'object' && item.value !== undefined && item.clock !== undefined) { + itemStr = typeof item.value === 'string' ? `"${item.value}"` : JSON.stringify(item.value, null, 0); + } else if (typeof item === 'string') { + itemStr = `"${item}"`; + } else { + itemStr = JSON.stringify(item, null, 0); + } + } else if (stateData.item) { + itemStr = String(stateData.item); + } + + // Truncate long values + const maxLen = 60; + if (valueStr.length > maxLen) valueStr = valueStr.substring(0, maxLen) + '...'; + if (itemStr.length > maxLen) itemStr = itemStr.substring(0, maxLen) + '...'; + + // Determine display format based on what data is available if (keyStr && valueStr) { - labelText = `${keyStr}: ${valueStr}`; + // Counter: if value is a number, show "key β†’ value" + // Map: show "key β†’ value" + labelText = `${keyStr} β†’ ${valueStr}`; + } else if (itemStr) { + // Vector/Set entry: just show the item value + labelText = itemStr; } else if (keyStr) { - labelText = `Key: ${keyStr}`; + labelText = keyStr; } else if (valueStr) { labelText = valueStr; } else {