From 6ca26a03d50337697ecc7135107c8e2dee3ac317 Mon Sep 17 00:00:00 2001 From: xilosada Date: Thu, 5 Feb 2026 09:50:18 +0100 Subject: [PATCH 1/7] feat(merodb): add schema inference and improved visualization - Schema inference from database metadata (field_name, crdt_type) - Improved tree view with field names instead of truncated IDs - Better visualization for Counter, Vector, Set values - Show children count and hide empty sections in sidebar --- tools/merodb/src/abi.rs | 185 ++++++++++ tools/merodb/src/export.rs | 341 ++++++++++++++---- tools/merodb/src/export/cli.rs | 21 +- tools/merodb/src/gui/index.html | 4 +- tools/merodb/src/gui/server.rs | 136 ++++++- .../src/gui/static/css/visualization.css | 8 + tools/merodb/src/gui/static/js/api-service.js | 27 +- tools/merodb/src/gui/static/js/app.js | 6 +- .../gui/static/js/state-tree-visualizer.js | 266 +++++++++++--- 9 files changed, 838 insertions(+), 156 deletions(-) diff --git a/tools/merodb/src/abi.rs b/tools/merodb/src/abi.rs index f2bc23228..de9768fe3 100644 --- a/tools/merodb/src/abi.rs +++ b/tools/merodb/src/abi.rs @@ -67,3 +67,188 @@ pub fn load_state_schema_from_json(schema_path: &Path) -> Result { load_state_schema_from_json_value(&schema_value) } + +/// Infer state schema from database by reading field names and CRDT types from metadata +/// +/// This function scans the State column for EntityIndex entries and builds a schema +/// based on field_name and crdt_type found in metadata. This enables schema-free +/// database inspection when field names are stored in metadata. +/// +/// # Arguments +/// * `db` - The database to scan +/// * `context_id` - Optional context ID to filter by. 
If None, scans all contexts (may find fields from multiple contexts) +pub fn infer_schema_from_database( + db: &rocksdb::DBWithThreadMode, + context_id: Option<&[u8]>, +) -> Result { + use calimero_wasm_abi::schema::{ + CollectionType, CrdtCollectionType, Field, ScalarType, TypeDef, TypeRef, + }; + use std::collections::BTreeMap; + + let state_cf = db + .cf_handle("State") + .ok_or_else(|| eyre::eyre!("State column family not found"))?; + + let mut fields = Vec::new(); + let mut seen_field_names = std::collections::HashSet::new(); + + // Root ID depends on context: + // - If context_id is provided, root ID is that context_id (Id::root() returns context_id()) + // - If no context_id, we can't determine root fields reliably, so use all zeros as fallback + let root_id_bytes: [u8; 32] = match context_id { + Some(ctx_id) => ctx_id.try_into().map_err(|_| { + eyre::eyre!( + "context_id must be exactly 32 bytes, got {} bytes", + ctx_id.len() + ) + })?, + None => [0u8; 32], + }; + + // Scan State column for EntityIndex entries + let iter = db.iterator_cf(&state_cf, rocksdb::IteratorMode::Start); + for item in iter { + let (key, value) = item?; + + // Filter by context_id if provided (key format: context_id (32 bytes) + state_key (32 bytes)) + if let Some(expected_context_id) = context_id { + if key.len() < 32 || &key[..32] != expected_context_id { + continue; + } + } + + // Try to deserialize as EntityIndex + if let Ok(index) = borsh::from_slice::(&value) { + // Check if this is a root-level field (parent_id is None or equals root/context_id) + let is_root_field = index.parent_id.is_none() + || index + .parent_id + .as_ref() + .map(|id| id.as_bytes() == &root_id_bytes) + .unwrap_or(false); + + if is_root_field { + // Check if we have field_name in metadata + if let Some(ref field_name) = index.metadata.field_name { + if !seen_field_names.contains(field_name) { + seen_field_names.insert(field_name.clone()); + + // Infer type from crdt_type + let type_ref = if let 
Some(crdt_type) = index.metadata.crdt_type { + match crdt_type { + crate::export::CrdtType::UnorderedMap => { + // Default to Map - can be refined later + TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + } + } + crate::export::CrdtType::Vector => TypeRef::Collection { + collection: CollectionType::List { + items: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::Vector), + inner_type: None, + }, + crate::export::CrdtType::UnorderedSet => TypeRef::Collection { + collection: CollectionType::List { + items: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedSet), + inner_type: None, + }, + crate::export::CrdtType::Counter => TypeRef::Collection { + // Counter is stored as Map internally + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::Scalar(ScalarType::U64)), + }, + crdt_type: Some(CrdtCollectionType::Counter), + inner_type: None, + }, + crate::export::CrdtType::Rga => TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::ReplicatedGrowableArray), + inner_type: None, + }, + crate::export::CrdtType::LwwRegister => TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::LwwRegister), + inner_type: Some(Box::new(TypeRef::string())), + }, + crate::export::CrdtType::UserStorage => TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + }, + crate::export::CrdtType::FrozenStorage => TypeRef::Collection { + collection: CollectionType::Map { + key: Box::new(TypeRef::string()), + value: Box::new(TypeRef::string()), + }, + crdt_type: 
Some(CrdtCollectionType::UnorderedMap), + inner_type: None, + }, + crate::export::CrdtType::Record => { + // Record type - would need to inspect children to infer fields + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: None, + inner_type: None, + } + } + crate::export::CrdtType::Custom { type_name: _ } => { + // Custom type - can't infer without schema + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: None, + inner_type: None, + } + } + } + } else { + // No CRDT type - default to LWW register + TypeRef::Collection { + collection: CollectionType::Record { fields: Vec::new() }, + crdt_type: Some(CrdtCollectionType::LwwRegister), + inner_type: Some(Box::new(TypeRef::string())), + } + }; + + fields.push(Field { + name: field_name.clone(), + type_: type_ref, + nullable: None, + }); + } + } + } + } + } + + // Create a record type with all inferred fields + let state_root_type = "InferredStateRoot".to_string(); + let mut types = BTreeMap::new(); + types.insert( + state_root_type.clone(), + TypeDef::Record { + fields: fields.clone(), + }, + ); + + Ok(Manifest { + schema_version: "wasm-abi/1".to_string(), + types, + methods: Vec::new(), + events: Vec::new(), + state_root: Some(state_root_type), + }) +} diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index 05d9b1a51..c93ba0ace 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -88,6 +88,47 @@ struct MapField { value_type: TypeRef, } +/// Try to decode entry data with a specific field definition +fn try_decode_with_field( + entry_bytes: &[u8], + field: &Field, + index: &EntityIndex, + manifest: &Manifest, +) -> Option { + match &field.type_ { + TypeRef::Collection { + collection: CollectionType::Map { key, value }, + .. 
+ } => { + let map_field = MapField { + name: field.name.clone(), + key_type: (**key).clone(), + value_type: (**value).clone(), + }; + decode_map_entry(entry_bytes, &map_field, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten() + } + TypeRef::Collection { + collection: CollectionType::List { items }, + .. + } => decode_list_entry(entry_bytes, field, items, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten(), + TypeRef::Collection { + collection: CollectionType::Record { .. }, + crdt_type, + inner_type, + } => decode_record_entry(entry_bytes, field, crdt_type, inner_type, manifest) + .ok() + .map(|decoded| add_index_metadata(decoded, index)) + .flatten(), + _ => None, + } +} + /// Try to decode a collection entry by looking up the actual entry data from an EntityIndex /// Supports Map entries (Entry<(K, V)>) and List entries (Entry) fn try_decode_collection_entry_from_index( @@ -170,6 +211,27 @@ fn try_decode_collection_entry_from_index( record_fields.len() ); + // First, try to match by field_name if available (most direct and efficient) + if let Some(ref field_name) = index.metadata.field_name { + eprintln!( + "[try_decode_collection_entry_from_index] Using field_name from metadata: {}", + field_name + ); + if let Some(field) = record_fields.iter().find(|f| f.name == *field_name) { + eprintln!( + "[try_decode_collection_entry_from_index] Found matching field by name: {}", + field_name + ); + // Try to decode with this specific field + return try_decode_with_field(&entry_bytes, field, index, manifest); + } else { + eprintln!( + "[try_decode_collection_entry_from_index] Field name '{}' not found in schema, falling back to all fields", + field_name + ); + } + } + // If we have a parent_id, try to find the collection field that matches it // Otherwise, try all collection fields let fields_to_try: Vec<&Field> = if let Some(parent_id) = &index.parent_id { @@ -627,6 +689,7 @@ fn decode_state_entry( 
"own_hash": hex::encode(index.own_hash), "created_at": index.metadata.created_at, "updated_at": *index.metadata.updated_at, + "field_name": index.metadata.field_name, "deleted_at": index.deleted_at })); } else { @@ -934,24 +997,24 @@ fn decode_scalar_entry(bytes: &[u8], field: &Field, manifest: &Manifest) -> Resu } // EntityIndex structure for decoding -#[derive(borsh::BorshDeserialize)] -struct EntityIndex { - id: Id, - parent_id: Option, - children: Option>, - full_hash: [u8; 32], - own_hash: [u8; 32], - metadata: Metadata, - deleted_at: Option, +#[derive(borsh::BorshDeserialize, Clone)] +pub(crate) struct EntityIndex { + pub(crate) id: Id, + pub(crate) parent_id: Option, + pub(crate) children: Option>, + pub(crate) full_hash: [u8; 32], + pub(crate) own_hash: [u8; 32], + pub(crate) metadata: Metadata, + pub(crate) deleted_at: Option, } -#[derive(borsh::BorshDeserialize)] -struct Id { +#[derive(borsh::BorshDeserialize, Clone)] +pub(crate) struct Id { bytes: [u8; 32], } impl Id { - const fn as_bytes(&self) -> &[u8; 32] { + pub(crate) const fn as_bytes(&self) -> &[u8; 32] { &self.bytes } } @@ -1334,6 +1397,8 @@ fn try_manual_entity_index_decode( created_at, updated_at: UpdatedAt(updated_at_val), storage_type, + crdt_type: None, + field_name: None, }; let child_info = ChildInfo { @@ -1408,12 +1473,14 @@ fn try_manual_entity_index_decode( created_at: 0, updated_at: UpdatedAt(0), storage_type: StorageType::Public, + crdt_type: None, + field_name: None, }, deleted_at: None, }) } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" @@ -1424,18 +1491,78 @@ struct ChildInfo { metadata: Metadata, } -#[derive(borsh::BorshDeserialize)] +#[derive(Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" )] -struct Metadata { - created_at: u64, - updated_at: UpdatedAt, - storage_type: StorageType, +pub(crate) struct Metadata { + 
pub(crate) created_at: u64, + pub(crate) updated_at: UpdatedAt, + pub(crate) storage_type: StorageType, + pub(crate) crdt_type: Option, + pub(crate) field_name: Option, } -#[derive(borsh::BorshDeserialize)] +// Custom BorshDeserialize for backward compatibility with old Metadata that doesn't have field_name +impl borsh::BorshDeserialize for Metadata { + fn deserialize_reader(reader: &mut R) -> Result { + let created_at = u64::deserialize_reader(reader)?; + let updated_at = UpdatedAt::deserialize_reader(reader)?; + let storage_type = StorageType::deserialize_reader(reader)?; + + // Try to deserialize crdt_type (may not exist in old format) + let crdt_type = match >::deserialize_reader(reader) { + Ok(ct) => ct, + Err(e) => { + if matches!(e.kind(), std::io::ErrorKind::UnexpectedEof) { + None + } else { + return Err(e); + } + } + }; + + // Try to deserialize field_name (may not exist in old format) + let field_name = match >::deserialize_reader(reader) { + Ok(fn_val) => fn_val, + Err(e) => { + if matches!(e.kind(), std::io::ErrorKind::UnexpectedEof) { + None + } else { + return Err(e); + } + } + }; + + Ok(Metadata { + created_at, + updated_at, + storage_type, + crdt_type, + field_name, + }) + } +} + +/// CRDT type identifier for entity metadata. +/// Must match the definition in calimero-storage. 
+#[derive(borsh::BorshDeserialize, Debug, Clone, PartialEq, Eq)] +#[allow(dead_code)] +pub(crate) enum CrdtType { + LwwRegister, + Counter, + Rga, + UnorderedMap, + UnorderedSet, + Vector, + UserStorage, + FrozenStorage, + Record, + Custom { type_name: String }, +} + +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Variants required for Borsh deserialization structure" @@ -1449,7 +1576,7 @@ enum StorageType { Frozen, } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] #[expect( dead_code, reason = "Fields required for Borsh deserialization structure" @@ -1459,7 +1586,7 @@ struct SignatureData { nonce: u64, } -#[derive(borsh::BorshDeserialize)] +#[derive(borsh::BorshDeserialize, Clone)] struct UpdatedAt(u64); impl Deref for UpdatedAt { @@ -2059,8 +2186,44 @@ fn decode_state_root_bfs( fields.len() ); + // PRE-FILTER: Build a mapping from field_name to (state_key, EntityIndex) for children that have field_name + // This allows direct field matching instead of sequential iteration + let mut field_name_to_child: std::collections::HashMap = + std::collections::HashMap::new(); + for child_info in &root_children { + let child_element_id = hex::encode(child_info.id.as_bytes()); + if let Some(state_key) = element_to_state.get(&child_element_id) { + let child_key_bytes = match hex::decode(state_key) { + Ok(bytes) => bytes, + Err(_) => continue, + }; + let mut child_key = Vec::with_capacity(64); + child_key.extend_from_slice(context_id); + child_key.extend_from_slice(&child_key_bytes); + + if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + if let Ok(child_index) = borsh::from_slice::(&child_value) { + if let Some(ref field_name) = child_index.metadata.field_name { + eprintln!( + "[decode_state_root_bfs] Found collection root with field_name='{}': id={}, {} children", + field_name, + child_element_id, + child_index.children.as_ref().map(|c| c.len()).unwrap_or(0) + ); + field_name_to_child + 
.insert(field_name.clone(), (state_key.clone(), child_index)); + } + } + } + } + } + eprintln!( + "[decode_state_root_bfs] Pre-filtered {} collection roots with field_name", + field_name_to_child.len() + ); + // For each field in the state root schema, find and decode its children using BFS - // Match children to fields by iterating through root's children + // Match children to fields by field_name first, then fall back to sequential matching let mut used_children = std::collections::HashSet::new(); for field in fields { eprintln!("[decode_state_root_bfs] Decoding field: {}", field.name); @@ -2079,52 +2242,93 @@ fn decode_state_root_bfs( }; let field_value = if field_value { - // Find an unused child that is a collection root + // FIRST: Try to find by field_name (direct match) let mut matched_child = None; - for child_info in &root_children { - let child_element_id = hex::encode(child_info.id.as_bytes()); - if used_children.contains(&child_element_id) { - continue; + if let Some((state_key, child_index)) = field_name_to_child.get(&field.name) { + let child_element_id = hex::encode(child_index.id.as_bytes()); + if !used_children.contains(&child_element_id) { + eprintln!( + "[decode_state_root_bfs] Direct field_name match for '{}': {} children", + field.name, + child_index.children.as_ref().map(|c| c.len()).unwrap_or(0) + ); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); } + } - // Check if this child is a collection root by loading its EntityIndex - if let Some(state_key) = element_to_state.get(&child_element_id) { - let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { - format!("Failed to decode child_state_key: {}", state_key) - })?; - let mut child_key = Vec::with_capacity(64); - child_key.extend_from_slice(context_id); - child_key.extend_from_slice(&child_key_bytes); + // FALLBACK: If no direct match, try sequential matching (for legacy data) + if matched_child.is_none() { + eprintln!( + 
"[decode_state_root_bfs] No direct field_name match for '{}', trying sequential", + field.name + ); + for child_info in &root_children { + let child_element_id = hex::encode(child_info.id.as_bytes()); + if used_children.contains(&child_element_id) { + continue; + } - if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { - // Try standard Borsh deserialization first - let child_index = match borsh::from_slice::(&child_value) { - Ok(index) => { - eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex for field {}: {} children", field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0)); - index - } - Err(e) => { - // Try manual deserialization as fallback - eprintln!("[decode_state_root_bfs] Failed to decode collection root EntityIndex for field {} using Borsh: {}. Attempting manual decode...", field.name, e); - match try_manual_entity_index_decode(&child_value, context_id) { - Ok(index) => { - eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex manually for field {}: {} children", field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0)); - index - } - Err(manual_err) => { - eprintln!("[decode_state_root_bfs] Manual decode also failed for collection root: {}", manual_err); - continue; // Skip this child + // Check if this child is a collection root by loading its EntityIndex + if let Some(state_key) = element_to_state.get(&child_element_id) { + let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { + format!("Failed to decode child_state_key: {}", state_key) + })?; + let mut child_key = Vec::with_capacity(64); + child_key.extend_from_slice(context_id); + child_key.extend_from_slice(&child_key_bytes); + + if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + // Try standard Borsh deserialization first + let child_index = match borsh::from_slice::(&child_value) { + Ok(index) => { + eprintln!("[decode_state_root_bfs] Successfully decoded collection root 
EntityIndex for field {}: {} children, field_name={:?}", + field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0), index.metadata.field_name); + index + } + Err(e) => { + // Try manual deserialization as fallback + eprintln!("[decode_state_root_bfs] Failed to decode collection root EntityIndex for field {} using Borsh: {}. Attempting manual decode...", field.name, e); + match try_manual_entity_index_decode(&child_value, context_id) { + Ok(index) => { + eprintln!("[decode_state_root_bfs] Successfully decoded collection root EntityIndex manually for field {}: {} children, field_name={:?}", + field.name, index.children.as_ref().map(|c| c.len()).unwrap_or(0), index.metadata.field_name); + index + } + Err(manual_err) => { + eprintln!("[decode_state_root_bfs] Manual decode also failed for collection root: {}", manual_err); + continue; // Skip this child + } } } + }; + + // Match by field_name if available, otherwise fall back to sequential matching + let field_name_matches = child_index + .metadata + .field_name + .as_ref() + .map(|fn_| fn_ == &field.name) + .unwrap_or(false); + + if field_name_matches { + // This child's field_name matches the schema field + eprintln!("[decode_state_root_bfs] Found matching child for field {} by field_name", field.name); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); + break; + } else if child_index.metadata.field_name.is_none() { + // Legacy data without field_name - use sequential matching as fallback + eprintln!("[decode_state_root_bfs] Child has no field_name, using sequential match for field {}", field.name); + matched_child = Some((state_key.clone(), child_index.clone())); + used_children.insert(child_element_id); + break; } - }; - // This is a collection root - it matches this collection field - matched_child = Some((state_key.clone(), child_index)); - used_children.insert(child_element_id); - break; + // If field_name exists but doesn't match, continue to 
next child + } } } - } + } // end fallback if let Some((collection_root_key, collection_root_index)) = matched_child { // Decode this collection field using the found collection root @@ -2152,7 +2356,7 @@ fn decode_state_root_bfs( } } else { // Non-collection field - could be a Record (Counter, etc.) or scalar - // Try to find a child that matches this field + // Try to find a child that matches this field by field_name // For Record types like Counter, they're stored as children of the root let mut matched_child = None; for child_info in &root_children { @@ -2161,7 +2365,7 @@ fn decode_state_root_bfs( continue; } - // Check if this child matches the field by trying to decode it + // Check if this child matches the field by field_name first if let Some(state_key) = element_to_state.get(&child_element_id) { let child_key_bytes = hex::decode(state_key).wrap_err_with(|| { format!("Failed to decode child_state_key: {}", state_key) @@ -2171,6 +2375,21 @@ fn decode_state_root_bfs( child_key.extend_from_slice(&child_key_bytes); if let Ok(Some(child_value)) = db.get_cf(state_cf, &child_key) { + // First try to decode as EntityIndex to check field_name + if let Ok(child_index) = borsh::from_slice::(&child_value) { + // Check if field_name matches + if let Some(ref child_field_name) = child_index.metadata.field_name { + if child_field_name != &field.name { + // This child's field_name doesn't match - skip to next child + eprintln!("[decode_state_root_bfs] Skipping child {} for field {} - field_name is '{}'", + child_element_id, field.name, child_field_name); + continue; + } + eprintln!("[decode_state_root_bfs] Found matching child {} for field {} by field_name", + child_element_id, field.name); + } + } + eprintln!("[decode_state_root_bfs] Attempting to decode child {} for field {} (value length: {})", child_element_id, field.name, child_value.len()); // First, try to decode directly as the field's type (for Counter, etc.) 
// This handles cases where the value is stored as Entry where T is the field type diff --git a/tools/merodb/src/export/cli.rs b/tools/merodb/src/export/cli.rs index eb3bb39e3..ca90e9a53 100644 --- a/tools/merodb/src/export/cli.rs +++ b/tools/merodb/src/export/cli.rs @@ -31,6 +31,7 @@ pub struct ExportArgs { /// State schema JSON file (extracted using `calimero-abi state`) /// /// This includes the state root type and its dependencies, sufficient for state deserialization. + /// If not provided, schema will be inferred from database metadata (field_name and crdt_type). #[arg(long, value_name = "SCHEMA_FILE")] pub state_schema_file: Option, @@ -68,7 +69,25 @@ pub fn run_export(args: ExportArgs) -> Result<()> { Err(e) => eyre::bail!("Failed to load state schema: {e}"), } } else { - eyre::bail!("--state-schema-file is required when exporting data"); + // Infer schema from database metadata + println!("No schema file provided, inferring schema from database metadata..."); + println!("(This requires field_name to be stored in entity metadata)"); + match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + println!("Schema inferred successfully"); + if let Some(ref root) = manifest.state_root { + println!("State root: {root}"); + } + if let Some(ref root_name) = manifest.state_root { + if let Some(calimero_wasm_abi::schema::TypeDef::Record { fields }) = manifest.types.get(root_name) { + println!("Fields: {}", fields.len()); + } + } + println!("Note: Inferred schema may have simplified types. For full type information, provide --state-schema-file"); + manifest + } + Err(e) => eyre::bail!("Failed to infer schema from database: {e}. Try providing --state-schema-file instead."), + } }; let columns = if args.all { diff --git a/tools/merodb/src/gui/index.html b/tools/merodb/src/gui/index.html index f66637701..2cca605e4 100644 --- a/tools/merodb/src/gui/index.html +++ b/tools/merodb/src/gui/index.html @@ -35,7 +35,7 @@

MeroDB Inspector

📊

Load Database

-

Specify the database path and optionally upload a state schema file for state decoding

+

Specify the database path. Schema file is optional - if not provided, schema will be inferred from database metadata

@@ -63,7 +63,7 @@

Load Database

> No file chosen
- Required for state deserialization + Optional - schema will be inferred from database if not provided
diff --git a/tools/merodb/src/gui/server.rs b/tools/merodb/src/gui/server.rs index a726f49c1..241fe3af2 100644 --- a/tools/merodb/src/gui/server.rs +++ b/tools/merodb/src/gui/server.rs @@ -14,6 +14,7 @@ use tower_http::{services::ServeDir, set_header::SetResponseHeaderLayer}; use crate::{abi, dag, export, types::Column}; use calimero_wasm_abi::schema::Manifest; +use hex; #[derive(Debug, Serialize)] struct ErrorResponse { @@ -187,11 +188,11 @@ async fn handle_export(mut multipart: Multipart) -> impl IntoResponse { } } } else { - eprintln!("No state schema file provided - state values will not be decoded"); + // Will infer schema after opening database None }; - // Open database + // Open database (needed for both schema inference and export) let db = match open_database(&db_path) { Ok(db) => db, Err(e) => { @@ -205,6 +206,30 @@ async fn handle_export(mut multipart: Multipart) -> impl IntoResponse { } }; + // Infer schema if not provided (no context_id for global export) + let schema = if schema.is_none() { + eprintln!("No state schema file provided - inferring schema from database..."); + match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + eprintln!("Schema inferred successfully"); + info_message = Some( + "No schema file provided - schema inferred from database metadata. State values will be decoded using inferred schema.".to_string() + ); + Some(manifest) + } + Err(e) => { + let warning = format!( + "Failed to infer schema from database: {e}. State values will not be decoded." 
+ ); + eprintln!("Warning: {warning}"); + warning_message = Some(warning); + None + } + } + } else { + schema + }; + // Export all columns let columns = Column::all().to_vec(); let data = if let Some(schema) = schema { @@ -299,7 +324,7 @@ async fn handle_state_tree(mut multipart: Multipart) -> impl IntoResponse { return (StatusCode::BAD_REQUEST, Json(ErrorResponse { error: e })).into_response(); } - // State schema is required for state tree extraction + // State schema is optional - infer from database if not provided let schema = if let Some(schema_text) = state_schema_text { match serde_json::from_str::(&schema_text) { Ok(schema_value) => match abi::load_state_schema_from_json_value(&schema_value) { @@ -325,13 +350,34 @@ async fn handle_state_tree(mut multipart: Multipart) -> impl IntoResponse { } } } else { - return ( - StatusCode::BAD_REQUEST, - Json(ErrorResponse { - error: "State schema file is required for state tree extraction".to_owned(), - }), - ) - .into_response(); + // Infer schema from database + eprintln!("[server] No schema file provided, inferring from database..."); + match open_database(&db_path) { + Ok(db) => match abi::infer_schema_from_database(&db, None) { + Ok(manifest) => { + eprintln!("[server] Schema inferred successfully"); + manifest + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to infer schema from database: {e}"), + }), + ) + .into_response(); + } + }, + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to open database for schema inference: {e}"), + }), + ) + .into_response(); + } + } }; // Open database @@ -544,7 +590,7 @@ async fn handle_context_tree(mut multipart: Multipart) -> impl IntoResponse { return (StatusCode::BAD_REQUEST, Json(ErrorResponse { error: e })).into_response(); } - // State schema is required for state tree extraction + // State schema is optional - infer from database if not provided 
let schema = if let Some(schema_text) = state_schema_text { match serde_json::from_str::(&schema_text) { Ok(schema_value) => match abi::load_state_schema_from_json_value(&schema_value) { @@ -570,13 +616,67 @@ async fn handle_context_tree(mut multipart: Multipart) -> impl IntoResponse { } } } else { - return ( - StatusCode::BAD_REQUEST, - Json(ErrorResponse { - error: "State schema file is required for state tree extraction".to_owned(), - }), - ) - .into_response(); + // Infer schema from database for this specific context + eprintln!( + "[server] No schema file provided, inferring from database for context {}...", + context_id + ); + match open_database(&db_path) { + Ok(db) => { + // Decode context_id from hex string + let context_id_bytes = match hex::decode(&context_id) { + Ok(bytes) if bytes.len() == 32 => bytes, + _ => { + return ( + StatusCode::BAD_REQUEST, + Json(ErrorResponse { + error: format!("Invalid context_id format: {}", context_id), + }), + ) + .into_response(); + } + }; + match abi::infer_schema_from_database(&db, Some(&context_id_bytes)) { + Ok(manifest) => { + let field_count = manifest + .state_root + .as_ref() + .and_then(|root| manifest.types.get(root)) + .and_then(|ty| { + if let calimero_wasm_abi::schema::TypeDef::Record { fields } = ty { + Some(fields.len()) + } else { + None + } + }) + .unwrap_or(0); + eprintln!( + "[server] Schema inferred successfully for context {}: {} fields found", + context_id, field_count + ); + manifest + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to infer schema from database: {e}"), + }), + ) + .into_response(); + } + } + } + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: format!("Failed to open database for schema inference: {e}"), + }), + ) + .into_response(); + } + } }; // Open database diff --git a/tools/merodb/src/gui/static/css/visualization.css b/tools/merodb/src/gui/static/css/visualization.css 
index 95bcc37a9..8027beaf3 100644 --- a/tools/merodb/src/gui/static/css/visualization.css +++ b/tools/merodb/src/gui/static/css/visualization.css @@ -121,6 +121,14 @@ pointer-events: none; } +/* Field Type Colors */ +.field-type-unordered_map { color: #61afef !important; } +.field-type-unordered_set { color: #c678dd !important; } +.field-type-vector { color: #e5c07b !important; } +.field-type-counter { color: #98c379 !important; } +.field-type-rga { color: #d19a66 !important; } +.field-type-lww_register { color: #56b6c2 !important; } + /* State Tree Links */ .state-link { stroke: var(--color-link-default); diff --git a/tools/merodb/src/gui/static/js/api-service.js b/tools/merodb/src/gui/static/js/api-service.js index 7381048b4..3c1c1e342 100644 --- a/tools/merodb/src/gui/static/js/api-service.js +++ b/tools/merodb/src/gui/static/js/api-service.js @@ -172,26 +172,23 @@ export class ApiService { throw new Error(`Failed to read state schema file: ${err.message}. The file may have already been consumed.`); } } else { - console.error('[ApiService.loadContextTree] ERROR: No state schema file or cached content available!'); - console.error('[ApiService.loadContextTree] State:', { - currentStateSchemaFile: window.app?.state?.currentStateSchemaFile?.name || 'null', - hasCachedContent: !!window.app?.state?.currentStateSchemaFileContent, - hasLocalStorageContent: !!localStorage.getItem('merodb_schema_content'), - stateSchemaFileProvided: !!stateSchemaFile - }); - throw new Error('State schema file is required for state tree extraction'); + // Schema is optional - backend will infer it if not provided + console.log('[ApiService.loadContextTree] No state schema file - backend will infer schema from database'); + text = null; // Don't send schema file } } catch (err) { - if (err.message.includes('State schema file is required')) { - throw err; - } - console.error('[ApiService.loadContextTree] Error accessing local storage:', err); - throw new Error('State schema file is required 
for state tree extraction'); + // Schema is optional - backend will infer it if not provided + console.log('[ApiService.loadContextTree] No state schema file - backend will infer schema from database'); + text = null; // Don't send schema file } } - console.log('[ApiService.loadContextTree] Appending state_schema_file to formData, length:', text.length); - formData.append('state_schema_file', text); + if (text) { + console.log('[ApiService.loadContextTree] Appending state_schema_file to formData, length:', text.length); + formData.append('state_schema_file', text); + } else { + console.log('[ApiService.loadContextTree] No schema file - will use schema inference'); + } const response = await fetch('/api/context-tree', { method: 'POST', diff --git a/tools/merodb/src/gui/static/js/app.js b/tools/merodb/src/gui/static/js/app.js index 3a6555bac..d6e829cab 100644 --- a/tools/merodb/src/gui/static/js/app.js +++ b/tools/merodb/src/gui/static/js/app.js @@ -214,9 +214,9 @@ export class App { this.state.currentStateSchemaFile = stateSchemaInput.files[0]; } - if (!this.state.currentStateSchemaFile) { - UIManager.showMessage('warning-message', 'No state schema file found. 
Please select a file first.'); - return; + // Schema file is optional - can use schema inference + if (!this.state.currentStateSchemaFile && !this.state.currentStateSchemaFileContent) { + console.log('[App] No schema file - will use schema inference'); } } await this.loadDatabase(); diff --git a/tools/merodb/src/gui/static/js/state-tree-visualizer.js b/tools/merodb/src/gui/static/js/state-tree-visualizer.js index 20e36b26a..73a7329f0 100644 --- a/tools/merodb/src/gui/static/js/state-tree-visualizer.js +++ b/tools/merodb/src/gui/static/js/state-tree-visualizer.js @@ -37,6 +37,7 @@ export class StateTreeVisualizer { */ async load() { // Check if we have schema content (from file or local storage) + // Schema is optional - if not provided, backend will infer it from database if (!this.state.currentStateSchemaFile && !this.state.currentStateSchemaFileContent) { // Try to load from local storage try { @@ -45,10 +46,10 @@ export class StateTreeVisualizer { this.state.currentStateSchemaFileContent = savedContent; console.log('[StateTreeVisualizer] Loaded schema from local storage'); } else { - throw new Error('State schema file is required for state tree visualization'); + console.log('[StateTreeVisualizer] No schema file provided - will use schema inference'); } } catch (err) { - throw new Error('State schema file is required for state tree visualization'); + console.log('[StateTreeVisualizer] No schema file provided - will use schema inference'); } } @@ -371,17 +372,69 @@ export class StateTreeVisualizer { return ''; }); - // Add node ID labels + // Add node labels - show field name for Field nodes, truncated ID otherwise nodeEnter.append('text') .attr('dy', '0.31em') .attr('x', d => (d.children || d._children) ? -10 : 10) .attr('text-anchor', d => (d.children || d._children) ? 
'end' : 'start') .text(d => { + // For Field nodes, show the field name + if (d.data.type === 'Field' && d.data.field) { + return d.data.field; + } + // For StateRoot, show "Root" + if (d.data.type === 'StateRoot') { + return 'Root'; + } + // For Entry nodes, show meaningful data + if (d.data.type === 'Entry' && d.data.data) { + // Counter entries: show value (the count) instead of key (executor ID) + // Counter has both key (hash) and value (number) + if (d.data.data.key && d.data.data.value) { + const val = d.data.data.value.parsed ?? d.data.data.value; + // If value is a number (Counter), show "count: N" + if (typeof val === 'number') { + return `count: ${val}`; + } + // Otherwise show "key β†’ value" for regular maps + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + const valStr = typeof val === 'string' ? val : JSON.stringify(val); + const display = `${keyStr} β†’ ${valStr}`; + return display.length > 30 ? display.substring(0, 27) + '...' : display; + } + // Map entries with only key: show key + if (d.data.data.key) { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return keyStr.length > 25 ? keyStr.substring(0, 22) + '...' : keyStr; + } + // Vector entries: show item value + if (d.data.data.item) { + const item = d.data.data.item.parsed || d.data.data.item; + const itemStr = typeof item === 'string' ? item : JSON.stringify(item); + return itemStr.length > 35 ? itemStr.substring(0, 32) + '...' : itemStr; + } + // Set entries or other: show value + if (d.data.data.value) { + const val = d.data.data.value.parsed || d.data.data.value; + const valStr = typeof val === 'string' ? val : JSON.stringify(val); + return valStr.length > 25 ? valStr.substring(0, 22) + '...' : valStr; + } + } + // Fallback to truncated ID const id = d.data.id || 'N/A'; return id !== 'N/A' ? 
`${id.substring(0, 8)}...` : 'N/A'; }) - .style('font-size', '10px') - .style('fill', '#bbb') + .style('font-size', '11px') + .style('fill', d => { + // Color code by type + if (d.data.type === 'StateRoot') return '#ffa500'; // Orange for root + if (d.data.type === 'Field') return '#61afef'; // Blue for fields + if (d.data.type === 'Entry') return '#98c379'; // Green for entries + return '#bbb'; + }) + .style('font-weight', d => d.data.type === 'Field' ? 'bold' : 'normal') .style('pointer-events', 'none'); // Transition nodes to their new position @@ -612,9 +665,11 @@ export class StateTreeVisualizer { html += ` Type:`; html += ` ${data.type || 'N/A'}`; html += ``; + // Calculate children count from actual tree structure + const childrenCount = (node.children?.length || 0) + (node._children?.length || 0); html += `
`; html += ` Children:`; - html += ` ${data.children_count || 0}`; + html += ` ${childrenCount}`; html += `
`; html += ``; @@ -712,47 +767,64 @@ export class StateTreeVisualizer { html += ``; } - html += '
'; - html += `
Hashes
`; - html += `
`; - html += ` ID:`; - html += ` ${TooltipManager.formatHash(data.id, 'ID')}`; - html += `
`; - html += `
`; - html += ` Full Hash:`; - html += ` ${TooltipManager.formatHash(data.full_hash, 'Full Hash')}`; - html += `
`; - html += `
`; - html += ` Own Hash:`; - html += ` ${TooltipManager.formatHash(data.own_hash, 'Own Hash')}`; - html += `
`; - // Use the parent node's ID from the D3 hierarchy instead of data.parent_id - // This ensures the displayed parent ID matches what's shown in the tree - if (node.parent) { - html += `
`; - html += ` Parent ID:`; - html += ` ${TooltipManager.formatHash(node.parent.data.id, 'Parent ID')}`; + // Hashes section - only show if we have hash data + const hasHashData = data.id || data.full_hash || data.own_hash || node.parent; + if (hasHashData) { + html += '
'; + html += `
Hashes
`; + if (data.id) { + html += `
`; + html += ` ID:`; + html += ` ${TooltipManager.formatHash(data.id, 'ID')}`; + html += `
`; + } + if (data.full_hash) { + html += `
`; + html += ` Full Hash:`; + html += ` ${TooltipManager.formatHash(data.full_hash, 'Full Hash')}`; + html += `
`; + } + if (data.own_hash) { + html += `
`; + html += ` Own Hash:`; + html += ` ${TooltipManager.formatHash(data.own_hash, 'Own Hash')}`; + html += `
`; + } + // Use the parent node's ID from the D3 hierarchy + if (node.parent) { + html += `
`; + html += ` Parent ID:`; + html += ` ${TooltipManager.formatHash(node.parent.data.id, 'Parent ID')}`; + html += `
`; + } html += `
`; } - html += `
`; - html += '
'; - html += `
Timestamps
`; - html += `
`; - html += ` Created:`; - html += ` ${TooltipManager.formatTimestamp(data.created_at)}`; - html += `
`; - html += `
`; - html += ` Updated:`; - html += ` ${TooltipManager.formatTimestamp(data.updated_at)}`; - html += `
`; - if (data.deleted_at) { - html += `
`; - html += ` Deleted:`; - html += ` ${TooltipManager.formatTimestamp(data.deleted_at)}`; + // Timestamps section - only show if we have timestamp data + const hasTimestampData = data.created_at || data.updated_at || data.deleted_at; + if (hasTimestampData) { + html += '
'; + html += `
Timestamps
`; + if (data.created_at) { + html += `
`; + html += ` Created:`; + html += ` ${TooltipManager.formatTimestamp(data.created_at)}`; + html += `
`; + } + if (data.updated_at) { + html += `
`; + html += ` Updated:`; + html += ` ${TooltipManager.formatTimestamp(data.updated_at)}`; + html += `
`; + } + if (data.deleted_at) { + html += `
`; + html += ` Deleted:`; + html += ` ${TooltipManager.formatTimestamp(data.deleted_at)}`; + html += `
`; + } html += `
`; } - html += `
`; return html; } @@ -895,13 +967,29 @@ export class StateTreeVisualizer { // Check if item is deleted const isDeleted = data.deleted_at !== null && data.deleted_at !== undefined; + // Determine fill color based on type + let textFill = isDeleted ? '#888' : '#d4d4d4'; + if (!isDeleted && d._typeClass) { + // Use CSS class color for typed fields + const typeColorMap = { + 'field-type-unordered_map': '#61afef', + 'field-type-unordered_set': '#c678dd', + 'field-type-vector': '#e5c07b', + 'field-type-counter': '#98c379', + 'field-type-rga': '#d19a66', + 'field-type-lww_register': '#56b6c2' + }; + textFill = typeColorMap[d._typeClass] || textFill; + } + // Create text element that can wrap const text = g.append('text') .attr('x', (!d.children && !d._children) ? 8 : 0) // Offset for leaf nodes with circles .attr('y', nodeHeight / 2) .attr('dy', '0.35em') - .attr('font-size', '11px') - .attr('fill', isDeleted ? '#888' : '#d4d4d4') // Grayed out for deleted + .attr('font-size', '12px') + .attr('font-weight', data.type === 'Field' ? '500' : '400') + .attr('fill', textFill) .attr('opacity', isDeleted ? 0.6 : 1.0); // Reduced opacity for deleted let labelText = ''; @@ -931,6 +1019,22 @@ export class StateTreeVisualizer { } } + // Icon mapping for field types + const typeIcons = { + 'UnorderedMap': 'πŸ—ΊοΈ', + 'UnorderedSet': 'πŸ“¦', + 'Vector': 'πŸ“‹', + 'LwwRegister': 'πŸ“', + 'Counter': 'πŸ”’', + 'Rga': 'πŸ“œ', + 'unordered_map': 'πŸ—ΊοΈ', + 'unordered_set': 'πŸ“¦', + 'vector': 'πŸ“‹', + 'lww_register': 'πŸ“', + 'counter': 'πŸ”’', + 'rga': 'πŸ“œ' + }; + // Format type info nicely if (typeInfo) { // Convert common type names to readable format @@ -943,45 +1047,95 @@ export class StateTreeVisualizer { 'Rga': 'rga' }; const readableType = typeMap[typeInfo] || typeInfo.toLowerCase(); + const icon = typeIcons[typeInfo] || typeIcons[readableType] || 'πŸ“'; + + // Add child count for collections + const childCount = d._children ? d._children.length : (d.children ? 
d.children.length : 0); + const countStr = childCount > 0 ? ` [${childCount}]` : ''; + if (counterValue !== null) { - labelText = `${fieldName} (${readableType}) = ${counterValue}`; + labelText = `${icon} ${fieldName}: ${readableType}${countStr} = ${counterValue}`; } else { - labelText = `${fieldName} (${readableType})`; + labelText = `${icon} ${fieldName}: ${readableType}${countStr}`; } + + // Store type info for styling + d._typeClass = `field-type-${readableType}`; } else { if (counterValue !== null) { - labelText = `${fieldName} = ${counterValue}`; + labelText = `πŸ“ ${fieldName} = ${counterValue}`; } else { - labelText = fieldName; + labelText = `πŸ“ ${fieldName}`; } } } - // For Entry types, show key: value format + // For Entry types, show meaningful data else if (data.type === 'Entry') { if (data.data) { const stateData = data.data; let keyStr = ''; let valueStr = ''; + let itemStr = ''; - // Get key + // Get key (for Map entries) if (stateData.key && stateData.key.parsed !== undefined) { - keyStr = JSON.stringify(stateData.key.parsed, null, 0); + const key = stateData.key.parsed; + if (typeof key === 'string') { + keyStr = `"${key}"`; + } else { + keyStr = JSON.stringify(key, null, 0); + } } else if (stateData.key) { keyStr = String(stateData.key); } - // Get value + // Get value (for Map/Counter entries) if (stateData.value && stateData.value.parsed !== undefined) { - valueStr = JSON.stringify(stateData.value.parsed, null, 0); + const val = stateData.value.parsed; + // Handle LwwRegister values (show inner value) + if (val && typeof val === 'object' && val.value !== undefined && val.clock !== undefined) { + valueStr = typeof val.value === 'string' ? 
`"${val.value}"` : JSON.stringify(val.value, null, 0); + } else if (typeof val === 'string') { + valueStr = `"${val}"`; + } else if (typeof val === 'number') { + valueStr = String(val); + } else { + valueStr = JSON.stringify(val, null, 0); + } } else if (stateData.value) { valueStr = String(stateData.value); } - // Format as "key: value" + // Get item (for Vector/Set entries) + if (stateData.item && stateData.item.parsed !== undefined) { + const item = stateData.item.parsed; + // Handle LwwRegister wrapped items + if (item && typeof item === 'object' && item.value !== undefined && item.clock !== undefined) { + itemStr = typeof item.value === 'string' ? `"${item.value}"` : JSON.stringify(item.value, null, 0); + } else if (typeof item === 'string') { + itemStr = `"${item}"`; + } else { + itemStr = JSON.stringify(item, null, 0); + } + } else if (stateData.item) { + itemStr = String(stateData.item); + } + + // Truncate long values + const maxLen = 60; + if (valueStr.length > maxLen) valueStr = valueStr.substring(0, maxLen) + '...'; + if (itemStr.length > maxLen) itemStr = itemStr.substring(0, maxLen) + '...'; + + // Determine display format based on what data is available if (keyStr && valueStr) { - labelText = `${keyStr}: ${valueStr}`; + // Counter: if value is a number, show "key β†’ value" + // Map: show "key β†’ value" + labelText = `${keyStr} β†’ ${valueStr}`; + } else if (itemStr) { + // Vector/Set entry: just show the item value + labelText = itemStr; } else if (keyStr) { - labelText = `Key: ${keyStr}`; + labelText = keyStr; } else if (valueStr) { labelText = valueStr; } else { From e3d603f20b3df68c180d2868eee3d928a18417f1 Mon Sep 17 00:00:00 2001 From: xilosada Date: Thu, 5 Feb 2026 11:29:00 +0100 Subject: [PATCH 2/7] feat(merodb): improve nested CRDT visualization - Add Counter value display with emoji for Map - Add Vector visualization with index and count - Add recursive nested collection decomposition - Show UnorderedMap children as {key=value, ...} format 
- Show UnorderedSet items as {item1, item2, ...} format - Detect Counter vs UnorderedSet by reading actual counter values - Add read_nested_collection_entries() helper for recursive decoding - Update GUI state-tree-visualizer.js to render nested children inline --- apps/kv-store/src/lib.rs | 2 +- apps/nested-crdt-test/src/lib.rs | 10 +- tools/merodb/src/export.rs | 395 +++++++++++++++++- .../gui/static/js/state-tree-visualizer.js | 50 ++- 4 files changed, 444 insertions(+), 13 deletions(-) diff --git a/apps/kv-store/src/lib.rs b/apps/kv-store/src/lib.rs index 6a6547b06..60dcdf905 100644 --- a/apps/kv-store/src/lib.rs +++ b/apps/kv-store/src/lib.rs @@ -37,7 +37,7 @@ impl KvStore { #[app::init] pub fn init() -> KvStore { KvStore { - items: UnorderedMap::new(), + items: UnorderedMap::new_with_field_name("items"), } } diff --git a/apps/nested-crdt-test/src/lib.rs b/apps/nested-crdt-test/src/lib.rs index fe09beb9c..c73bb0a10 100644 --- a/apps/nested-crdt-test/src/lib.rs +++ b/apps/nested-crdt-test/src/lib.rs @@ -68,11 +68,11 @@ impl NestedCrdtTest { #[app::init] pub fn init() -> NestedCrdtTest { NestedCrdtTest { - counters: UnorderedMap::new(), - registers: UnorderedMap::new(), - metadata: UnorderedMap::new(), - metrics: Vector::new(), - tags: UnorderedMap::new(), + counters: UnorderedMap::new_with_field_name("counters"), + registers: UnorderedMap::new_with_field_name("registers"), + metadata: UnorderedMap::new_with_field_name("metadata"), + metrics: Vector::new_with_field_name("metrics"), + tags: UnorderedMap::new_with_field_name("tags"), } } diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index c93ba0ace..aad4dc4fd 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -690,6 +690,7 @@ fn decode_state_entry( "created_at": index.metadata.created_at, "updated_at": *index.metadata.updated_at, "field_name": index.metadata.field_name, + "crdt_type": index.metadata.crdt_type.as_ref().map(|c| format!("{:?}", c)), "deleted_at": 
index.deleted_at })); } else { @@ -3058,6 +3059,185 @@ fn decode_collection_field_with_root( } } +/// Read the actual value of a Counter by summing entries in its positive and negative maps +#[cfg(feature = "gui")] +fn read_counter_value( + db: &DBWithThreadMode, + state_cf: &rocksdb::ColumnFamily, + context_id: &[u8], + positive_id: &[u8], + negative_id: &[u8], +) -> i64 { + use sha2::{Digest, Sha256}; + + let mut total: i64 = 0; + + // Helper to sum values from a counter map (positive or negative) + let sum_map_values = |map_id: &[u8]| -> i64 { + let mut sum: i64 = 0; + + // Find the EntityIndex for this map to get its children + let mut key_bytes_for_hash = Vec::with_capacity(33); + key_bytes_for_hash.push(0u8); // Key::Index variant + key_bytes_for_hash.extend_from_slice(map_id); + let map_state_key = Sha256::digest(&key_bytes_for_hash); + + let mut full_key = Vec::with_capacity(64); + full_key.extend_from_slice(context_id); + full_key.extend_from_slice(&map_state_key); + + if let Ok(Some(map_value)) = db.get_cf(state_cf, &full_key) { + if let Ok(map_index) = borsh::from_slice::(&map_value) { + // For each child in the map, read its Entry to get the count value + if let Some(children) = &map_index.children { + for child_info in children { + // Calculate Key::Entry for this child + let mut entry_key_bytes = Vec::with_capacity(33); + entry_key_bytes.push(1u8); // Key::Entry variant + entry_key_bytes.extend_from_slice(child_info.id.as_bytes()); + let entry_state_key = Sha256::digest(&entry_key_bytes); + + let mut entry_full_key = Vec::with_capacity(64); + entry_full_key.extend_from_slice(context_id); + entry_full_key.extend_from_slice(&entry_state_key); + + if let Ok(Some(entry_value)) = db.get_cf(state_cf, &entry_full_key) { + // Entry format: (key: String, value: u64, element_id: Id) + // Parse key length, skip key, read u64 value + if entry_value.len() >= 12 { + // minimum: 4 (len) + 0 (key) + 8 (u64) + let key_len = u32::from_le_bytes([ + entry_value[0], + 
entry_value[1], + entry_value[2], + entry_value[3], + ]) as usize; + + let value_offset = 4 + key_len; + if entry_value.len() >= value_offset + 8 { + let count = u64::from_le_bytes([ + entry_value[value_offset], + entry_value[value_offset + 1], + entry_value[value_offset + 2], + entry_value[value_offset + 3], + entry_value[value_offset + 4], + entry_value[value_offset + 5], + entry_value[value_offset + 6], + entry_value[value_offset + 7], + ]); + sum += count as i64; + } + } + } + } + } + } + } + sum + }; + + total += sum_map_values(positive_id); + total -= sum_map_values(negative_id); + + total +} + +/// Read children of a nested collection and return their entries as JSON +#[cfg(feature = "gui")] +fn read_nested_collection_entries( + db: &DBWithThreadMode, + state_cf: &rocksdb::ColumnFamily, + context_id: &[u8], + collection_id: &[u8], +) -> Vec { + use sha2::{Digest, Sha256}; + + let mut results = Vec::new(); + + // Look up the EntityIndex for this collection + let mut key_bytes = Vec::with_capacity(33); + key_bytes.push(0u8); // Key::Index variant + key_bytes.extend_from_slice(collection_id); + let state_key = Sha256::digest(&key_bytes); + + let mut full_key = Vec::with_capacity(64); + full_key.extend_from_slice(context_id); + full_key.extend_from_slice(&state_key); + + let Ok(Some(index_bytes)) = db.get_cf(state_cf, &full_key) else { + return results; + }; + + let Ok(index) = borsh::from_slice::(&index_bytes) else { + return results; + }; + + // Read each child entry + if let Some(children) = &index.children { + for child_info in children { + // Get Key::Entry for this child + let mut entry_key_bytes = Vec::with_capacity(33); + entry_key_bytes.push(1u8); // Key::Entry variant + entry_key_bytes.extend_from_slice(child_info.id.as_bytes()); + let entry_state_key = Sha256::digest(&entry_key_bytes); + + let mut entry_full_key = Vec::with_capacity(64); + entry_full_key.extend_from_slice(context_id); + entry_full_key.extend_from_slice(&entry_state_key); + + let 
Ok(Some(entry_value)) = db.get_cf(state_cf, &entry_full_key) else { + continue; + }; + + // Try to parse the entry as (key: String, value) + if entry_value.len() >= 4 { + let key_len = u32::from_le_bytes([ + entry_value[0], + entry_value[1], + entry_value[2], + entry_value[3], + ]) as usize; + + if key_len > 0 && key_len < 1000 && entry_value.len() >= 4 + key_len { + if let Ok(key_str) = std::str::from_utf8(&entry_value[4..4 + key_len]) { + let value_bytes = &entry_value[4 + key_len..]; + + // Try to parse value as a string (LwwRegister) + if value_bytes.len() >= 4 { + let val_len = u32::from_le_bytes([ + value_bytes[0], + value_bytes[1], + value_bytes[2], + value_bytes[3], + ]) as usize; + + if val_len > 0 && val_len < 10000 && value_bytes.len() >= 4 + val_len { + if let Ok(val_str) = + std::str::from_utf8(&value_bytes[4..4 + val_len]) + { + results.push(json!({ + "key": key_str, + "value": val_str, + })); + continue; + } + } + } + + // Fallback: show key with hex value + results.push(json!({ + "key": key_str, + "value_hex": hex::encode(value_bytes), + })); + } + } + } + } + } + + results +} + #[cfg(feature = "gui")] fn decode_collection_entries_bfs( db: &DBWithThreadMode, @@ -3177,7 +3357,218 @@ fn decode_collection_entries_bfs( .wrap_err("Failed to query entry")? .ok_or_else(|| eyre::eyre!("Entry not found"))?; - // Decode the entry according to collection type + // FIRST: Try to decode as EntityIndex to check if it's a nested collection + // This allows us to detect nested CRDTs (Counter, nested Map, Set, etc.) 
+ // by reading their crdt_type from metadata instead of relying on schema + match borsh::from_slice::(&entry_value) { + Ok(entry_index) => { + // It's an EntityIndex - check its crdt_type + let child_crdt_type = entry_index.metadata.crdt_type.as_ref(); + let child_field_name = entry_index.metadata.field_name.clone(); + let child_count = entry_index.children.as_ref().map(|c| c.len()).unwrap_or(0); + + eprintln!( + "[decode_collection_entries_bfs] Entry {} is EntityIndex with crdt_type={:?}, field_name={:?}, children={}", + entry_state_key, child_crdt_type, child_field_name, child_count + ); + + // If it has a crdt_type, treat it as a nested collection + if let Some(crdt) = child_crdt_type { + let crdt_name = format!("{:?}", crdt); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "NestedCollection", + "crdt_type": crdt_name, + "field_name": child_field_name, + "children_count": child_count, + "id": hex::encode(entry_index.id.as_bytes()), + } + })); + continue; + } + + // EntityIndex with no crdt_type might be a Vector entry + // Look for the actual data under Key::Entry for this ID + use sha2::{Digest, Sha256}; + let mut entry_key_bytes = Vec::with_capacity(33); + entry_key_bytes.push(1u8); // Key::Entry variant + entry_key_bytes.extend_from_slice(entry_index.id.as_bytes()); + let entry_data_state_key = Sha256::digest(&entry_key_bytes); + + let mut entry_data_full_key = Vec::with_capacity(64); + entry_data_full_key.extend_from_slice(context_id); + entry_data_full_key.extend_from_slice(&entry_data_state_key); + + if let Ok(Some(entry_data)) = db.get_cf(state_cf, &entry_data_full_key) { + // Found the entry data - check if it's a Counter (64 bytes = two IDs) + if entry_data.len() == 64 { + let positive_id = &entry_data[..32]; + let negative_id = &entry_data[32..]; + let counter_value = read_counter_value( + db, + state_cf, + context_id, + positive_id, + negative_id, + ); + + if counter_value.abs() < 1_000_000 { + eprintln!( + 
"[decode_collection_entries_bfs] Entry {} (Vector item) is Counter: value={}", + entry_state_key, counter_value + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "VectorEntry", + "index": entries.len(), + "value": { + "type": "Counter", + "parsed": counter_value, + } + } + })); + continue; + } + } + } + } + Err(_) => { + // Not an EntityIndex - try to parse as raw (key, value) entry + // First try to extract the key (Borsh string: u32 length + bytes) + if entry_value.len() >= 4 { + let key_len = u32::from_le_bytes([ + entry_value[0], + entry_value[1], + entry_value[2], + entry_value[3], + ]) as usize; + + if key_len > 0 && key_len < 1000 && entry_value.len() >= 4 + key_len { + if let Ok(key_str) = std::str::from_utf8(&entry_value[4..4 + key_len]) { + let value_bytes = &entry_value[4 + key_len..]; + + // Check if value looks like a Counter (64 bytes = two 32-byte IDs) + if value_bytes.len() == 64 { + let positive_id = &value_bytes[..32]; + let negative_id = &value_bytes[32..]; + + // Try to read the actual counter value + let counter_value = read_counter_value( + db, + state_cf, + context_id, + positive_id, + negative_id, + ); + + // Sanity check: if counter value is reasonable (between -1M and 1M), it's likely a Counter + // Otherwise it might be an UnorderedSet or other 64-byte structure + if counter_value.abs() < 1_000_000 { + eprintln!( + "[decode_collection_entries_bfs] Entry {} is Counter: key='{}', value={}", + entry_state_key, key_str, counter_value + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "Entry", + "key": { "parsed": key_str, "type": "scalar::String" }, + "value": { + "type": "Counter", + "parsed": counter_value, + "display": format!("πŸ”’ {}", counter_value), + } + } + })); + continue; + } else { + // Likely an UnorderedSet or nested Map (64 bytes = two collection IDs) + // Try to read the first collection to get its children + let nested_children = read_nested_collection_entries( 
+ db, + state_cf, + context_id, + positive_id, + ); + + eprintln!( + "[decode_collection_entries_bfs] Entry {} is NestedCollection (64 bytes): key='{}', children={}", + entry_state_key, key_str, nested_children.len() + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "Entry", + "key": { "parsed": key_str, "type": "scalar::String" }, + "value": { + "type": "NestedCollection", + "crdt_type": if nested_children.is_empty() { "UnorderedSet" } else { "UnorderedMap" }, + "children": nested_children, + "children_count": nested_children.len(), + } + } + })); + continue; + } + } + + // Check if value is another nested ID (32 bytes = single collection reference) + if value_bytes.len() == 32 { + eprintln!( + "[decode_collection_entries_bfs] Entry {} looks like Map: key='{}', value=1 ID (32 bytes)", + entry_state_key, key_str + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "Entry", + "key": { "parsed": key_str, "type": "scalar::String" }, + "value": { + "type": "NestedCollection", + "crdt_type": "UnorderedMap", + "collection_id": hex::encode(value_bytes), + } + } + })); + continue; + } + } + } + } + + // Try Vector entry format: value directly (no key prefix) + // Vector entries are just 64 bytes (two 32-byte IDs) + if entry_value.len() == 64 { + let positive_id = &entry_value[..32]; + let negative_id = &entry_value[32..]; + let counter_value = + read_counter_value(db, state_cf, context_id, positive_id, negative_id); + + if counter_value.abs() < 1_000_000 { + eprintln!( + "[decode_collection_entries_bfs] Entry {} is Vector item: value={}", + entry_state_key, counter_value + ); + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "VectorEntry", + "index": entries.len(), + "value": { + "type": "Counter", + "parsed": counter_value, + } + } + })); + continue; + } + } + } + } + + // FALLBACK: Decode the entry according to collection type from schema match decode_collection_entry( 
&entry_value, field, @@ -3199,7 +3590,7 @@ fn decode_collection_entries_bfs( } Err(e) => { eprintln!( - "[decode_collection_entries_bfs] Failed to decode entry {}: {}", + "[decode_collection_entries_bfs] Failed to decode entry {} with schema: {}", entry_state_key, e ); } diff --git a/tools/merodb/src/gui/static/js/state-tree-visualizer.js b/tools/merodb/src/gui/static/js/state-tree-visualizer.js index 73a7329f0..2098bf66b 100644 --- a/tools/merodb/src/gui/static/js/state-tree-visualizer.js +++ b/tools/merodb/src/gui/static/js/state-tree-visualizer.js @@ -387,14 +387,45 @@ export class StateTreeVisualizer { return 'Root'; } // For Entry nodes, show meaningful data - if (d.data.type === 'Entry' && d.data.data) { - // Counter entries: show value (the count) instead of key (executor ID) - // Counter has both key (hash) and value (number) + if ((d.data.type === 'Entry' || d.data.type === 'VectorEntry') && d.data.data) { + // Counter entries: show value (the count) with icon if (d.data.data.key && d.data.data.value) { const val = d.data.data.value.parsed ?? d.data.data.value; - // If value is a number (Counter), show "count: N" + const valType = d.data.data.value?.type; + // If value is a Counter, show "key: πŸ”’ N" + if (valType === 'Counter' && typeof val === 'number') { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return `${keyStr}: πŸ”’ ${val}`; + } + // If value is a number (legacy Counter format), show "key: πŸ”’ N" if (typeof val === 'number') { - return `count: ${val}`; + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? key : JSON.stringify(key); + return `${keyStr}: πŸ”’ ${val}`; + } + // If value is a NestedCollection, show "key: type" with children count + if (valType === 'NestedCollection') { + const key = d.data.data.key.parsed || d.data.data.key; + const keyStr = typeof key === 'string' ? 
key : JSON.stringify(key); + const crdtType = d.data.data.value.crdt_type || 'Collection'; + const nestedChildren = d.data.data.value.children || []; + const childCount = d.data.data.value.children_count || nestedChildren.length; + + // If we have nested children, show them inline + if (nestedChildren.length > 0 && nestedChildren.length <= 4) { + // For UnorderedSet, keys ARE the values (show as set items) + // For UnorderedMap, show key=value pairs + const isSet = crdtType === 'UnorderedSet' || nestedChildren.every(c => c.value_hex); + if (isSet) { + const items = nestedChildren.map(c => c.key).join(', '); + return `${keyStr}: {${items.length > 30 ? items.substring(0, 27) + '...' : items}}`; + } else { + const preview = nestedChildren.map(c => `${c.key}=${c.value || '?'}`).join(', '); + return `${keyStr}: {${preview.length > 30 ? preview.substring(0, 27) + '...' : preview}}`; + } + } + return `${keyStr}: πŸ“¦ ${crdtType} [${childCount}]`; } // Otherwise show "key β†’ value" for regular maps const key = d.data.data.key.parsed || d.data.data.key; @@ -403,6 +434,15 @@ export class StateTreeVisualizer { const display = `${keyStr} β†’ ${valStr}`; return display.length > 30 ? display.substring(0, 27) + '...' : display; } + // VectorEntry without key: show value directly + if (d.data.data.value && d.data.type === 'VectorEntry') { + const val = d.data.data.value.parsed ?? d.data.data.value; + const valType = d.data.data.value?.type; + if (valType === 'Counter' && typeof val === 'number') { + return `πŸ”’ ${val}`; + } + return typeof val === 'number' ? 
`πŸ”’ ${val}` : JSON.stringify(val); + } // Map entries with only key: show key if (d.data.data.key) { const key = d.data.data.key.parsed || d.data.data.key; From 6adcbf1702832e9a35cfc72dc2773fd6b382f0aa Mon Sep 17 00:00:00 2001 From: xilosada Date: Thu, 5 Feb 2026 16:50:50 +0100 Subject: [PATCH 3/7] fix(merodb): import CrdtType from calimero-storage instead of duplicating The local CrdtType enum in merodb could diverge from calimero-storage, causing Borsh deserialization issues. Now imports the canonical definition to ensure serialization compatibility. - Remove local CrdtType enum definition from export.rs - Import calimero_storage::collections::CrdtType in export.rs and abi.rs - Update pattern matching in abi.rs to use tuple variant syntax --- tools/merodb/src/abi.rs | 21 +++++++++++---------- tools/merodb/src/export.rs | 19 +++---------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/tools/merodb/src/abi.rs b/tools/merodb/src/abi.rs index de9768fe3..577620b6e 100644 --- a/tools/merodb/src/abi.rs +++ b/tools/merodb/src/abi.rs @@ -1,6 +1,7 @@ use std::fs; use std::path::Path; +use calimero_storage::collections::CrdtType; use calimero_wasm_abi::schema::Manifest; use eyre::Result; @@ -137,7 +138,7 @@ pub fn infer_schema_from_database( // Infer type from crdt_type let type_ref = if let Some(crdt_type) = index.metadata.crdt_type { match crdt_type { - crate::export::CrdtType::UnorderedMap => { + CrdtType::UnorderedMap => { // Default to Map - can be refined later TypeRef::Collection { collection: CollectionType::Map { @@ -148,21 +149,21 @@ pub fn infer_schema_from_database( inner_type: None, } } - crate::export::CrdtType::Vector => TypeRef::Collection { + CrdtType::Vector => TypeRef::Collection { collection: CollectionType::List { items: Box::new(TypeRef::string()), }, crdt_type: Some(CrdtCollectionType::Vector), inner_type: None, }, - crate::export::CrdtType::UnorderedSet => TypeRef::Collection { + CrdtType::UnorderedSet => TypeRef::Collection 
{ collection: CollectionType::List { items: Box::new(TypeRef::string()), }, crdt_type: Some(CrdtCollectionType::UnorderedSet), inner_type: None, }, - crate::export::CrdtType::Counter => TypeRef::Collection { + CrdtType::Counter => TypeRef::Collection { // Counter is stored as Map internally collection: CollectionType::Map { key: Box::new(TypeRef::string()), @@ -171,17 +172,17 @@ pub fn infer_schema_from_database( crdt_type: Some(CrdtCollectionType::Counter), inner_type: None, }, - crate::export::CrdtType::Rga => TypeRef::Collection { + CrdtType::Rga => TypeRef::Collection { collection: CollectionType::Record { fields: Vec::new() }, crdt_type: Some(CrdtCollectionType::ReplicatedGrowableArray), inner_type: None, }, - crate::export::CrdtType::LwwRegister => TypeRef::Collection { + CrdtType::LwwRegister => TypeRef::Collection { collection: CollectionType::Record { fields: Vec::new() }, crdt_type: Some(CrdtCollectionType::LwwRegister), inner_type: Some(Box::new(TypeRef::string())), }, - crate::export::CrdtType::UserStorage => TypeRef::Collection { + CrdtType::UserStorage => TypeRef::Collection { collection: CollectionType::Map { key: Box::new(TypeRef::string()), value: Box::new(TypeRef::string()), @@ -189,7 +190,7 @@ pub fn infer_schema_from_database( crdt_type: Some(CrdtCollectionType::UnorderedMap), inner_type: None, }, - crate::export::CrdtType::FrozenStorage => TypeRef::Collection { + CrdtType::FrozenStorage => TypeRef::Collection { collection: CollectionType::Map { key: Box::new(TypeRef::string()), value: Box::new(TypeRef::string()), @@ -197,7 +198,7 @@ pub fn infer_schema_from_database( crdt_type: Some(CrdtCollectionType::UnorderedMap), inner_type: None, }, - crate::export::CrdtType::Record => { + CrdtType::Record => { // Record type - would need to inspect children to infer fields TypeRef::Collection { collection: CollectionType::Record { fields: Vec::new() }, @@ -205,7 +206,7 @@ pub fn infer_schema_from_database( inner_type: None, } } - 
crate::export::CrdtType::Custom { type_name: _ } => { + CrdtType::Custom(_) => { // Custom type - can't infer without schema TypeRef::Collection { collection: CollectionType::Record { fields: Vec::new() }, diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index aad4dc4fd..498014fd3 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -1,6 +1,7 @@ pub mod cli; use borsh::BorshDeserialize; +use calimero_storage::collections::CrdtType; use calimero_store::types::ContextDagDelta as StoreContextDagDelta; use calimero_wasm_abi::schema::{ CollectionType, CrdtCollectionType, Field, Manifest, ScalarType, TypeDef, TypeRef, @@ -1546,22 +1547,8 @@ impl borsh::BorshDeserialize for Metadata { } } -/// CRDT type identifier for entity metadata. -/// Must match the definition in calimero-storage. -#[derive(borsh::BorshDeserialize, Debug, Clone, PartialEq, Eq)] -#[allow(dead_code)] -pub(crate) enum CrdtType { - LwwRegister, - Counter, - Rga, - UnorderedMap, - UnorderedSet, - Vector, - UserStorage, - FrozenStorage, - Record, - Custom { type_name: String }, -} +// CrdtType is now imported from calimero_storage::collections::CrdtType +// to ensure Borsh serialization compatibility with the storage layer. #[derive(borsh::BorshDeserialize, Clone)] #[expect( From 93a73300af0aadcb7f6c6d930fb3c5e6ea64f001 Mon Sep 17 00:00:00 2001 From: xilosada Date: Thu, 5 Feb 2026 16:52:49 +0100 Subject: [PATCH 4/7] fix(merodb): use children presence instead of magic number for Counter/NestedCollection detection Replace fragile counter_value.abs() < 1_000_000 heuristic with a more reliable check: if nested children are found, treat as NestedCollection; otherwise treat as Counter. This is a fallback for legacy data without crdt_type metadata. New data created after PR #1864 will have crdt_type set in EntityIndex metadata and be handled directly without needing heuristics. 
--- tools/merodb/src/export.rs | 84 ++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index 498014fd3..1f4cb0314 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -3400,24 +3400,26 @@ fn decode_collection_entries_bfs( negative_id, ); - if counter_value.abs() < 1_000_000 { - eprintln!( - "[decode_collection_entries_bfs] Entry {} (Vector item) is Counter: value={}", + // Note: For legacy data without crdt_type metadata, we attempt to read + // the value as a Counter. New data created after PR #1864 will have + // crdt_type set in metadata and won't need this heuristic. + // A successful read (non-zero or non-error sum) indicates Counter data. + eprintln!( + "[decode_collection_entries_bfs] Entry {} (Vector item, no crdt_type metadata): Counter value read = {}", entry_state_key, counter_value ); - entries.push(json!({ - "state_key": entry_state_key, - "entry": { - "type": "VectorEntry", - "index": entries.len(), - "value": { - "type": "Counter", - "parsed": counter_value, - } + entries.push(json!({ + "state_key": entry_state_key, + "entry": { + "type": "VectorEntry", + "index": entries.len(), + "value": { + "type": "Counter", + "parsed": counter_value, } - })); - continue; - } + } + })); + continue; } } } @@ -3437,11 +3439,14 @@ fn decode_collection_entries_bfs( let value_bytes = &entry_value[4 + key_len..]; // Check if value looks like a Counter (64 bytes = two 32-byte IDs) + // This handles raw entry data without EntityIndex metadata. + // For new data created after PR #1864, EntityIndex will have crdt_type + // and be handled in the Ok(entry_index) branch above. 
if value_bytes.len() == 64 { let positive_id = &value_bytes[..32]; let negative_id = &value_bytes[32..]; - // Try to read the actual counter value + // Try to read the actual counter value from the internal maps let counter_value = read_counter_value( db, state_cf, @@ -3450,12 +3455,21 @@ fn decode_collection_entries_bfs( negative_id, ); - // Sanity check: if counter value is reasonable (between -1M and 1M), it's likely a Counter - // Otherwise it might be an UnorderedSet or other 64-byte structure - if counter_value.abs() < 1_000_000 { + // Also try to read the first collection as nested children + let nested_children = read_nested_collection_entries( + db, + state_cf, + context_id, + positive_id, + ); + + // Heuristic: If nested_children is non-empty, it's likely a nested collection. + // If counter_value is non-zero and children are empty, it's likely a Counter. + // This is a fallback for legacy data without crdt_type metadata. + if !nested_children.is_empty() { eprintln!( - "[decode_collection_entries_bfs] Entry {} is Counter: key='{}', value={}", - entry_state_key, key_str, counter_value + "[decode_collection_entries_bfs] Entry {} is NestedCollection (64 bytes, has children): key='{}', children={}", + entry_state_key, key_str, nested_children.len() ); entries.push(json!({ "state_key": entry_state_key, @@ -3463,26 +3477,19 @@ fn decode_collection_entries_bfs( "type": "Entry", "key": { "parsed": key_str, "type": "scalar::String" }, "value": { - "type": "Counter", - "parsed": counter_value, - "display": format!("πŸ”’ {}", counter_value), + "type": "NestedCollection", + "crdt_type": "UnorderedMap", + "children": nested_children, + "children_count": nested_children.len(), } } })); continue; } else { - // Likely an UnorderedSet or nested Map (64 bytes = two collection IDs) - // Try to read the first collection to get its children - let nested_children = read_nested_collection_entries( - db, - state_cf, - context_id, - positive_id, - ); - + // No children found - 
treat as Counter (its internal maps may have entries) eprintln!( - "[decode_collection_entries_bfs] Entry {} is NestedCollection (64 bytes): key='{}', children={}", - entry_state_key, key_str, nested_children.len() + "[decode_collection_entries_bfs] Entry {} is Counter (64 bytes, no nested children): key='{}', value={}", + entry_state_key, key_str, counter_value ); entries.push(json!({ "state_key": entry_state_key, @@ -3490,10 +3497,9 @@ fn decode_collection_entries_bfs( "type": "Entry", "key": { "parsed": key_str, "type": "scalar::String" }, "value": { - "type": "NestedCollection", - "crdt_type": if nested_children.is_empty() { "UnorderedSet" } else { "UnorderedMap" }, - "children": nested_children, - "children_count": nested_children.len(), + "type": "Counter", + "parsed": counter_value, + "display": format!("πŸ”’ {}", counter_value), } } })); From 2f69f1a1eed8aabd100449059111fcc80ca0a49f Mon Sep 17 00:00:00 2001 From: xilosada Date: Thu, 5 Feb 2026 16:55:20 +0100 Subject: [PATCH 5/7] fix(merodb): use saturating arithmetic for counter value sums Prevent potential integer overflow when summing counter values: - Use i64::try_from(u64) with fallback to i64::MAX for large values - Use saturating_add/saturating_sub for sum calculations This ensures counter visualization doesn't panic or produce incorrect results for counters with very large values (> i64::MAX). 
--- tools/merodb/src/export.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/merodb/src/export.rs b/tools/merodb/src/export.rs index 1f4cb0314..02bb1bb19 100644 --- a/tools/merodb/src/export.rs +++ b/tools/merodb/src/export.rs @@ -3112,7 +3112,10 @@ fn read_counter_value( entry_value[value_offset + 6], entry_value[value_offset + 7], ]); - sum += count as i64; + // Use saturating conversion to avoid overflow + // u64 values > i64::MAX will be clamped to i64::MAX + let count_i64 = i64::try_from(count).unwrap_or(i64::MAX); + sum = sum.saturating_add(count_i64); } } } @@ -3123,8 +3126,8 @@ fn read_counter_value( sum }; - total += sum_map_values(positive_id); - total -= sum_map_values(negative_id); + total = total.saturating_add(sum_map_values(positive_id)); + total = total.saturating_sub(sum_map_values(negative_id)); total } From 34e97d95f8ca34e4515bd34a4907a6c95da5cff6 Mon Sep 17 00:00:00 2001 From: xilosada Date: Thu, 5 Feb 2026 16:56:58 +0100 Subject: [PATCH 6/7] fix(merodb): add warning when schema inference runs without context_id When no context_id is provided, schema inference uses [0; 32] as a fallback root ID which may produce incorrect or incomplete results if the database contains multiple contexts. Added a warning to inform users about this limitation and recommend providing a specific context_id. --- tools/merodb/src/abi.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/merodb/src/abi.rs b/tools/merodb/src/abi.rs index 577620b6e..9db9a804a 100644 --- a/tools/merodb/src/abi.rs +++ b/tools/merodb/src/abi.rs @@ -104,7 +104,15 @@ pub fn infer_schema_from_database( ctx_id.len() ) })?, - None => [0u8; 32], + None => { + eprintln!( + "[WARNING] No context_id provided for schema inference. \ + Using [0; 32] as fallback root ID. This may produce incorrect or incomplete \ + schema if the database contains multiple contexts. 
Consider providing a \ + specific context_id for accurate schema inference." + ); + [0u8; 32] + } }; // Scan State column for EntityIndex entries From 20c9cae20ffb4872f74b0d8c1bdd954588edb343 Mon Sep 17 00:00:00 2001 From: xilosada Date: Fri, 6 Feb 2026 00:39:03 +0100 Subject: [PATCH 7/7] fix(kv-store-init): use new_with_field_name for deterministic ID The app was using UnorderedMap::new() (random ID) then inserting data during init(). After init returns, the macro reassigns the ID to be deterministic, but this doesn't re-key existing database entries. Using new_with_field_name('items') ensures the collection has the correct deterministic ID from the start, so entries inserted during init() are stored under the correct key. --- apps/kv-store-init/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/kv-store-init/src/lib.rs b/apps/kv-store-init/src/lib.rs index 79c2bc5d9..284ef9f47 100644 --- a/apps/kv-store-init/src/lib.rs +++ b/apps/kv-store-init/src/lib.rs @@ -38,7 +38,7 @@ impl KvStoreInit { app::log!("Initializing KvStoreInit with default items"); let mut store = KvStoreInit { - items: UnorderedMap::new(), + items: UnorderedMap::new_with_field_name("items"), }; // Add some initial data during initialization