diff --git a/.gitignore b/.gitignore index bae70c7ff..20ff1822e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ *.ipynb flamegraph.svg target +moon/_build/ +moon_*_fuzz_artifacts*/ dhat-heap.json .DS_Store node_modules/ diff --git a/Cargo.lock b/Cargo.lock index b35fe3c91..a618abd0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -886,6 +886,42 @@ dependencies = [ "serde", ] +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "fuzz" version = "0.1.0" @@ -1506,6 +1542,7 @@ dependencies = [ "rand 0.8.5", "rustc-hash", "serde_json", + "serial_test", "tracing", ] @@ -1836,7 +1873,7 @@ checksum = "3f3d053a135388e6b1df14e8af1212af5064746e9b87a06a345a7a779ee9695a" [[package]] name = "loro-wasm" -version = "1.10.3" +version = "1.10.4" dependencies = [ "console_error_panic_hook", "js-sys", @@ -2222,6 +2259,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.32" @@ -2602,6 +2645,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scc" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" +dependencies = [ + "sdd", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -2614,6 +2666,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sdd" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + [[package]] name = "semver" version = "1.0.26" @@ -2698,6 +2756,32 @@ dependencies = [ "serde", ] +[[package]] +name = "serial_test" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0b343e184fc3b7bb44dff0705fffcf4b3756ba6aff420dddd8b24ca145e555" +dependencies = [ + "futures-executor", + "futures-util", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f50427f258fb77356e4cd4aa0e87e2bd2c66dbcee41dc405282cae2bfc26c83" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "sha2" version = "0.10.9" @@ -2740,6 +2824,12 @@ dependencies = [ "typenum", ] +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + [[package]] name = "smallvec" version = "1.15.1" diff --git a/crates/kv-store/tests/moon_sstable_fixture.rs b/crates/kv-store/tests/moon_sstable_fixture.rs new file mode 100644 index 000000000..b00f6753b --- /dev/null +++ b/crates/kv-store/tests/moon_sstable_fixture.rs @@ -0,0 +1,19 @@ +use bytes::Bytes; +use loro_kv_store::sstable::SsTable; + +#[test] +fn import_moon_encoded_sstable() { + let bytes = Bytes::from_static(include_bytes!("testdata/moon_sstable_simple.bin")); + let table = SsTable::import_all(bytes, true).unwrap(); + let kvs: Vec<(Bytes, Bytes)> = table.iter().collect(); + + assert_eq!( + kvs, + vec![ + (Bytes::from_static(b"a"), Bytes::from_static(b"1")), + (Bytes::from_static(b"ab"), Bytes::from_static(b"2")), + (Bytes::from_static(b"z"), Bytes::from_static(b"")), + ] + ); +} + diff --git a/crates/kv-store/tests/testdata/moon_sstable_simple.bin b/crates/kv-store/tests/testdata/moon_sstable_simple.bin new file mode 100644 index 000000000..da6282ff8 Binary files /dev/null and b/crates/kv-store/tests/testdata/moon_sstable_simple.bin differ diff --git a/crates/loro-internal/src/state/dead_containers_cache.rs b/crates/loro-internal/src/state/dead_containers_cache.rs index 598c0d8dc..c40dbf06e 100644 --- a/crates/loro-internal/src/state/dead_containers_cache.rs +++ b/crates/loro-internal/src/state/dead_containers_cache.rs @@ -21,8 +21,9 @@ impl DocState { pub(crate) fn is_deleted(&mut self, idx: ContainerIdx) -> bool { #[cfg(not(debug_assertions))] { - if let Some(is_deleted) = self.dead_containers_cache.cache.get(&idx) { - return *is_deleted; + // Cache stores only deleted containers. + if self.dead_containers_cache.cache.contains_key(&idx) { + return true; } } @@ -52,8 +53,14 @@ impl DocState { } } - for idx in visited { - self.dead_containers_cache.cache.insert(idx, is_deleted); + if is_deleted { + for idx in visited { + self.dead_containers_cache.cache.insert(idx, true); + } + } else { + for idx in visited { + self.dead_containers_cache.cache.remove(&idx); + } } is_deleted diff --git a/crates/loro/Cargo.toml b/crates/loro/Cargo.toml index dc83c2148..42de337bf 100644 --- a/crates/loro/Cargo.toml +++ b/crates/loro/Cargo.toml @@ -26,7 +26,7 @@ tracing = { workspace = true } rustc-hash = { workspace = true } [dev-dependencies] -serde_json = "1.0.87" +serde_json = { version = "1.0.87", features = ["float_roundtrip"] } anyhow = "1.0.83" ctor = "0.2" dev-utils = { path = "../dev-utils" } @@ -37,6 +37,7 @@ base64 = "0.22.1" serial_test = "3" [features] +default = ["counter"] counter = ["loro-internal/counter"] jsonpath = ["loro-internal/jsonpath"] logging = ["loro-internal/logging"] diff --git a/crates/loro/examples/moon_golden_gen.rs b/crates/loro/examples/moon_golden_gen.rs new file mode 100644 index 000000000..ba9a0f58e --- /dev/null +++ b/crates/loro/examples/moon_golden_gen.rs @@ -0,0 +1,420 @@ +use std::borrow::Cow; +use std::path::{Path, PathBuf}; + +use loro::{ + ExpandType, ExportMode, LoroDoc, LoroValue, StyleConfig, StyleConfigMap, Timestamp, ToJson, + TreeParentId, VersionVector, +}; +use rand::{rngs::StdRng, Rng, SeedableRng}; + +fn usage() -> ! 
{ + eprintln!( + r#"moon_golden_gen (loro) + +Generate a deterministic random Loro document and export: +- FastUpdates (binary) + JsonUpdates (JsonSchema) +- FastSnapshot (binary) + deep JSON (get_deep_value) + +Usage: + cargo run -p loro --example moon_golden_gen -- \ + --out-dir <dir> [--seed <u64>] [--ops <n>] [--commit-every <n>] [--peers <n>] + +Outputs in <out-dir>: + - updates.blob + - updates.json + - snapshot.blob + - snapshot.deep.json + - meta.json +"# + ); + std::process::exit(2); +} + +fn parse_arg_value<'a>(args: &'a [String], name: &str) -> Option<&'a str> { + args.windows(2) + .find_map(|w| (w[0] == name).then_some(w[1].as_str())) +} + +fn parse_u64(args: &[String], name: &str, default: u64) -> u64 { + match parse_arg_value(args, name) { + None => default, + Some(v) => v.parse().unwrap_or_else(|_| usage()), + } +} + +fn parse_usize(args: &[String], name: &str, default: usize) -> usize { + match parse_arg_value(args, name) { + None => default, + Some(v) => v.parse().unwrap_or_else(|_| usage()), + } +} + +fn parse_out_dir(args: &[String]) -> PathBuf { + parse_arg_value(args, "--out-dir") + .map(PathBuf::from) + .unwrap_or_else(|| usage()) +} + +fn write_json(path: &Path, value: &serde_json::Value) -> anyhow::Result<()> { + let s = serde_json::to_string_pretty(value)?; + std::fs::write(path, s)?; + Ok(()) +} + +fn apply_random_ops( + doc: &LoroDoc, + seed: u64, + ops: usize, + commit_every: usize, + peer_ids: &[u64], +) -> anyhow::Result<()> { + let mut rng = StdRng::seed_from_u64(seed); + + let peer_ids = if peer_ids.is_empty() { &[1] } else { peer_ids }; + + let mut styles = StyleConfigMap::new(); + styles.insert( + "bold".into(), + StyleConfig { + expand: ExpandType::After, + }, + ); + styles.insert( + "link".into(), + StyleConfig { + expand: ExpandType::Before, + }, + ); + doc.config_text_style(styles); + + let mut active_peer = peer_ids[0]; + doc.set_peer_id(active_peer)?; + let map = doc.get_map("map"); + let list = doc.get_list("list"); + let text = doc.get_text("text"); + let mlist = doc.get_movable_list("mlist"); + let tree = doc.get_tree("tree"); + tree.enable_fractional_index(0); + + // Stable baseline so root containers don't disappear from deep JSON. + map.insert("keep", 0)?; + list.insert(0, 0)?; + text.insert(0, "hi😀")?; + mlist.insert(0, 0)?; + let keep_node = tree.create(None)?; + tree.get_meta(keep_node)?.insert("title", "keep")?; + + // Ensure Text mark/mark_end coverage. + if text.len_unicode() >= 2 { + text.mark(0..2, "bold", true)?; + if text.len_unicode() >= 3 { + text.mark(1..3, "link", "https://example.com")?; + } + text.unmark(0..1, "bold")?; + } + + // Ensure nested container coverage (container values in map/list/movable_list).
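+ // For example (a sketch; the exact key order in deep JSON is illustrative + // only), the children created below should surface through get_deep_value() + // as nested values, e.g.: + // {"map": {"child_map": {"a": 1, "t": "inner😀"}, "child_list": ["x"], ...}}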
+ let child_map = map.insert_container("child_map", loro::LoroMap::new())?; + child_map.insert("a", 1)?; + let child_text = child_map.insert_container("t", loro::LoroText::new())?; + child_text.insert(0, "inner😀")?; + + let child_list = map.insert_container("child_list", loro::LoroList::new())?; + child_list.insert(0, "x")?; + let child_mlist = map.insert_container("child_mlist", loro::LoroMovableList::new())?; + child_mlist.insert(0, 10)?; + child_mlist.insert(1, 20)?; + child_mlist.mov(0, 1)?; + + let child_tree = map.insert_container("child_tree", loro::LoroTree::new())?; + child_tree.enable_fractional_index(0); + let child_tree_root = child_tree.create(None)?; + child_tree.get_meta(child_tree_root)?.insert("m", 1)?; + + let maps = [map.clone(), child_map]; + let lists = [list.clone(), child_list]; + let texts = [text.clone(), child_text]; + let mlists = [mlist.clone(), child_mlist]; + + struct TreeCtx { + tree: loro::LoroTree, + nodes: Vec<loro::TreeID>, + } + let mut trees = [ + TreeCtx { + tree: tree.clone(), + nodes: vec![keep_node], + }, + TreeCtx { + tree: child_tree, + nodes: vec![child_tree_root], + }, + ]; + + let mut map_keys: Vec<String> = Vec::new(); + let mut child_map_keys: Vec<String> = Vec::new(); + + for i in 0..ops { + // Switch active peer after each commit boundary (when multiple peers are requested). + if commit_every > 0 && i > 0 && i % commit_every == 0 && peer_ids.len() > 1 { + active_peer = peer_ids[rng.gen_range(0..peer_ids.len())]; + doc.set_peer_id(active_peer)?; + } + + let op_type = rng.gen_range(0..18); + match op_type { + 0 => { + let key = format!("k{}", rng.gen::<u8>()); + map.insert(&key, rng.gen::<i32>())?; + map_keys.push(key); + } + 1 => { + let key = format!("k{}", rng.gen::<u8>()); + let value = if rng.gen::<bool>() { + LoroValue::from(rng.gen::<i64>()) + } else { + LoroValue::Null + }; + map.insert(&key, value)?; + map_keys.push(key); + } + 2 => { + // Insert more value kinds (string/f64/binary) into either root map or child_map. + let (target, keys) = if rng.gen::<bool>() { + (&maps[0], &mut map_keys) + } else { + (&maps[1], &mut child_map_keys) + }; + let key = format!("v{}", rng.gen::<u8>()); + match rng.gen_range(0..3) { + 0 => target.insert(&key, "str😀")?, + 1 => target.insert(&key, rng.gen::<f64>() - 0.5)?, + _ => target.insert(&key, vec![0u8, 1, 2, rng.gen::<u8>()])?, + } + keys.push(key); + } + 3 => { + // Map delete (guarantee it hits an existing key sometimes).
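+ // Sketch of the intent: map_keys/child_map_keys track keys this loop has + // inserted, so a delete usually hits a live entry; duplicates are fine since + // swap_remove drops one tracked copy per delete. Keys only ever come from + // the "k{..}"/"v{..}" formats above, which the assert below documents. + debug_assert!(map_keys.iter().all(|k| k.starts_with('k') || k.starts_with('v')));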
+ if !map_keys.is_empty() && rng.gen::<bool>() { + let idx = rng.gen_range(0..map_keys.len()); + let key = map_keys.swap_remove(idx); + map.delete(&key)?; + } else if !child_map_keys.is_empty() { + let idx = rng.gen_range(0..child_map_keys.len()); + let key = child_map_keys.swap_remove(idx); + maps[1].delete(&key)?; + } + } + 4 => { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 5 => { + let target = &lists[rng.gen_range(0..lists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 6 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let index = rng.gen_range(0..=target.len_unicode()); + let s = match rng.gen_range(0..8) { + 0 => "a", + 1 => "b", + 2 => "Z", + 3 => "😀", + 4 => "中", + 5 => "ab", + 6 => "😀!", + _ => "!", + }; + target.insert(index, s)?; + } + 7 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let len_u = target.len_unicode(); + if len_u > 0 { + let index = rng.gen_range(0..len_u); + let max_len = (len_u - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 8 => { + // Text mark/unmark + let target = &texts[rng.gen_range(0..texts.len())]; + let len_u = target.len_unicode(); + if len_u >= 2 { + let start = rng.gen_range(0..len_u - 1); + let end = rng.gen_range(start + 1..=len_u); + if rng.gen::<bool>() { + let key = if rng.gen::<bool>() { "bold" } else { "link" }; + let value: LoroValue = if key == "bold" { + LoroValue::from(true) + } else { + LoroValue::from("https://loro.dev") + }; + let _ = target.mark(start..end, key, value); + } else { + let key = if rng.gen::<bool>() { "bold" } else { "link" }; + let _ = target.unmark(start..end, key); + } + } + } + 9 => { + // MovableList insert + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 10 => { + // MovableList delete + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 11 => { + // MovableList set + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + target.set(index, rng.gen::<i32>())?; + } + } + 12 => { + // MovableList move + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() >= 2 { + let from = rng.gen_range(0..target.len()); + let to = rng.gen_range(0..target.len()); + let _ = target.mov(from, to); + } + } + 13 => { + // Tree create + let t = &mut trees[rng.gen_range(0..trees.len())]; + let parent = if t.nodes.is_empty() || rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let id = t.tree.create(parent)?; + t.nodes.push(id); + } + 14 => { + // Tree move + let t = &mut trees[rng.gen_range(0..trees.len())]; + if t.nodes.len() >= 2 { + let target = t.nodes[rng.gen_range(0..t.nodes.len())]; + let parent = if rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let _ = t.tree.mov(target, parent); + } + } + 15 => { + // Tree delete (try to keep at least 1 node around) + let t = &mut trees[rng.gen_range(0..trees.len())]; + if
t.nodes.len() > 1 { + let idx = rng.gen_range(0..t.nodes.len()); + let id = t.nodes.swap_remove(idx); + let _ = t.tree.delete(id); + } + } + 16 => { + // Tree meta insert + let t = &mut trees[rng.gen_range(0..trees.len())]; + if !t.nodes.is_empty() { + let id = t.nodes[rng.gen_range(0..t.nodes.len())]; + if let Ok(meta) = t.tree.get_meta(id) { + let key = format!("m{}", rng.gen::<u8>()); + let _ = meta.insert(&key, rng.gen::<i32>()); + } + } + } + 17 => { + // Insert container values into sequence containers. + if rng.gen::<bool>() { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroMap::new()); + } else { + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroText::new()); + } + } + _ => unreachable!(), + } + + if commit_every > 0 && (i + 1) % commit_every == 0 { + let msg = format!("commit-{} seed={} peer={}", i + 1, seed, active_peer); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(i as Timestamp); + doc.commit(); + } + } + + let msg = format!("final seed={seed} ops={ops}"); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(ops as Timestamp); + doc.commit(); + Ok(()) +} + +fn main() -> anyhow::Result<()> { + let args: Vec<String> = std::env::args().collect(); + if args.iter().any(|a| a == "--help" || a == "-h") { + usage(); + } + + let out_dir = parse_out_dir(&args); + let seed = parse_u64(&args, "--seed", 1); + let ops = parse_usize(&args, "--ops", 200); + let commit_every = parse_usize(&args, "--commit-every", 20); + let peers = parse_usize(&args, "--peers", 1); + + std::fs::create_dir_all(&out_dir)?; + + let doc = LoroDoc::new(); + let peer_ids: Vec<u64> = (1..=peers.max(1) as u64).collect(); + apply_random_ops(&doc, seed, ops, commit_every, &peer_ids)?; + + let start = VersionVector::default(); + let end = doc.oplog_vv(); + + let updates_blob = doc.export(ExportMode::Updates { + from: Cow::Borrowed(&start), + })?; + std::fs::write(out_dir.join("updates.blob"), &updates_blob)?; + + let updates_schema = doc.export_json_updates(&start, &end); + let updates_json = serde_json::to_value(&updates_schema)?; + write_json(&out_dir.join("updates.json"), &updates_json)?; + + let snapshot_blob = doc.export(ExportMode::Snapshot)?; + std::fs::write(out_dir.join("snapshot.blob"), &snapshot_blob)?; + + let deep = doc.get_deep_value().to_json_value(); + write_json(&out_dir.join("snapshot.deep.json"), &deep)?; + + let meta = serde_json::json!({ + "seed": seed, + "ops": ops, + "commit_every": commit_every, + "peers": peers, + }); + write_json(&out_dir.join("meta.json"), &meta)?; + + Ok(()) +} diff --git a/crates/loro/examples/moon_jsonschema_fuzz.rs b/crates/loro/examples/moon_jsonschema_fuzz.rs new file mode 100644 index 000000000..f53fa8233 --- /dev/null +++ b/crates/loro/examples/moon_jsonschema_fuzz.rs @@ -0,0 +1,672 @@ +use std::borrow::Cow; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::{SystemTime, UNIX_EPOCH}; + +use loro::{ + Container, ExpandType, ExportMode, Frontiers, LoroDoc, LoroValue, StyleConfig, StyleConfigMap, + Timestamp, ToJson, TreeParentId, ValueOrContainer, VersionVector, +}; +use rand::{rngs::StdRng, Rng, SeedableRng}; + +fn configure_styles(doc: &LoroDoc) { + let mut styles = StyleConfigMap::new(); + styles.insert( + "bold".into(), + StyleConfig { + expand: ExpandType::After, + }, + ); + styles.insert( + "link".into(), + StyleConfig {
expand: ExpandType::Before, + }, + ); + doc.config_text_style(styles); +} + +fn usage() -> ! { + eprintln!( + r#"moon_jsonschema_fuzz (loro) + +Randomly generate Loro ops in Rust, export JsonSchema updates, then ask MoonBit to +encode them into a FastUpdates (mode=4) blob. Import the blob back in Rust and +validate the final state matches. + +This is a semantic test for Moon `encode-jsonschema` (JsonSchema -> binary Updates). + +Usage: + MOON_BIN=~/.moon/bin/moon NODE_BIN=node \ + cargo run -p loro --example moon_jsonschema_fuzz -- \ + --iters <n> [--seed <u64>] [--ops <n>] [--commit-every <n>] [--peers <n>] [--out-dir <dir>] + +If a mismatch happens, this tool writes a reproducible case into: + <out-dir>/case-<seed>/ + +"# + ); + std::process::exit(2); +} + +fn parse_arg_value<'a>(args: &'a [String], name: &str) -> Option<&'a str> { + args.windows(2) + .find_map(|w| (w[0] == name).then_some(w[1].as_str())) +} + +fn parse_usize(args: &[String], name: &str, default: usize) -> usize { + match parse_arg_value(args, name) { + None => default, + Some(v) => v.parse().unwrap_or_else(|_| usage()), + } +} + +fn parse_u64(args: &[String], name: &str, default: u64) -> u64 { + match parse_arg_value(args, name) { + None => default, + Some(v) => v.parse().unwrap_or_else(|_| usage()), + } +} + +fn parse_out_dir(args: &[String]) -> PathBuf { + parse_arg_value(args, "--out-dir") + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("moon_jsonschema_fuzz_artifacts")) +} + +fn bin_available(bin: &str, args: &[&str]) -> bool { + Command::new(bin) + .args(args) + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +fn repo_root() -> PathBuf { + // crates/loro -> crates -> repo root + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("repo root") + .to_path_buf() +} + +fn build_moon_cli_js(moon_bin: &str) -> anyhow::Result<PathBuf> { + let root = repo_root(); + let moon_dir = root.join("moon"); + let status = Command::new(moon_bin) + .current_dir(&moon_dir) + .args(["build", "--target", "js", "--release", "cmd/loro_codec_cli"]) + .status()?; + anyhow::ensure!(status.success(), "failed to build MoonBit CLI"); + Ok(moon_dir.join("_build/js/release/build/cmd/loro_codec_cli/loro_codec_cli.js")) +} + +fn run_encode_jsonschema(node_bin: &str, cli_js: &Path, input_json: &str) -> anyhow::Result<Vec<u8>> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-jsonschema-fuzz-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.json"); + let out_path = tmp.join("out.blob"); + std::fs::write(&in_path, input_json.as_bytes())?; + + let status = Command::new(node_bin) + .arg(cli_js) + .args([ + "encode-jsonschema", + in_path.to_str().unwrap(), + out_path.to_str().unwrap(), + ]) + .status()?; + anyhow::ensure!(status.success(), "node encode-jsonschema failed"); + + let out = std::fs::read(&out_path)?; + Ok(out) +} + +fn write_json(path: &Path, value: &serde_json::Value) -> anyhow::Result<()> { + let s = serde_json::to_string_pretty(value)?; + std::fs::write(path, s)?; + Ok(()) +} + +fn first_json_diff_path( + a: &serde_json::Value, + b: &serde_json::Value, + path: &str, +) -> Option<String> { + use serde_json::Value; + if a == b { + return None; + } + match (a, b) { + (Value::Object(ao), Value::Object(bo)) => { + for (k, av) in ao { + let Some(bv) = bo.get(k) else { + return Some(format!("{path}.{k} (missing rhs)")); + }; + if let Some(p) = first_json_diff_path(av, bv, &format!("{path}.{k}"))
{ + return Some(p); + } + } + for k in bo.keys() { + if !ao.contains_key(k) { + return Some(format!("{path}.{k} (missing lhs)")); + } + } + Some(path.to_string()) + } + (Value::Array(aa), Value::Array(ba)) => { + if aa.len() != ba.len() { + return Some(format!("{path} (len {} != {})", aa.len(), ba.len())); + } + for (i, (av, bv)) in aa.iter().zip(ba.iter()).enumerate() { + if let Some(p) = first_json_diff_path(av, bv, &format!("{path}[{i}]")) { + return Some(p); + } + } + Some(path.to_string()) + } + _ => Some(path.to_string()), + } +} + +fn frontiers_sorted_strings(frontiers: &Frontiers) -> Vec<String> { + let mut ids: Vec<String> = frontiers.iter().map(|id| id.to_string()).collect(); + ids.sort(); + ids +} + +fn richtext_json_child_text(doc: &LoroDoc) -> anyhow::Result<serde_json::Value> { + let map = doc.get_map("map"); + let Some(ValueOrContainer::Container(Container::Map(child_map))) = map.get("child_map") else { + anyhow::bail!("missing map.child_map container") + }; + let Some(ValueOrContainer::Container(Container::Text(child_text))) = child_map.get("t") else { + anyhow::bail!("missing map.child_map.t container") + }; + Ok(child_text.get_richtext_value().to_json_value()) +} + +fn apply_random_ops( + doc: &LoroDoc, + seed: u64, + ops: usize, + commit_every: usize, + peer_ids: &[u64], +) -> anyhow::Result<Vec<Frontiers>> { + let mut rng = StdRng::seed_from_u64(seed); + let peer_ids = if peer_ids.is_empty() { &[1] } else { peer_ids }; + + configure_styles(doc); + + let mut active_peer = peer_ids[0]; + doc.set_peer_id(active_peer)?; + + let map = doc.get_map("map"); + let list = doc.get_list("list"); + let text = doc.get_text("text"); + let mlist = doc.get_movable_list("mlist"); + let tree = doc.get_tree("tree"); + tree.enable_fractional_index(0); + + // Counter (always enabled by default in this repo). + let counter = map.insert_container("counter", loro::LoroCounter::new())?; + + // Stable baseline so root containers don't disappear from deep JSON. + map.insert("keep", 0)?; + list.insert(0, 0)?; + text.insert(0, "hi😀")?; + mlist.insert(0, 0)?; + counter.increment(0.0)?; + let keep_node = tree.create(None)?; + tree.get_meta(keep_node)?.insert("title", "keep")?; + + // Ensure nested container coverage (container values in map/list/movable_list).
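+ // Quick sanity sketch (assumes map.get reflects the last write): the baseline + // "keep" entry is a plain value, so the root map always survives into the + // deep JSON that the Moon encoder must reproduce. + debug_assert!(matches!(map.get("keep"), Some(ValueOrContainer::Value(_))));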
+ let child_map = map.insert_container("child_map", loro::LoroMap::new())?; + child_map.insert("a", 1)?; + let child_text = child_map.insert_container("t", loro::LoroText::new())?; + child_text.insert(0, "inner😀")?; + + let child_list = map.insert_container("child_list", loro::LoroList::new())?; + child_list.insert(0, "x")?; + let child_mlist = map.insert_container("child_mlist", loro::LoroMovableList::new())?; + child_mlist.insert(0, 10)?; + child_mlist.insert(1, 20)?; + child_mlist.mov(0, 1)?; + + let child_tree = map.insert_container("child_tree", loro::LoroTree::new())?; + child_tree.enable_fractional_index(0); + let child_tree_root = child_tree.create(None)?; + child_tree.get_meta(child_tree_root)?.insert("m", 1)?; + + let counters = [counter]; + let maps = [map.clone(), child_map]; + let lists = [list.clone(), child_list]; + let texts = [text.clone(), child_text]; + let mlists = [mlist.clone(), child_mlist]; + + struct TreeCtx { + tree: loro::LoroTree, + nodes: Vec<loro::TreeID>, + } + let mut trees = [ + TreeCtx { + tree: tree.clone(), + nodes: vec![keep_node], + }, + TreeCtx { + tree: child_tree, + nodes: vec![child_tree_root], + }, + ]; + + let mut map_keys: Vec<String> = Vec::new(); + let mut child_map_keys: Vec<String> = Vec::new(); + + let mut frontiers: Vec<Frontiers> = Vec::new(); + + for i in 0..ops { + // Switch active peer after each commit boundary (when multiple peers are requested). + if commit_every > 0 && i > 0 && i % commit_every == 0 && peer_ids.len() > 1 { + active_peer = peer_ids[rng.gen_range(0..peer_ids.len())]; + doc.set_peer_id(active_peer)?; + } + + let op_type = rng.gen_range(0..20); + match op_type { + 0 => { + let key = format!("k{}", rng.gen::<u8>()); + map.insert(&key, rng.gen::<i32>())?; + map_keys.push(key); + } + 1 => { + let key = format!("k{}", rng.gen::<u8>()); + let value = if rng.gen::<bool>() { + LoroValue::from(rng.gen::<i64>()) + } else { + LoroValue::Null + }; + map.insert(&key, value)?; + map_keys.push(key); + } + 2 => { + // Insert more value kinds (string/f64/binary) into either root map or child_map. + let (target, keys) = if rng.gen::<bool>() { + (&maps[0], &mut map_keys) + } else { + (&maps[1], &mut child_map_keys) + }; + let key = format!("v{}", rng.gen::<u8>()); + match rng.gen_range(0..3) { + 0 => target.insert(&key, "str😀")?, + 1 => target.insert(&key, rng.gen::<f64>() - 0.5)?, + _ => target.insert(&key, vec![0u8, 1, 2, rng.gen::<u8>()])?, + } + keys.push(key); + } + 3 => { + // Map delete (guarantee it hits an existing key sometimes).
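+ // Note (sketch of the intent): deletes draw from the locally tracked key + // lists, so they mostly target live entries in either the root map or + // child_map; a stale duplicate at worst deletes nothing.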
+ if !map_keys.is_empty() && rng.gen::<bool>() { + let idx = rng.gen_range(0..map_keys.len()); + let key = map_keys.swap_remove(idx); + map.delete(&key)?; + } else if !child_map_keys.is_empty() { + let idx = rng.gen_range(0..child_map_keys.len()); + let key = child_map_keys.swap_remove(idx); + maps[1].delete(&key)?; + } + } + 4 => { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 5 => { + let target = &lists[rng.gen_range(0..lists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 6 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let index = rng.gen_range(0..=target.len_unicode()); + target.insert(index, "x😀")?; + } + 7 => { + let target = &texts[rng.gen_range(0..texts.len())]; + if target.len_unicode() > 0 { + let index = rng.gen_range(0..target.len_unicode()); + let max_len = (target.len_unicode() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 8 => { + // Text mark + let target = &texts[rng.gen_range(0..texts.len())]; + if target.len_unicode() >= 2 { + let start = rng.gen_range(0..target.len_unicode()); + let end = rng.gen_range(start..=target.len_unicode()); + let _ = target.mark(start..end, "bold", true); + } + } + 9 => { + // Text unmark + let target = &texts[rng.gen_range(0..texts.len())]; + if target.len_unicode() >= 1 { + let start = rng.gen_range(0..target.len_unicode()); + let end = rng.gen_range(start..=target.len_unicode()); + let _ = target.unmark(start..end, "bold"); + } + } + 10 => { + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 11 => { + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 12 => { + // MovableList set + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + target.set(index, rng.gen::<i32>())?; + } + } + 13 => { + // MovableList move + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() >= 2 { + let from = rng.gen_range(0..target.len()); + let to = rng.gen_range(0..target.len()); + let _ = target.mov(from, to); + } + } + 14 => { + // Tree create + let t = &mut trees[rng.gen_range(0..trees.len())]; + let parent = if t.nodes.is_empty() || rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let id = t.tree.create(parent)?; + t.nodes.push(id); + } + 15 => { + // Tree move + let t = &mut trees[rng.gen_range(0..trees.len())]; + if t.nodes.len() >= 2 { + let target = t.nodes[rng.gen_range(0..t.nodes.len())]; + let parent = if rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let _ = t.tree.mov(target, parent); + } + } + 16 => { + // Tree delete (try to keep at least 1 node around) + let t = &mut trees[rng.gen_range(0..trees.len())]; + if t.nodes.len() > 1 { + let idx = rng.gen_range(0..t.nodes.len()); + let id = t.nodes.swap_remove(idx); + let _ = t.tree.delete(id); + } + } + 17 => { + // Tree meta insert + let t = &mut
trees[rng.gen_range(0..trees.len())]; + if !t.nodes.is_empty() { + let id = t.nodes[rng.gen_range(0..t.nodes.len())]; + if let Ok(meta) = t.tree.get_meta(id) { + let key = format!("m{}", rng.gen::<u8>()); + let _ = meta.insert(&key, rng.gen::<i32>()); + } + } + } + 18 => { + // Insert container values into sequence containers. + if rng.gen::<bool>() { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroMap::new()); + } else { + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroText::new()); + } + } + 19 => { + // Counter inc/dec + let target = &counters[rng.gen_range(0..counters.len())]; + let x = (rng.gen::<f64>() - 0.5) * 10.0; + if rng.gen::<bool>() { + let _ = target.increment(x); + } else { + let _ = target.decrement(x); + } + } + _ => unreachable!(), + } + + if commit_every > 0 && (i + 1) % commit_every == 0 { + let msg = format!("commit-{} seed={} peer={}", i + 1, seed, active_peer); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(i as Timestamp); + doc.commit(); + let f = doc.state_frontiers(); + if frontiers.last().map_or(true, |last| last != &f) { + frontiers.push(f); + } + } + } + + let msg = format!("final seed={seed} ops={ops}"); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(ops as Timestamp); + doc.commit(); + let f = doc.state_frontiers(); + if frontiers.last().map_or(true, |last| last != &f) { + frontiers.push(f); + } + + Ok(frontiers) +} + +fn main() -> anyhow::Result<()> { + let args: Vec<String> = std::env::args().collect(); + if args.iter().any(|a| a == "--help" || a == "-h") { + usage(); + } + + let iters = parse_usize(&args, "--iters", 100); + if iters == 0 { + usage(); + } + + let ops = parse_usize(&args, "--ops", 200); + let commit_every = parse_usize(&args, "--commit-every", 20); + let peers_n = parse_usize(&args, "--peers", 3).max(1); + + let seed = parse_u64( + &args, + "--seed", + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(), + ); + + let out_dir = parse_out_dir(&args); + std::fs::create_dir_all(&out_dir)?; + + let moon_bin = std::env::var("MOON_BIN").unwrap_or_else(|_| "moon".to_string()); + let node_bin = std::env::var("NODE_BIN").unwrap_or_else(|_| "node".to_string()); + anyhow::ensure!( + bin_available(&moon_bin, &["version"]), + "moon not available (set MOON_BIN)" + ); + anyhow::ensure!( + bin_available(&node_bin, &["--version"]), + "node not available (set NODE_BIN)" + ); + + let cli_js = build_moon_cli_js(&moon_bin)?; + + let peer_ids: Vec<u64> = (1..=peers_n as u64).collect(); + + for i in 0..iters { + let case_seed = seed.wrapping_add(i as u64); + + let doc = LoroDoc::new(); + let commit_frontiers = apply_random_ops(&doc, case_seed, ops, commit_every, &peer_ids)?; + + let expected_local = doc.get_deep_value().to_json_value(); + let end = doc.oplog_vv(); + + // Choose a deterministic starting point (empty or a commit frontier).
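+ // Sketch note: the xor constant below (an arbitrary salt, as written) derives + // a fresh RNG from case_seed, so the start-point choice is deterministic per + // case yet independent of the op-generation stream.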
+ let mut rng = StdRng::seed_from_u64(case_seed ^ 0xD1B5_4A32_D192_ED03); + let mut starts: Vec<Option<Frontiers>> = vec![None]; + let end_frontiers = commit_frontiers + .last() + .cloned() + .ok_or_else(|| anyhow::anyhow!("missing end frontiers"))?; + for f in &commit_frontiers { + if f != &end_frontiers { + starts.push(Some(f.clone())); + } + } + let start_frontiers: Option<Frontiers> = starts[rng.gen_range(0..starts.len())].clone(); + + let (start_vv, base_snapshot_blob) = match &start_frontiers { + None => (VersionVector::default(), None), + Some(f) => { + let vv: VersionVector = doc + .frontiers_to_vv(f) + .ok_or_else(|| anyhow::anyhow!("failed to convert frontiers to vv"))?; + let base_snapshot = doc.export(ExportMode::SnapshotAt { + version: Cow::Borrowed(f), + })?; + (vv, Some(base_snapshot)) + } + }; + + let rust_updates_blob = doc.export(ExportMode::Updates { + from: Cow::Borrowed(&start_vv), + })?; + let expected_doc = LoroDoc::new(); + configure_styles(&expected_doc); + if let Some(base_snapshot) = &base_snapshot_blob { + expected_doc.import(base_snapshot)?; + } + expected_doc.import(&rust_updates_blob)?; + let expected = expected_doc.get_deep_value().to_json_value(); + + let schema = doc.export_json_updates(&start_vv, &end); + let json = serde_json::to_string(&schema)?; + + let out_blob = run_encode_jsonschema(&node_bin, &cli_js, &json)?; + let got_doc = LoroDoc::new(); + configure_styles(&got_doc); + if let Some(base_snapshot) = &base_snapshot_blob { + got_doc.import(base_snapshot)?; + } + got_doc.import(&out_blob)?; + let got = got_doc.get_deep_value().to_json_value(); + + let expected_vv = expected_doc.oplog_vv(); + let got_vv = got_doc.oplog_vv(); + + let expected_frontiers = expected_doc.state_frontiers(); + let got_frontiers = got_doc.state_frontiers(); + let end_frontiers = doc.state_frontiers(); + + let expected_rich_root = expected_doc.get_text("text").get_richtext_value().to_json_value(); + let got_rich_root = got_doc.get_text("text").get_richtext_value().to_json_value(); + let expected_rich_child = richtext_json_child_text(&expected_doc)?; + let got_rich_child = richtext_json_child_text(&got_doc)?; + + let ok = got == expected + && got_vv == end + && expected_vv == end + && frontiers_sorted_strings(&got_frontiers) == frontiers_sorted_strings(&end_frontiers) + && frontiers_sorted_strings(&expected_frontiers) + == frontiers_sorted_strings(&end_frontiers) + && got_rich_root == expected_rich_root + && got_rich_child == expected_rich_child; + + if !ok { + let case_dir = out_dir.join(format!("case-{case_seed}")); + std::fs::create_dir_all(&case_dir)?; + + std::fs::write(case_dir.join("schema.json"), &json)?; + std::fs::write(case_dir.join("updates_moon.blob"), &out_blob)?; + std::fs::write(case_dir.join("updates_rust.blob"), &rust_updates_blob)?; + + if let Some(base_snapshot) = &base_snapshot_blob { + std::fs::write(case_dir.join("base_snapshot.blob"), base_snapshot)?; + } + + write_json(&case_dir.join("expected.json"), &expected)?; + write_json(&case_dir.join("got.json"), &got)?; + write_json(&case_dir.join("expected_local.json"), &expected_local)?; + write_json(&case_dir.join("expected_richtext_root.json"), &expected_rich_root)?; + write_json(&case_dir.join("got_richtext_root.json"), &got_rich_root)?; + write_json(&case_dir.join("expected_richtext_child.json"), &expected_rich_child)?; + write_json(&case_dir.join("got_richtext_child.json"), &got_rich_child)?; + + let start_ids: Option<Vec<String>> = + start_frontiers.as_ref().map(|f| f.iter().map(|id| id.to_string()).collect()); + let meta =
serde_json::json!({ + "seed": case_seed, + "ops": ops, + "commit_every": commit_every, + "peers": peer_ids, + "start_frontiers": start_ids, + "end_frontiers": frontiers_sorted_strings(&end_frontiers), + "expected_frontiers": frontiers_sorted_strings(&expected_frontiers), + "got_frontiers": frontiers_sorted_strings(&got_frontiers), + "end_vv": format!("{end:?}"), + "expected_vv": format!("{expected_vv:?}"), + "got_vv": format!("{got_vv:?}"), + "diff_path": first_json_diff_path(&got, &expected, "$"), + "local_diff_path": first_json_diff_path(&got, &expected_local, "$"), + "richtext_root_diff_path": first_json_diff_path(&got_rich_root, &expected_rich_root, "$"), + "richtext_child_diff_path": first_json_diff_path(&got_rich_child, &expected_rich_child, "$"), + }); + write_json(&case_dir.join("meta.json"), &meta)?; + + anyhow::bail!( + "encode-jsonschema mismatch (seed={case_seed}); artifacts written to {}", + case_dir.display() + ); + } + + if (i + 1) % 50 == 0 { + eprintln!("ok: {}/{} (seed={case_seed})", i + 1, iters); + } + } + + eprintln!("ok: all {iters} iterations passed (base_seed={seed})"); + Ok(()) +} diff --git a/crates/loro/examples/moon_snapshot_fuzz.rs b/crates/loro/examples/moon_snapshot_fuzz.rs new file mode 100644 index 000000000..990d33510 --- /dev/null +++ b/crates/loro/examples/moon_snapshot_fuzz.rs @@ -0,0 +1,548 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::{SystemTime, UNIX_EPOCH}; + +use loro::{ + ExpandType, ExportMode, LoroDoc, LoroValue, StyleConfig, StyleConfigMap, Timestamp, ToJson, + TreeParentId, +}; +use rand::{rngs::StdRng, Rng, SeedableRng}; +use serde_json::Value; + +fn usage() -> ! { + eprintln!( + r#"moon_snapshot_fuzz (loro) + +Randomly generate Loro ops in Rust, export a FastSnapshot, then ask MoonBit to +decode the snapshot and output deep JSON. The deep JSON must match Rust `get_deep_value()`. 
+ +Usage: + MOON_BIN=~/.moon/bin/moon NODE_BIN=node \ + cargo run -p loro --example moon_snapshot_fuzz -- \ + --iters <n> [--seed <u64>] [--ops <n>] [--commit-every <n>] [--peers <n>] [--out-dir <dir>] + +If a mismatch happens, this tool writes a reproducible case into: + <out-dir>/case-<seed>/ + +"# + ); + std::process::exit(2); +} + +fn parse_arg_value<'a>(args: &'a [String], name: &str) -> Option<&'a str> { + args.windows(2) + .find_map(|w| (w[0] == name).then_some(w[1].as_str())) +} + +fn parse_usize(args: &[String], name: &str, default: usize) -> usize { + match parse_arg_value(args, name) { + None => default, + Some(v) => v.parse().unwrap_or_else(|_| usage()), + } +} + +fn parse_u64(args: &[String], name: &str, default: u64) -> u64 { + match parse_arg_value(args, name) { + None => default, + Some(v) => v.parse().unwrap_or_else(|_| usage()), + } +} + +fn parse_out_dir(args: &[String]) -> PathBuf { + parse_arg_value(args, "--out-dir") + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("moon_snapshot_fuzz_artifacts")) +} + +fn bin_available(bin: &str, args: &[&str]) -> bool { + Command::new(bin) + .args(args) + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +fn repo_root() -> PathBuf { + // crates/loro -> crates -> repo root + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("repo root") + .to_path_buf() +} + +fn build_moon_cli_js(moon_bin: &str) -> anyhow::Result<PathBuf> { + let root = repo_root(); + let moon_dir = root.join("moon"); + let status = Command::new(moon_bin) + .current_dir(&moon_dir) + .args(["build", "--target", "js", "--release", "cmd/loro_codec_cli"]) + .status()?; + anyhow::ensure!(status.success(), "failed to build MoonBit CLI"); + Ok(moon_dir.join("_build/js/release/build/cmd/loro_codec_cli/loro_codec_cli.js")) +} + +fn run_export_deep_json(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result<String> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-snapshot-fuzz-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("snapshot.blob"); + std::fs::write(&in_path, input)?; + + let out = Command::new(node_bin) + .arg(cli_js) + .args(["export-deep-json", in_path.to_str().unwrap()]) + .output()?; + anyhow::ensure!( + out.status.success(), + "node export-deep-json failed: stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + Ok(String::from_utf8(out.stdout)?)
+} + +fn write_json(path: &Path, value: &serde_json::Value) -> anyhow::Result<()> { + let s = serde_json::to_string_pretty(value)?; + std::fs::write(path, s)?; + Ok(()) +} + +fn json_number_eq(a: &serde_json::Number, b: &serde_json::Number) -> bool { + if a.is_f64() || b.is_f64() { + match (a.as_f64(), b.as_f64()) { + (Some(x), Some(y)) => x == y, + _ => false, + } + } else { + a == b + } +} + +fn json_value_eq(a: &Value, b: &Value) -> bool { + match (a, b) { + (Value::Null, Value::Null) => true, + (Value::Bool(x), Value::Bool(y)) => x == y, + (Value::Number(x), Value::Number(y)) => json_number_eq(x, y), + (Value::String(x), Value::String(y)) => x == y, + (Value::Array(xs), Value::Array(ys)) => { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| json_value_eq(x, y)) + } + (Value::Object(mx), Value::Object(my)) => { + if mx.len() != my.len() { + return false; + } + mx.iter() + .all(|(k, vx)| my.get(k).is_some_and(|vy| json_value_eq(vx, vy))) + } + _ => false, + } +} + +fn apply_random_ops( + doc: &LoroDoc, + seed: u64, + ops: usize, + commit_every: usize, + peer_ids: &[u64], +) -> anyhow::Result<()> { + let mut rng = StdRng::seed_from_u64(seed); + let peer_ids = if peer_ids.is_empty() { &[1] } else { peer_ids }; + + let mut styles = StyleConfigMap::new(); + styles.insert( + "bold".into(), + StyleConfig { + expand: ExpandType::After, + }, + ); + styles.insert( + "link".into(), + StyleConfig { + expand: ExpandType::Before, + }, + ); + doc.config_text_style(styles); + + let mut active_peer = peer_ids[0]; + doc.set_peer_id(active_peer)?; + + let map = doc.get_map("map"); + let list = doc.get_list("list"); + let text = doc.get_text("text"); + let mlist = doc.get_movable_list("mlist"); + let tree = doc.get_tree("tree"); + tree.enable_fractional_index(0); + + // Counter (always enabled by default in this repo). + let counter = map.insert_container("counter", loro::LoroCounter::new())?; + + // Stable baseline so root containers don't disappear from deep JSON. + map.insert("keep", 0)?; + list.insert(0, 0)?; + text.insert(0, "hi😀")?; + mlist.insert(0, 0)?; + counter.increment(0.0)?; + let keep_node = tree.create(None)?; + tree.get_meta(keep_node)?.insert("title", "keep")?; + + // Ensure nested container coverage (container values in map/list/movable_list). 
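+ // Sketch of why this matters: nested containers force the snapshot decoder to + // resolve container-typed values and re-attach children under their parents, + // which is exactly the shape get_deep_value() serializes.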
+ let child_map = map.insert_container("child_map", loro::LoroMap::new())?; + child_map.insert("a", 1)?; + let child_text = child_map.insert_container("t", loro::LoroText::new())?; + child_text.insert(0, "inner😀")?; + + let child_list = map.insert_container("child_list", loro::LoroList::new())?; + child_list.insert(0, "x")?; + let child_mlist = map.insert_container("child_mlist", loro::LoroMovableList::new())?; + child_mlist.insert(0, 10)?; + child_mlist.insert(1, 20)?; + child_mlist.mov(0, 1)?; + + let child_tree = map.insert_container("child_tree", loro::LoroTree::new())?; + child_tree.enable_fractional_index(0); + let child_tree_root = child_tree.create(None)?; + child_tree.get_meta(child_tree_root)?.insert("m", 1)?; + + let counters = [counter]; + let maps = [map.clone(), child_map]; + let lists = [list.clone(), child_list]; + let texts = [text.clone(), child_text]; + let mlists = [mlist.clone(), child_mlist]; + + struct TreeCtx { + tree: loro::LoroTree, + nodes: Vec<loro::TreeID>, + } + let mut trees = [ + TreeCtx { + tree: tree.clone(), + nodes: vec![keep_node], + }, + TreeCtx { + tree: child_tree, + nodes: vec![child_tree_root], + }, + ]; + + let mut map_keys: Vec<String> = Vec::new(); + let mut child_map_keys: Vec<String> = Vec::new(); + + for i in 0..ops { + // Switch active peer after each commit boundary (when multiple peers are requested). + if commit_every > 0 && i > 0 && i % commit_every == 0 && peer_ids.len() > 1 { + active_peer = peer_ids[rng.gen_range(0..peer_ids.len())]; + doc.set_peer_id(active_peer)?; + } + + let op_type = rng.gen_range(0..20); + match op_type { + 0 => { + let key = format!("k{}", rng.gen::<u8>()); + map.insert(&key, rng.gen::<i32>())?; + map_keys.push(key); + } + 1 => { + let key = format!("k{}", rng.gen::<u8>()); + let value = if rng.gen::<bool>() { + LoroValue::from(rng.gen::<i64>()) + } else { + LoroValue::Null + }; + map.insert(&key, value)?; + map_keys.push(key); + } + 2 => { + // Insert more value kinds (string/f64/binary) into either root map or child_map. + let (target, keys) = if rng.gen::<bool>() { + (&maps[0], &mut map_keys) + } else { + (&maps[1], &mut child_map_keys) + }; + let key = format!("v{}", rng.gen::<u8>()); + match rng.gen_range(0..3) { + 0 => target.insert(&key, "str😀")?, + 1 => target.insert(&key, rng.gen::<f64>() - 0.5)?, + _ => target.insert(&key, vec![0u8, 1, 2, rng.gen::<u8>()])?, + } + keys.push(key); + } + 3 => { + // Map delete (guarantee it hits an existing key sometimes).
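+ // Note: deletes in this generator ignore errors (let _ = ...), so a stale key + // never aborts an iteration; the other examples propagate them with ?.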
+ if !map_keys.is_empty() && rng.gen::<bool>() { + let idx = rng.gen_range(0..map_keys.len()); + let key = map_keys.swap_remove(idx); + let _ = map.delete(&key); + } else if !child_map_keys.is_empty() { + let idx = rng.gen_range(0..child_map_keys.len()); + let key = child_map_keys.swap_remove(idx); + let _ = maps[1].delete(&key); + } + } + 4 => { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 5 => { + let target = &lists[rng.gen_range(0..lists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 6 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let index = rng.gen_range(0..=target.len_unicode()); + let s = match rng.gen_range(0..8) { + 0 => "a", + 1 => "b", + 2 => "Z", + 3 => "😀", + 4 => "中", + 5 => "ab", + 6 => "😀!", + _ => "!", + }; + target.insert(index, s)?; + } + 7 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let len_u = target.len_unicode(); + if len_u > 0 { + let index = rng.gen_range(0..len_u); + let max_len = (len_u - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 8 => { + // Text mark/unmark + let target = &texts[rng.gen_range(0..texts.len())]; + let len_u = target.len_unicode(); + if len_u >= 2 { + let start = rng.gen_range(0..len_u - 1); + let end = rng.gen_range(start + 1..=len_u); + if rng.gen::<bool>() { + let key = if rng.gen::<bool>() { "bold" } else { "link" }; + let value: LoroValue = if key == "bold" { + LoroValue::from(true) + } else { + LoroValue::from("https://loro.dev") + }; + let _ = target.mark(start..end, key, value); + } else { + let key = if rng.gen::<bool>() { "bold" } else { "link" }; + let _ = target.unmark(start..end, key); + } + } + } + 9 => { + // MovableList insert + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 10 => { + // MovableList delete + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 11 => { + // MovableList set + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + target.set(index, rng.gen::<i32>())?; + } + } + 12 => { + // MovableList move + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() >= 2 { + let from = rng.gen_range(0..target.len()); + let to = rng.gen_range(0..target.len()); + let _ = target.mov(from, to); + } + } + 13 => { + // Tree create + let t = &mut trees[rng.gen_range(0..trees.len())]; + let parent = if t.nodes.is_empty() || rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let id = t.tree.create(parent)?; + t.nodes.push(id); + } + 14 => { + // Tree move + let t = &mut trees[rng.gen_range(0..trees.len())]; + if t.nodes.len() >= 2 { + let target = t.nodes[rng.gen_range(0..t.nodes.len())]; + let parent = if rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let _ = t.tree.mov(target, parent); + } + } + 15 => { + // Tree delete (try to keep at least 1 node around) + let t = &mut trees[rng.gen_range(0..trees.len())];
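+ // The > 1 guard below always preserves the baseline node, so neither tree + // ever empties out of the deep JSON.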
if t.nodes.len() > 1 { + let idx = rng.gen_range(0..t.nodes.len()); + let id = t.nodes.swap_remove(idx); + let _ = t.tree.delete(id); + } + } + 16 => { + // Tree meta insert + let t = &mut trees[rng.gen_range(0..trees.len())]; + if !t.nodes.is_empty() { + let id = t.nodes[rng.gen_range(0..t.nodes.len())]; + if let Ok(meta) = t.tree.get_meta(id) { + let key = format!("m{}", rng.gen::<u8>()); + let _ = meta.insert(&key, rng.gen::<i32>()); + } + } + } + 17 => { + // Insert container values into sequence containers. + if rng.gen::<bool>() { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroMap::new()); + } else { + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroText::new()); + } + } + 18 => { + // Counter increment + let target = &counters[rng.gen_range(0..counters.len())]; + let x = (rng.gen::<f64>() - 0.5) * 10.0; + let _ = target.increment(x); + } + 19 => { + // Counter decrement + let target = &counters[rng.gen_range(0..counters.len())]; + let x = (rng.gen::<f64>() - 0.5) * 10.0; + let _ = target.decrement(x); + } + _ => unreachable!(), + } + + if commit_every > 0 && (i + 1) % commit_every == 0 { + let msg = format!("commit-{} seed={} peer={}", i + 1, seed, active_peer); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(i as Timestamp); + doc.commit(); + } + } + + let msg = format!("final seed={seed} ops={ops}"); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(ops as Timestamp); + doc.commit(); + Ok(()) +} + +fn main() -> anyhow::Result<()> { + let args: Vec<String> = std::env::args().collect(); + if args.iter().any(|a| a == "--help" || a == "-h") { + usage(); + } + + let iters = parse_usize(&args, "--iters", 100); + if iters == 0 { + usage(); + } + + let ops = parse_usize(&args, "--ops", 200); + let commit_every = parse_usize(&args, "--commit-every", 20); + let peers_n = parse_usize(&args, "--peers", 1).max(1); + + let seed = parse_u64( + &args, + "--seed", + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(), + ); + + let out_dir = parse_out_dir(&args); + std::fs::create_dir_all(&out_dir)?; + + let moon_bin = std::env::var("MOON_BIN").unwrap_or_else(|_| "moon".to_string()); + let node_bin = std::env::var("NODE_BIN").unwrap_or_else(|_| "node".to_string()); + anyhow::ensure!( + bin_available(&moon_bin, &["version"]), + "moon not available (set MOON_BIN)" + ); + anyhow::ensure!( + bin_available(&node_bin, &["--version"]), + "node not available (set NODE_BIN)" + ); + + let cli_js = build_moon_cli_js(&moon_bin)?; + + let peer_ids: Vec<u64> = (1..=peers_n as u64).collect(); + + for i in 0..iters { + let case_seed = seed.wrapping_add(i as u64); + + let doc = LoroDoc::new(); + apply_random_ops(&doc, case_seed, ops, commit_every, &peer_ids)?; + let snapshot = doc.export(ExportMode::Snapshot)?; + + let expected = doc.get_deep_value().to_json_value(); + let moon_json = run_export_deep_json(&node_bin, &cli_js, &snapshot)?; + let got: Value = serde_json::from_str(&moon_json)?; + + if !json_value_eq(&expected, &got) { + let case_dir = out_dir.join(format!("case-{case_seed}")); + std::fs::create_dir_all(&case_dir)?; + std::fs::write(case_dir.join("snapshot.blob"), &snapshot)?; + write_json(&case_dir.join("expected.json"), &expected)?; + write_json(&case_dir.join("moon.parsed.json"), &got)?; + std::fs::write(case_dir.join("moon.raw.json"), &moon_json)?; + let meta =
serde_json::json!({ + "seed": case_seed, + "ops": ops, + "commit_every": commit_every, + "peers": peer_ids, + }); + write_json(&case_dir.join("meta.json"), &meta)?; + + anyhow::bail!( + "deep-json mismatch (seed={case_seed}); artifacts written to {}", + case_dir.display() + ); + } + + if (i + 1) % 50 == 0 { + eprintln!("ok: {}/{} (seed={case_seed})", i + 1, iters); + } + } + + eprintln!("ok: all {iters} iterations passed (base_seed={seed})"); + Ok(()) +} diff --git a/crates/loro/tests/moon_transcode.rs b/crates/loro/tests/moon_transcode.rs new file mode 100644 index 000000000..423634b93 --- /dev/null +++ b/crates/loro/tests/moon_transcode.rs @@ -0,0 +1,1861 @@ +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::OnceLock; +use std::time::{SystemTime, UNIX_EPOCH}; + +use loro::{ + ExpandType, ExportMode, Frontiers, LoroDoc, LoroValue, StyleConfig, StyleConfigMap, Timestamp, + ToJson, TreeParentId, VersionVector, +}; +use rand::{rngs::StdRng, Rng, SeedableRng}; + +struct MoonCtx { + node_bin: String, + cli_js: PathBuf, +} + +fn bin_available(bin: &str, args: &[&str]) -> bool { + Command::new(bin) + .args(args) + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +fn repo_root() -> PathBuf { + // crates/loro -> crates -> repo root + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("repo root") + .to_path_buf() +} + +fn build_moon_cli_js(moon_bin: &str) -> Option<PathBuf> { + let root = repo_root(); + let moon_dir = root.join("moon"); + let status = Command::new(moon_bin) + .current_dir(&moon_dir) + .args(["build", "--target", "js", "--release", "cmd/loro_codec_cli"]) + .status() + .ok()?; + if !status.success() { + return None; + } + Some( + moon_dir + .join("_build/js/release/build/cmd/loro_codec_cli/loro_codec_cli.js"), + ) +} + +fn moon_ctx() -> Option<&'static MoonCtx> { + static MOON_CTX: OnceLock<Option<MoonCtx>> = OnceLock::new(); + MOON_CTX + .get_or_init(|| { + let moon_bin = std::env::var("MOON_BIN").unwrap_or_else(|_| "moon".to_string()); + let node_bin = std::env::var("NODE_BIN").unwrap_or_else(|_| "node".to_string()); + + if !bin_available(&moon_bin, &["version"]) { + eprintln!("skipping e2e: moon not available (set MOON_BIN)"); + return None; + } + if !bin_available(&node_bin, &["--version"]) { + eprintln!("skipping e2e: node not available (set NODE_BIN)"); + return None; + } + + let cli_js = match build_moon_cli_js(&moon_bin) { + Some(p) => p, + None => { + eprintln!("skipping e2e: failed to build MoonBit CLI"); + return None; + } + }; + + Some(MoonCtx { node_bin, cli_js }) + }) + .as_ref() +} + +fn run_transcode(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result<Vec<u8>> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-transcode-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + let out_path = tmp.join("out.blob"); + std::fs::write(&in_path, input)?; + + let status = Command::new(node_bin) + .arg(cli_js) + .args(["transcode", in_path.to_str().unwrap(), out_path.to_str().unwrap()]) + .status()?; + anyhow::ensure!(status.success(), "node transcode failed"); + + let out = std::fs::read(&out_path)?; + Ok(out) +} + +fn run_decode_updates(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result<String> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!(
"loro-moon-decode-updates-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + std::fs::write(&in_path, input)?; + + let out = Command::new(node_bin) + .arg(cli_js) + .args(["decode-updates", in_path.to_str().unwrap()]) + .output()?; + anyhow::ensure!(out.status.success(), "node decode-updates failed"); + Ok(String::from_utf8(out.stdout)?) +} + +fn run_export_jsonschema(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-export-jsonschema-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + std::fs::write(&in_path, input)?; + + let out = Command::new(node_bin) + .arg(cli_js) + .args(["export-jsonschema", in_path.to_str().unwrap()]) + .output()?; + if !out.status.success() { + anyhow::bail!( + "node export-jsonschema failed: stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + } + Ok(String::from_utf8(out.stdout)?) +} + +fn run_export_deep_json(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-export-deep-json-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + std::fs::write(&in_path, input)?; + + let out = Command::new(node_bin) + .arg(cli_js) + .args(["export-deep-json", in_path.to_str().unwrap()]) + .output()?; + anyhow::ensure!(out.status.success(), "node export-deep-json failed"); + Ok(String::from_utf8(out.stdout)?) +} + +fn run_encode_jsonschema(node_bin: &str, cli_js: &Path, input_json: &str) -> anyhow::Result> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-encode-jsonschema-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.json"); + let out_path = tmp.join("out.blob"); + std::fs::write(&in_path, input_json.as_bytes())?; + + let status = Command::new(node_bin) + .arg(cli_js) + .args([ + "encode-jsonschema", + in_path.to_str().unwrap(), + out_path.to_str().unwrap(), + ]) + .status()?; + anyhow::ensure!(status.success(), "node encode-jsonschema failed"); + + let out = std::fs::read(&out_path)?; + Ok(out) +} + +fn run_transcode_output(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-transcode-raw-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + let out_path = tmp.join("out.blob"); + std::fs::write(&in_path, input)?; + + Ok(Command::new(node_bin) + .arg(cli_js) + .args(["transcode", in_path.to_str().unwrap(), out_path.to_str().unwrap()]) + .output()?) 
+} + +fn run_decode_updates_output(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result<std::process::Output> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-decode-updates-raw-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + std::fs::write(&in_path, input)?; + + Ok(Command::new(node_bin) + .arg(cli_js) + .args(["decode-updates", in_path.to_str().unwrap()]) + .output()?) +} + +fn run_export_jsonschema_output( + node_bin: &str, + cli_js: &Path, + input: &[u8], +) -> anyhow::Result<std::process::Output> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-export-jsonschema-raw-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + std::fs::write(&in_path, input)?; + + Ok(Command::new(node_bin) + .arg(cli_js) + .args(["export-jsonschema", in_path.to_str().unwrap()]) + .output()?) +} + +fn run_export_deep_json_output(node_bin: &str, cli_js: &Path, input: &[u8]) -> anyhow::Result<std::process::Output> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-export-deep-json-raw-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.blob"); + std::fs::write(&in_path, input)?; + + Ok(Command::new(node_bin) + .arg(cli_js) + .args(["export-deep-json", in_path.to_str().unwrap()]) + .output()?) +} + +fn run_encode_jsonschema_output( + node_bin: &str, + cli_js: &Path, + input_json: &str, +) -> anyhow::Result<std::process::Output> { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let tmp = std::env::temp_dir().join(format!( + "loro-moon-encode-jsonschema-raw-{}-{ts}", + std::process::id() + )); + std::fs::create_dir_all(&tmp)?; + let in_path = tmp.join("in.json"); + let out_path = tmp.join("out.blob"); + std::fs::write(&in_path, input_json.as_bytes())?; + + Ok(Command::new(node_bin) + .arg(cli_js) + .args([ + "encode-jsonschema", + in_path.to_str().unwrap(), + out_path.to_str().unwrap(), + ]) + .output()?) +} + +fn apply_random_ops(doc: &LoroDoc, seed: u64, ops: usize, commit_every: usize) -> anyhow::Result<()> { + apply_random_ops_with_peers(doc, seed, ops, commit_every, &[1]) +} + +fn apply_random_ops_with_peers( + doc: &LoroDoc, + seed: u64, + ops: usize, + commit_every: usize, + peer_ids: &[u64], +) -> anyhow::Result<()> { + let mut rng = StdRng::seed_from_u64(seed); + + let peer_ids = if peer_ids.is_empty() { &[1] } else { peer_ids }; + + let mut styles = StyleConfigMap::new(); + styles.insert( + "bold".into(), + StyleConfig { + expand: ExpandType::After, + }, + ); + styles.insert( + "link".into(), + StyleConfig { + expand: ExpandType::Before, + }, + ); + doc.config_text_style(styles); + + let mut active_peer = peer_ids[0]; + doc.set_peer_id(active_peer)?; + let map = doc.get_map("map"); + let list = doc.get_list("list"); + let text = doc.get_text("text"); + let mlist = doc.get_movable_list("mlist"); + let tree = doc.get_tree("tree"); + tree.enable_fractional_index(0); + + // Stable baseline so root containers don't disappear from deep JSON. + map.insert("keep", 0)?; + list.insert(0, 0)?; + text.insert(0, "hi😀")?; + mlist.insert(0, 0)?; + let keep_node = tree.create(None)?; + tree.get_meta(keep_node)?.insert("title", "keep")?; + + // Ensure Text mark/mark_end coverage.
+ if text.len_unicode() >= 2 { + text.mark(0..2, "bold", true)?; + if text.len_unicode() >= 3 { + text.mark(1..3, "link", "https://example.com")?; + } + text.unmark(0..1, "bold")?; + } + + // Ensure nested container coverage (container values in map/list/movable_list). + let child_map = map.insert_container("child_map", loro::LoroMap::new())?; + child_map.insert("a", 1)?; + let child_text = child_map.insert_container("t", loro::LoroText::new())?; + child_text.insert(0, "inner😀")?; + + let child_list = map.insert_container("child_list", loro::LoroList::new())?; + child_list.insert(0, "x")?; + let child_mlist = map.insert_container("child_mlist", loro::LoroMovableList::new())?; + child_mlist.insert(0, 10)?; + child_mlist.insert(1, 20)?; + child_mlist.mov(0, 1)?; + + let child_tree = map.insert_container("child_tree", loro::LoroTree::new())?; + child_tree.enable_fractional_index(0); + let child_tree_root = child_tree.create(None)?; + child_tree.get_meta(child_tree_root)?.insert("m", 1)?; + + let maps = [map.clone(), child_map]; + let lists = [list.clone(), child_list]; + let texts = [text.clone(), child_text]; + let mlists = [mlist.clone(), child_mlist]; + + struct TreeCtx { + tree: loro::LoroTree, + nodes: Vec<loro::TreeID>, + } + let mut trees = [ + TreeCtx { + tree: tree.clone(), + nodes: vec![keep_node], + }, + TreeCtx { + tree: child_tree, + nodes: vec![child_tree_root], + }, + ]; + + let mut map_keys: Vec<String> = Vec::new(); + let mut child_map_keys: Vec<String> = Vec::new(); + + for i in 0..ops { + // Switch active peer after each commit boundary (when multiple peers are requested). + if commit_every > 0 && i > 0 && i % commit_every == 0 && peer_ids.len() > 1 { + active_peer = peer_ids[rng.gen_range(0..peer_ids.len())]; + doc.set_peer_id(active_peer)?; + } + + let op_type = rng.gen_range(0..18); + match op_type { + 0 => { + let key = format!("k{}", rng.gen::<u32>()); + map.insert(&key, rng.gen::<i32>())?; + map_keys.push(key); + } + 1 => { + let key = format!("k{}", rng.gen::<u32>()); + let value = if rng.gen::<bool>() { + LoroValue::from(rng.gen::<i64>()) + } else { + LoroValue::Null + }; + map.insert(&key, value)?; + map_keys.push(key); + } + 2 => { + // Insert more value kinds (string/f64/binary) into either root map or child_map. + let (target, keys) = if rng.gen::<bool>() { + (&maps[0], &mut map_keys) + } else { + (&maps[1], &mut child_map_keys) + }; + let key = format!("v{}", rng.gen::<u32>()); + match rng.gen_range(0..3) { + 0 => target.insert(&key, "str😀")?, + 1 => target.insert(&key, rng.gen::<f64>() - 0.5)?, + _ => target.insert(&key, vec![0u8, 1, 2, rng.gen::<u8>()])?, + } + keys.push(key); + } + 3 => { + // Map delete (guarantee it hits an existing key sometimes).
+ if !map_keys.is_empty() && rng.gen::<bool>() { + let idx = rng.gen_range(0..map_keys.len()); + let key = map_keys.swap_remove(idx); + map.delete(&key)?; + } else if !child_map_keys.is_empty() { + let idx = rng.gen_range(0..child_map_keys.len()); + let key = child_map_keys.swap_remove(idx); + maps[1].delete(&key)?; + } + } + 4 => { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 5 => { + let target = &lists[rng.gen_range(0..lists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 6 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let index = rng.gen_range(0..=target.len_unicode()); + let s = match rng.gen_range(0..8) { + 0 => "a", + 1 => "b", + 2 => "Z", + 3 => "😀", + 4 => "中", + 5 => "ab", + 6 => "😀!", + _ => "!", + }; + target.insert(index, s)?; + } + 7 => { + let target = &texts[rng.gen_range(0..texts.len())]; + let len_u = target.len_unicode(); + if len_u > 0 { + let index = rng.gen_range(0..len_u); + let max_len = (len_u - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 8 => { + // Text mark/unmark + let target = &texts[rng.gen_range(0..texts.len())]; + let len_u = target.len_unicode(); + if len_u >= 2 { + let start = rng.gen_range(0..len_u - 1); + let end = rng.gen_range(start + 1..=len_u); + if rng.gen::<bool>() { + let key = if rng.gen::<bool>() { "bold" } else { "link" }; + let value: LoroValue = if key == "bold" { + LoroValue::from(true) + } else { + LoroValue::from("https://loro.dev") + }; + let _ = target.mark(start..end, key, value); + } else { + let key = if rng.gen::<bool>() { "bold" } else { "link" }; + let _ = target.unmark(start..end, key); + } + } + } + 9 => { + // MovableList insert + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + target.insert(index, rng.gen::<i32>())?; + } + 10 => { + // MovableList delete + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + let max_len = (target.len() - index).min(3); + let len = rng.gen_range(1..=max_len); + target.delete(index, len)?; + } + } + 11 => { + // MovableList set + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() > 0 { + let index = rng.gen_range(0..target.len()); + target.set(index, rng.gen::<i32>())?; + } + } + 12 => { + // MovableList move + let target = &mlists[rng.gen_range(0..mlists.len())]; + if target.len() >= 2 { + let from = rng.gen_range(0..target.len()); + let to = rng.gen_range(0..target.len()); + let _ = target.mov(from, to); + } + } + 13 => { + // Tree create + let t = &mut trees[rng.gen_range(0..trees.len())]; + let parent = if t.nodes.is_empty() || rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let id = t.tree.create(parent)?; + t.nodes.push(id); + } + 14 => { + // Tree move + let t = &mut trees[rng.gen_range(0..trees.len())]; + if t.nodes.len() >= 2 { + let target = t.nodes[rng.gen_range(0..t.nodes.len())]; + let parent = if rng.gen::<bool>() { + TreeParentId::Root + } else { + TreeParentId::from(t.nodes[rng.gen_range(0..t.nodes.len())]) + }; + let _ = t.tree.mov(target, parent); + } + } + 15 => { + // Tree delete (try to keep at least 1 node around) + let t = &mut trees[rng.gen_range(0..trees.len())]; + if
t.nodes.len() > 1 { + let idx = rng.gen_range(0..t.nodes.len()); + let id = t.nodes.swap_remove(idx); + let _ = t.tree.delete(id); + } + } + 16 => { + // Tree meta insert + let t = &mut trees[rng.gen_range(0..trees.len())]; + if !t.nodes.is_empty() { + let id = t.nodes[rng.gen_range(0..t.nodes.len())]; + if let Ok(meta) = t.tree.get_meta(id) { + let key = format!("m{}", rng.gen::<u32>()); + let _ = meta.insert(&key, rng.gen::<i32>()); + } + } + } + 17 => { + // Insert container values into sequence containers. + if rng.gen::<bool>() { + let target = &lists[rng.gen_range(0..lists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroMap::new()); + } else { + let target = &mlists[rng.gen_range(0..mlists.len())]; + let index = rng.gen_range(0..=target.len()); + let _ = target.insert_container(index, loro::LoroText::new()); + } + } + _ => unreachable!(), + } + + if commit_every > 0 && (i + 1) % commit_every == 0 { + let msg = format!("commit-{} seed={} peer={}", i + 1, seed, active_peer); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(i as Timestamp); + doc.commit(); + } + } + + let msg = format!("final seed={seed} ops={ops}"); + doc.set_next_commit_message(&msg); + doc.set_next_commit_timestamp(ops as Timestamp); + doc.commit(); + + Ok(()) +} + +fn first_json_diff_path(a: &serde_json::Value, b: &serde_json::Value, path: &str) -> Option<String> { + use serde_json::Value; + if a == b { + return None; + } + match (a, b) { + (Value::Object(ao), Value::Object(bo)) => { + for (k, av) in ao { + let Some(bv) = bo.get(k) else { + return Some(format!("{path}.{k} (missing rhs)")); + }; + if let Some(p) = first_json_diff_path(av, bv, &format!("{path}.{k}")) { + return Some(p); + } + } + for k in bo.keys() { + if !ao.contains_key(k) { + return Some(format!("{path}.{k} (missing lhs)")); + } + } + Some(path.to_string()) + } + (Value::Array(aa), Value::Array(ba)) => { + if aa.len() != ba.len() { + return Some(format!("{path} (len {} != {})", aa.len(), ba.len())); + } + for (i, (av, bv)) in aa.iter().zip(ba.iter()).enumerate() { + if let Some(p) = first_json_diff_path(av, bv, &format!("{path}[{i}]")) { + return Some(p); + } + } + Some(path.to_string()) + } + _ => Some(path.to_string()), + } +} + +fn first_json_diff( + a: &serde_json::Value, + b: &serde_json::Value, + path: &str, +) -> Option<(String, serde_json::Value, serde_json::Value)> { + use serde_json::Value; + if a == b { + return None; + } + match (a, b) { + (Value::Object(ao), Value::Object(bo)) => { + for (k, av) in ao { + let Some(bv) = bo.get(k) else { + return Some((format!("{path}.{k} (missing rhs)"), av.clone(), Value::Null)); + }; + if let Some((p, ga, gb)) = first_json_diff(av, bv, &format!("{path}.{k}")) { + return Some((p, ga, gb)); + } + } + for (k, bv) in bo { + if !ao.contains_key(k) { + return Some((format!("{path}.{k} (missing lhs)"), Value::Null, bv.clone())); + } + } + Some((path.to_string(), a.clone(), b.clone())) + } + (Value::Array(aa), Value::Array(ba)) => { + if aa.len() != ba.len() { + return Some(( + format!("{path} (len {} != {})", aa.len(), ba.len()), + a.clone(), + b.clone(), + )); + } + for (i, (av, bv)) in aa.iter().zip(ba.iter()).enumerate() { + if let Some((p, ga, gb)) = first_json_diff(av, bv, &format!("{path}[{i}]")) { + return Some((p, ga, gb)); + } + } + Some((path.to_string(), a.clone(), b.clone())) + } + _ => Some((path.to_string(), a.clone(), b.clone())), + } +} + +fn first_bytes_diff(a: &[u8], b: &[u8]) -> Option<usize> { + let min_len = a.len().min(b.len()); + for i
in 0..min_len { + if a[i] != b[i] { + return Some(i); + } + } + (a.len() != b.len()).then_some(min_len) +} + +fn assert_updates_jsonschema_matches_rust(doc: &LoroDoc, ctx: &MoonCtx) -> anyhow::Result<()> { + let start = VersionVector::default(); + let end = doc.oplog_vv(); + + let updates_blob = doc.export(ExportMode::Updates { + from: std::borrow::Cow::Borrowed(&start), + })?; + let moon_json = run_export_jsonschema(&ctx.node_bin, &ctx.cli_js, &updates_blob)?; + let moon_value: serde_json::Value = serde_json::from_str(&moon_json)?; + + let rust_schema = doc.export_json_updates(&start, &end); + let rust_value = serde_json::to_value(&rust_schema)?; + + anyhow::ensure!( + moon_value == rust_value, + "jsonschema mismatch at {:?}", + first_json_diff_path(&moon_value, &rust_value, "$") + ); + Ok(()) +} + +fn assert_snapshot_deep_json_matches_rust(doc: &LoroDoc, ctx: &MoonCtx) -> anyhow::Result<()> { + let expected = doc.get_deep_value().to_json_value(); + let snapshot_blob = doc.export(ExportMode::Snapshot)?; + + // Ensure Rust snapshot import round-trips for the same op sequence. + let doc_roundtrip = LoroDoc::new(); + doc_roundtrip.import(&snapshot_blob)?; + assert_eq!(doc_roundtrip.get_deep_value().to_json_value(), expected); + + let moon_json = run_export_deep_json(&ctx.node_bin, &ctx.cli_js, &snapshot_blob)?; + let moon_value: serde_json::Value = serde_json::from_str(&moon_json)?; + + assert_eq!(moon_value, expected); + Ok(()) +} + +fn apply_curated_ops(doc: &LoroDoc) -> anyhow::Result<()> { + let mut styles = StyleConfigMap::new(); + styles.insert( + "bold".into(), + StyleConfig { + expand: ExpandType::After, + }, + ); + styles.insert( + "link".into(), + StyleConfig { + expand: ExpandType::Before, + }, + ); + doc.config_text_style(styles); + + // Map ops. + let map = doc.get_map("map"); + map.insert("i32", 1)?; + map.insert("bool", true)?; + map.insert("null", LoroValue::Null)?; + map.insert("str", "hello😀")?; + map.insert("f64", 1.25f64)?; + map.insert("bin", vec![0u8, 1, 2, 3])?; + // Overwrite existing key. + map.insert("i32", 2)?; + // Container values in map. + let child_map = map.insert_container("child_map", loro::LoroMap::new())?; + child_map.insert("a", 1)?; + let child_list = map.get_or_create_container("child_list", loro::LoroList::new())?; + child_list.push("x")?; + map.delete("null")?; + // Map clear (but keep non-empty at the end). + let tmp = map.insert_container("tmp", loro::LoroMap::new())?; + tmp.insert("k", 1)?; + tmp.clear()?; + tmp.insert("k2", 2)?; + + // List ops. + let list = doc.get_list("list"); + list.insert(0, "a")?; + list.push("b")?; + let list_child_text = list.insert_container(2, loro::LoroText::new())?; + list_child_text.insert(0, "t")?; + let _ = list.pop()?; + if list.len() > 0 { + list.delete(0, 1)?; + } + list.clear()?; + list.push(0)?; + let list_child_map = list.push_container(loro::LoroMap::new())?; + list_child_map.insert("k", 1)?; + + // MovableList ops. + let mlist = doc.get_movable_list("mlist"); + mlist.insert(0, "a")?; + mlist.push("b")?; + mlist.set(0, "A")?; + if mlist.len() >= 2 { + mlist.mov(0, 1)?; + } + let ml_child_text = mlist.insert_container(0, loro::LoroText::new())?; + ml_child_text.insert(0, "ml")?; + let ml_set_text = mlist.set_container(0, loro::LoroText::new())?; + ml_set_text.insert(0, "set")?; + let _ = mlist.pop()?; + if mlist.len() > 0 { + mlist.delete(0, 1)?; + } + mlist.clear()?; + mlist.push(1)?; + + // Text ops. 
+ let text = doc.get_text("text"); + text.insert(0, "A😀BC")?; + // Use UTF-8/UTF-16 coordinate APIs at a safe ASCII boundary. + text.insert_utf8(0, "u8")?; + text.insert_utf16(0, "u16")?; + text.delete_utf8(0, 1)?; + if text.len_unicode() >= 2 { + text.mark(0..2, "bold", true)?; + text.mark(1..2, "link", "https://example.com")?; + text.unmark(0..1, "bold")?; + } + if text.len_unicode() >= 2 { + let _ = text.splice(1, 1, "Z")?; + } + if text.len_unicode() > 0 { + text.delete(0, 1)?; + } + text.insert(0, "keep")?; + + // Tree ops (fractional index + ordering moves). + let tree = doc.get_tree("tree"); + tree.enable_fractional_index(0); + let root_a = tree.create(None)?; + let root_b = tree.create(None)?; + let c1 = tree.create(root_a)?; + let c2 = tree.create_at(root_a, 0)?; + tree.mov_to(c1, root_a, 1)?; + tree.mov_after(root_a, root_b)?; + tree.mov_before(root_a, root_b)?; + tree.delete(c2)?; + + // Tree meta ops: insert/delete/clear. + let meta = tree.get_meta(root_a)?; + meta.insert("title", "A")?; + meta.insert("num", 1)?; + meta.delete("num")?; + meta.clear()?; + meta.insert("title", "A2")?; + + doc.set_next_commit_message("curated-ops"); + doc.set_next_commit_timestamp(1 as Timestamp); + doc.commit(); + Ok(()) +} + +#[test] +fn moon_transcode_e2e() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + // Build a doc that exercises multiple op kinds. + let doc = LoroDoc::new(); + + // Commit #1 (with msg/timestamp) to create an intermediate frontiers for SnapshotAt/StateOnly. + doc.set_next_commit_message("commit-1"); + doc.set_next_commit_timestamp(1 as Timestamp); + + doc.get_map("map").insert("x", 1).unwrap(); + doc.get_map("map").insert("y", true).unwrap(); + + let list = doc.get_list("list"); + list.insert(0, 1).unwrap(); + list.insert(1, 2).unwrap(); + list.delete(0, 1).unwrap(); + + let mlist = doc.get_movable_list("mlist"); + mlist.insert(0, 10).unwrap(); + mlist.insert(1, 20).unwrap(); + mlist.mov(0, 1).unwrap(); + mlist.set(0, 99).unwrap(); + mlist.delete(0, 1).unwrap(); + + let text = doc.get_text("text"); + text.insert(0, "a😀b").unwrap(); + text.insert(3, "!").unwrap(); + text.delete(1, 1).unwrap(); + + let tree = doc.get_tree("tree"); + tree.enable_fractional_index(0); + let n1 = tree.create(None).unwrap(); + tree.get_meta(n1).unwrap().insert("title", "A").unwrap(); + let n2 = tree.create(None).unwrap(); + tree.get_meta(n2).unwrap().insert("title", "B").unwrap(); + tree.mov_after(n1, n2).unwrap(); + tree.delete(n2).unwrap(); + + doc.commit(); + let frontiers_v1: Frontiers = doc.state_frontiers(); + let expected_v1 = doc.get_deep_value().to_json_value(); + + // Commit #2 to create a newer version. + doc.set_next_commit_message("commit-2 😀"); + doc.set_next_commit_timestamp(2 as Timestamp); + doc.get_map("map").insert("z", 123).unwrap(); + doc.get_text("text").insert(0, "Z").unwrap(); + doc.commit(); + let expected = doc.get_deep_value().to_json_value(); + + // Updates e2e (FastUpdates): Rust export -> Moon transcode -> Rust import. 
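+ // Transcoding the blob twice and requiring byte-equality also checks that the Moon encoder is deterministic for this input, not just that Rust can import its output.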
+ let updates = doc.export(ExportMode::all_updates()).unwrap(); + let out_updates = run_transcode(&ctx.node_bin, &ctx.cli_js, &updates)?; + let out_updates2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_updates)?; + anyhow::ensure!( + out_updates2 == out_updates, + "moon transcode not idempotent for FastUpdates at {:?} ({} -> {})", + first_bytes_diff(&out_updates2, &out_updates), + out_updates.len(), + out_updates2.len() + ); + let doc2 = LoroDoc::new(); + doc2.import(&out_updates).unwrap(); + assert_eq!(doc2.get_deep_value().to_json_value(), expected); + + // JsonSchema export e2e: Rust export (FastUpdates) -> Moon export-jsonschema -> Rust import_json_updates. + let jsonschema = run_export_jsonschema(&ctx.node_bin, &ctx.cli_js, &updates)?; + let schema: loro::JsonSchema = serde_json::from_str(&jsonschema)?; + let doc_json = LoroDoc::new(); + doc_json.import_json_updates(schema).unwrap(); + assert_eq!(doc_json.get_deep_value().to_json_value(), expected); + + // Snapshot e2e (FastSnapshot): Rust export -> Moon transcode -> Rust import. + let snapshot = doc.export(ExportMode::Snapshot).unwrap(); + let out_snapshot = run_transcode(&ctx.node_bin, &ctx.cli_js, &snapshot)?; + let out_snapshot2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_snapshot)?; + anyhow::ensure!( + out_snapshot2 == out_snapshot, + "moon transcode not idempotent for Snapshot at {:?} ({} -> {})", + first_bytes_diff(&out_snapshot2, &out_snapshot), + out_snapshot.len(), + out_snapshot2.len() + ); + let doc3 = LoroDoc::new(); + doc3.import(&out_snapshot).unwrap(); + assert_eq!(doc3.get_deep_value().to_json_value(), expected); + + // SnapshotAt e2e (FastSnapshot): decode snapshot at an earlier version. + let snapshot_at = doc + .export(ExportMode::SnapshotAt { + version: std::borrow::Cow::Borrowed(&frontiers_v1), + }) + .unwrap(); + let out_snapshot_at = run_transcode(&ctx.node_bin, &ctx.cli_js, &snapshot_at)?; + let out_snapshot_at2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_snapshot_at)?; + anyhow::ensure!( + out_snapshot_at2 == out_snapshot_at, + "moon transcode not idempotent for SnapshotAt at {:?} ({} -> {})", + first_bytes_diff(&out_snapshot_at2, &out_snapshot_at), + out_snapshot_at.len(), + out_snapshot_at2.len() + ); + let doc_at = LoroDoc::new(); + doc_at.import(&out_snapshot_at).unwrap(); + assert_eq!(doc_at.get_deep_value().to_json_value(), expected_v1); + + // StateOnly e2e (FastSnapshot): state at an earlier version with minimal history. + let state_only = doc + .export(ExportMode::StateOnly(Some(std::borrow::Cow::Borrowed( + &frontiers_v1, + )))) + .unwrap(); + let out_state_only = run_transcode(&ctx.node_bin, &ctx.cli_js, &state_only)?; + let out_state_only2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_state_only)?; + anyhow::ensure!( + out_state_only2 == out_state_only, + "moon transcode not idempotent for StateOnly at {:?} ({} -> {})", + first_bytes_diff(&out_state_only2, &out_state_only), + out_state_only.len(), + out_state_only2.len() + ); + let doc_state_only = LoroDoc::new(); + doc_state_only.import(&out_state_only).unwrap(); + assert_eq!(doc_state_only.get_deep_value().to_json_value(), expected_v1); + + // ShallowSnapshot e2e (FastSnapshot): full current state + partial history since v1. 
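+ // A shallow snapshot drops history before v1 but keeps the full current state, so importing it alone must still reproduce the latest state.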
+ let shallow = doc + .export(ExportMode::ShallowSnapshot(std::borrow::Cow::Borrowed( + &frontiers_v1, + ))) + .unwrap(); + let out_shallow = run_transcode(&ctx.node_bin, &ctx.cli_js, &shallow)?; + let out_shallow2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_shallow)?; + anyhow::ensure!( + out_shallow2 == out_shallow, + "moon transcode not idempotent for ShallowSnapshot at {:?} ({} -> {})", + first_bytes_diff(&out_shallow2, &out_shallow), + out_shallow.len(), + out_shallow2.len() + ); + let doc_shallow = LoroDoc::new(); + doc_shallow.import(&out_shallow).unwrap(); + assert_eq!(doc_shallow.get_deep_value().to_json_value(), expected); + + // Updates(from vv) e2e: snapshot_at(v1) + updates(vv_v1) => latest. + let vv_v1: VersionVector = doc.frontiers_to_vv(&frontiers_v1).unwrap(); + let updates_since_v1 = doc.export(ExportMode::Updates { + from: std::borrow::Cow::Borrowed(&vv_v1), + })?; + let out_updates_since_v1 = run_transcode(&ctx.node_bin, &ctx.cli_js, &updates_since_v1)?; + let out_updates_since_v12 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_updates_since_v1)?; + anyhow::ensure!( + out_updates_since_v12 == out_updates_since_v1, + "moon transcode not idempotent for Updates(from) at {:?} ({} -> {})", + first_bytes_diff(&out_updates_since_v12, &out_updates_since_v1), + out_updates_since_v1.len(), + out_updates_since_v12.len() + ); + let doc_from_v1 = LoroDoc::new(); + doc_from_v1.import(&out_snapshot_at).unwrap(); + doc_from_v1.import(&out_updates_since_v1).unwrap(); + assert_eq!(doc_from_v1.get_deep_value().to_json_value(), expected); + + // Multi-peer e2e: updates should include >1 peer. + let doc_a = LoroDoc::new(); + doc_a.set_peer_id(1)?; + doc_a.set_next_commit_message("A-1"); + doc_a.get_map("m").insert("a", 1).unwrap(); + doc_a.commit(); + + let doc_b = LoroDoc::new(); + doc_b.set_peer_id(2)?; + doc_b.import(&doc_a.export(ExportMode::all_updates()).unwrap()) + .unwrap(); + doc_b.set_next_commit_message("B-1"); + doc_b.get_map("m").insert("b", 2).unwrap(); + doc_b.commit(); + let expected_b = doc_b.get_deep_value().to_json_value(); + + let updates_b = doc_b.export(ExportMode::all_updates()).unwrap(); + let out_updates_b = run_transcode(&ctx.node_bin, &ctx.cli_js, &updates_b)?; + let out_updates_b2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_updates_b)?; + anyhow::ensure!( + out_updates_b2 == out_updates_b, + "moon transcode not idempotent for multi-peer Updates at {:?} ({} -> {})", + first_bytes_diff(&out_updates_b2, &out_updates_b), + out_updates_b.len(), + out_updates_b2.len() + ); + let doc_c = LoroDoc::new(); + doc_c.import(&out_updates_b).unwrap(); + assert_eq!(doc_c.get_deep_value().to_json_value(), expected_b); + + Ok(()) +} + +#[test] +fn moon_edge_varints_and_lengths() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + // Stress: + // - LEB128 / varint boundaries (lengths >= 128, peer tables >= 128). + // - Big peer IDs (JS-safe boundary) carried through JSON schema. + // - String/binary lengths at 127/128. + let doc = LoroDoc::new(); + let map = doc.get_map("m"); + let list = doc.get_list("l"); + + // Commit #1: one change with many keys/ops. 
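+ // Note: the -1 timestamp below pushes a negative value through the signed integer encoding paths.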
+ doc.set_peer_id(1)?; + doc.set_next_commit_timestamp(-1 as Timestamp); + + map.insert("", "empty-key")?; + map.insert("s127", "a".repeat(127))?; + map.insert("s128", "b".repeat(128))?; + map.insert("bin0", Vec::<u8>::new())?; + map.insert("bin127", vec![7u8; 127])?; + map.insert("bin128", vec![8u8; 128])?; + + for i in 0..130u32 { + let key = format!("k{i:03}"); + map.insert(&key, i as i64)?; + list.push(i as i64)?; + } + + // Root container name length boundaries (UTF-8 byte length). + let root_127 = "r".repeat(127); + doc.get_map(root_127.as_str()).insert("x", 1)?; + let root_emoji = "😀".repeat(40); // 160 UTF-8 bytes + doc.get_list(root_emoji.as_str()).push("y")?; + + doc.commit(); + + // More peers to force peer-index varints (len >= 128). + for peer in 2u64..=130u64 { + doc.set_peer_id(peer)?; + if peer == 2 { + doc.set_next_commit_message(""); + doc.set_next_commit_timestamp(0 as Timestamp); + } else { + doc.set_next_commit_timestamp(peer as Timestamp); + } + let key = format!("p{peer:03}"); + map.insert(&key, peer as i64)?; + doc.commit(); + } + + // Big peer ID (forces bigint path in JS). + let big_peer: u64 = 9_007_199_254_740_993; // 2^53 + 1 + doc.set_peer_id(big_peer)?; + doc.set_next_commit_message("big-peer"); + doc.set_next_commit_timestamp(1_700_000_000 as Timestamp); + map.insert("big_peer", big_peer as i64)?; + doc.commit(); + + // Decode correctness (Moon export-deep-json / export-jsonschema). + assert_snapshot_deep_json_matches_rust(&doc, ctx)?; + assert_updates_jsonschema_matches_rust(&doc, ctx)?; + + // Encode correctness: Moon transcode is deterministic (idempotent) and importable by Rust. + let snapshot = doc.export(ExportMode::Snapshot)?; + let out_snapshot = run_transcode(&ctx.node_bin, &ctx.cli_js, &snapshot)?; + let out_snapshot2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_snapshot)?; + anyhow::ensure!( + out_snapshot2 == out_snapshot, + "moon transcode not idempotent for edge Snapshot at {:?} ({} -> {})", + first_bytes_diff(&out_snapshot2, &out_snapshot), + out_snapshot.len(), + out_snapshot2.len() + ); + let doc_from_snapshot = LoroDoc::new(); + doc_from_snapshot.import(&out_snapshot).unwrap(); + assert_eq!( + doc_from_snapshot.get_deep_value().to_json_value(), + doc.get_deep_value().to_json_value() + ); + let updates = doc.export(ExportMode::all_updates())?; + let out_updates = run_transcode(&ctx.node_bin, &ctx.cli_js, &updates)?; + let out_updates2 = run_transcode(&ctx.node_bin, &ctx.cli_js, &out_updates)?; + anyhow::ensure!( + out_updates2 == out_updates, + "moon transcode not idempotent for edge Updates at {:?} ({} -> {})", + first_bytes_diff(&out_updates2, &out_updates), + out_updates.len(), + out_updates2.len() + ); + let doc_from_updates = LoroDoc::new(); + doc_from_updates.import(&out_updates).unwrap(); + assert_eq!( + doc_from_updates.get_deep_value().to_json_value(), + doc.get_deep_value().to_json_value() + ); + + // Roundtrip: Moon encode-jsonschema output must be importable by Rust (large peer/key tables).
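+ // With more than 128 peers and 128 keys, the peer/key tables force multi-byte LEB128 indices.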
+ let start = VersionVector::default(); + let end = doc.oplog_vv(); + let schema = doc.export_json_updates(&start, &end); + let json = serde_json::to_string(&schema)?; + let out_blob = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json)?; + let doc2 = LoroDoc::new(); + doc2.import(&out_blob).unwrap(); + let got = doc2.get_deep_value().to_json_value(); + let expected = doc.get_deep_value().to_json_value(); + anyhow::ensure!( + got == expected, + "encode-jsonschema state mismatch: {:?}", + first_json_diff(&got, &expected, "$") + ); + + Ok(()) +} + +#[test] +fn moon_decode_ops_text_insert() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let peer: u64 = 0x0102_0304_0506_0708; + let doc = LoroDoc::new(); + doc.set_peer_id(peer)?; + doc.get_text("t").insert(0, "123").unwrap(); + doc.commit(); + + let updates = doc.export(ExportMode::all_updates()).unwrap(); + let json = run_decode_updates(&ctx.node_bin, &ctx.cli_js, &updates)?; + let v: serde_json::Value = serde_json::from_str(&json)?; + + let changes = v + .get("changes") + .and_then(|x| x.as_array()) + .ok_or_else(|| anyhow::anyhow!("missing changes array"))?; + + let expected_container = "cid:root-t:Text"; + let expected_peer_suffix = format!("@{peer}"); + + let mut found = false; + for c in changes { + let Some(id) = c.get("id").and_then(|x| x.as_str()) else { + continue; + }; + if !id.ends_with(&expected_peer_suffix) { + continue; + } + let Some(ops) = c.get("ops").and_then(|x| x.as_array()) else { + continue; + }; + for op in ops { + if op.get("container").and_then(|x| x.as_str()) != Some(expected_container) { + continue; + } + let Some(insert) = op + .get("content") + .and_then(|x| x.get("Text")) + .and_then(|x| x.get("Insert")) + else { + continue; + }; + if insert.get("pos").and_then(|x| x.as_i64()) == Some(0) + && insert.get("text").and_then(|x| x.as_str()) == Some("123") + { + found = true; + break; + } + } + } + + anyhow::ensure!(found, "expected Text insert op not found in Moon decode output"); + Ok(()) +} + +#[test] +fn moon_export_jsonschema_text_insert() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let peer: u64 = 0x0102_0304_0506_0708; + let doc = LoroDoc::new(); + doc.set_peer_id(peer)?; + doc.get_text("t").insert(0, "123").unwrap(); + doc.commit(); + + let updates = doc.export(ExportMode::all_updates()).unwrap(); + let json = run_export_jsonschema(&ctx.node_bin, &ctx.cli_js, &updates)?; + let schema: loro::JsonSchema = serde_json::from_str(&json)?; + + assert_eq!(schema.schema_version, 1); + assert_eq!(schema.peers.as_deref(), Some(&[peer][..])); + + let expected_container = "cid:root-t:Text"; + let mut found = false; + for change in &schema.changes { + // After peer-compression, change IDs use peer indices (so the only peer here is 0). + if change.id.peer != 0 { + continue; + } + for op in &change.ops { + if op.container.to_string() != expected_container { + continue; + } + match &op.content { + loro::JsonOpContent::Text(loro::JsonTextOp::Insert { pos, text }) => { + if *pos == 0 && text == "123" { + found = true; + break; + } + } + _ => {} + } + } + } + anyhow::ensure!(found, "expected Text insert op not found in Moon jsonschema output"); + + // Roundtrip: Moon jsonschema output must be importable by Rust. 
+ let doc2 = LoroDoc::new(); + doc2.import_json_updates(schema).unwrap(); + assert_eq!(doc2.get_deep_value().to_json_value(), doc.get_deep_value().to_json_value()); + + Ok(()) +} + +#[test] +fn moon_encode_jsonschema_text_insert() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let peer: u64 = 0x0102_0304_0506_0708; + let doc = LoroDoc::new(); + doc.set_peer_id(peer)?; + doc.get_text("t").insert(0, "123").unwrap(); + doc.commit(); + let expected = doc.get_deep_value().to_json_value(); + + let start = VersionVector::default(); + let end = doc.oplog_vv(); + let schema = doc.export_json_updates(&start, &end); + let json = serde_json::to_string(&schema)?; + + let out_blob = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json)?; + let doc2 = LoroDoc::new(); + doc2.import(&out_blob).unwrap(); + assert_eq!(doc2.get_deep_value().to_json_value(), expected); + + Ok(()) +} + +#[test] +fn moon_encode_jsonschema_cross_peer_container_refs() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + // Peer 2 writes into a nested container created by peer 1. + // This ensures the encoded ChangeBlock for peer 2 references container IDs with another peer. + let doc = LoroDoc::new(); + let root = doc.get_map("m"); + + doc.set_peer_id(1)?; + let child = root.insert_container("child", loro::LoroMap::new())?; + child.insert("a", 1)?; + doc.commit(); + let frontiers_v1: Frontiers = doc.state_frontiers(); + + doc.set_peer_id(2)?; + child.insert("b", 2)?; + doc.commit(); + let expected = doc.get_deep_value().to_json_value(); + + let end = doc.oplog_vv(); + + // Full range should import on a fresh doc. + let schema0 = doc.export_json_updates(&VersionVector::default(), &end); + let json0 = serde_json::to_string(&schema0)?; + let blob0 = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json0)?; + let doc0 = LoroDoc::new(); + doc0.import(&blob0).unwrap(); + assert_eq!(doc0.get_deep_value().to_json_value(), expected); + + // Incremental range should apply on top of SnapshotAt(v1). + let vv_v1: VersionVector = doc.frontiers_to_vv(&frontiers_v1).unwrap(); + let schema = doc.export_json_updates(&vv_v1, &end); + let json = serde_json::to_string(&schema)?; + let blob = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json)?; + let base_snapshot = doc.export(ExportMode::SnapshotAt { + version: std::borrow::Cow::Borrowed(&frontiers_v1), + })?; + let base = LoroDoc::new(); + base.import(&base_snapshot)?; + base.import(&blob).unwrap(); + assert_eq!(base.get_deep_value().to_json_value(), expected); + + Ok(()) +} + +#[test] +fn moon_encode_jsonschema_random_roundtrip() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + // A small deterministic set of cases to catch common regressions: + // - multi-peer ids and peer compression + // - non-empty start version (deps resolution) + // - mixed container/value kinds from random ops + let seeds = [0u64, 1, 2, 3]; + for seed in seeds { + let doc = LoroDoc::new(); + + // Build three committed segments with different peers so the schema definitely contains + // multiple peers, and we have a stable "start" point after the first segment. 
+ apply_random_ops_with_peers(&doc, seed, 80, 0, &[1])?; + let frontiers_v1: Frontiers = doc.state_frontiers(); + apply_random_ops_with_peers(&doc, seed.wrapping_add(1), 80, 0, &[2])?; + apply_random_ops_with_peers(&doc, seed.wrapping_add(2), 80, 0, &[3])?; + + let expected = doc.get_deep_value().to_json_value(); + let end = doc.oplog_vv(); + + // Full range (empty start) should roundtrip on a fresh doc. + let schema0 = doc.export_json_updates(&VersionVector::default(), &end); + let json0 = serde_json::to_string(&schema0)?; + let blob0 = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json0)?; + let doc0 = LoroDoc::new(); + doc0.import(&blob0).unwrap(); + let got0 = doc0.get_deep_value().to_json_value(); + anyhow::ensure!( + got0 == expected, + "seed={seed} full-range mismatch at {:?}", + first_json_diff_path(&got0, &expected, "$") + ); + + // Incremental range should apply cleanly on top of SnapshotAt(v1). + let vv_v1: VersionVector = doc.frontiers_to_vv(&frontiers_v1).unwrap(); + let schema = doc.export_json_updates(&vv_v1, &end); + let json = serde_json::to_string(&schema)?; + + let blob = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json)?; + let base_snapshot = doc.export(ExportMode::SnapshotAt { + version: std::borrow::Cow::Borrowed(&frontiers_v1), + })?; + let base = LoroDoc::new(); + base.import(&base_snapshot)?; + base.import(&blob).unwrap(); + let got = base.get_deep_value().to_json_value(); + anyhow::ensure!( + got == expected, + "seed={seed} incremental mismatch at {:?}", + first_json_diff_path(&got, &expected, "$") + ); + } + + Ok(()) +} + +#[test] +fn moon_encode_jsonschema_richtext_roundtrip() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let configure_styles = |doc: &LoroDoc| { + let mut styles = StyleConfigMap::new(); + styles.insert( + "bold".into(), + StyleConfig { + expand: ExpandType::After, + }, + ); + styles.insert( + "link".into(), + StyleConfig { + expand: ExpandType::Before, + }, + ); + doc.config_text_style(styles); + }; + + let doc = LoroDoc::new(); + configure_styles(&doc); + let text = doc.get_text("t"); + + // Commit #1: establish marks that later inserts depend on (expand semantics). + text.insert(0, "Hello😀")?; + let len = text.len_unicode(); + text.mark(0..len, "bold", true)?; + doc.commit(); + let frontiers_v1: Frontiers = doc.state_frontiers(); + + // Commit #2: boundary inserts should expand based on style info bits. + let len2 = text.len_unicode(); + text.insert(len2, "!")?; // should be bold (ExpandType::After) + let len3 = text.len_unicode(); + text.mark(0..len3, "link", "https://example.com")?; + text.insert(0, "X")?; // should be link (ExpandType::Before) + text.unmark(1..3, "bold")?; // create splits + doc.commit(); + + let expected_deep = doc.get_deep_value().to_json_value(); + let expected_delta = doc.get_text("t").get_richtext_value().to_json_value(); + + let end = doc.oplog_vv(); + + // Full range. + let schema0 = doc.export_json_updates(&VersionVector::default(), &end); + let json0 = serde_json::to_string(&schema0)?; + let blob0 = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json0)?; + let doc0 = LoroDoc::new(); + configure_styles(&doc0); + doc0.import(&blob0)?; + anyhow::ensure!( + doc0.get_deep_value().to_json_value() == expected_deep, + "full-range deep-json mismatch" + ); + anyhow::ensure!( + doc0.get_text("t").get_richtext_value().to_json_value() == expected_delta, + "full-range richtext delta mismatch" + ); + + // Incremental range (SnapshotAt(v1) + Updates(from v1)). 
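+ // The base snapshot supplies all history up to v1; the Moon-encoded blob must apply cleanly on top of it.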
+ let vv_v1: VersionVector = doc.frontiers_to_vv(&frontiers_v1).unwrap(); + let schema = doc.export_json_updates(&vv_v1, &end); + let json = serde_json::to_string(&schema)?; + let blob = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json)?; + let base_snapshot = doc.export(ExportMode::SnapshotAt { + version: std::borrow::Cow::Borrowed(&frontiers_v1), + })?; + let base = LoroDoc::new(); + configure_styles(&base); + base.import(&base_snapshot)?; + base.import(&blob)?; + anyhow::ensure!( + base.get_deep_value().to_json_value() == expected_deep, + "incremental deep-json mismatch" + ); + anyhow::ensure!( + base.get_text("t").get_richtext_value().to_json_value() == expected_delta, + "incremental richtext delta mismatch" + ); + + Ok(()) +} + +#[test] +fn moon_export_jsonschema_updates_since_v1() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let peer: u64 = 100; + let doc = LoroDoc::new(); + doc.set_peer_id(peer)?; + + doc.get_text("t").insert(0, "a").unwrap(); + doc.commit(); + let frontiers_v1: Frontiers = doc.state_frontiers(); + + doc.get_text("t").insert(1, "b").unwrap(); + doc.get_map("m").insert("k", 1).unwrap(); + doc.commit(); + let expected = doc.get_deep_value().to_json_value(); + + let vv_v1: VersionVector = doc.frontiers_to_vv(&frontiers_v1).unwrap(); + let updates_since_v1 = doc.export(ExportMode::Updates { + from: std::borrow::Cow::Borrowed(&vv_v1), + })?; + + let json = run_export_jsonschema(&ctx.node_bin, &ctx.cli_js, &updates_since_v1)?; + let schema: loro::JsonSchema = serde_json::from_str(&json)?; + + // `start_version` should match the starting frontiers of this range. + assert_eq!(schema.start_version, frontiers_v1); + + // Apply on top of SnapshotAt(v1) should yield the latest state. + let base = LoroDoc::new(); + base.import( + &doc.export(ExportMode::SnapshotAt { + version: std::borrow::Cow::Borrowed(&frontiers_v1), + })?, + )?; + base.import_json_updates(schema).unwrap(); + assert_eq!(base.get_deep_value().to_json_value(), expected); + + Ok(()) +} + +#[test] +fn moon_cli_robustness_rejects_invalid_inputs() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let doc = LoroDoc::new(); + doc.get_text("t").insert(0, "hi")?; + doc.commit(); + let updates = doc.export(ExportMode::all_updates())?; + let snapshot = doc.export(ExportMode::Snapshot)?; + + // Wrong mode should be rejected. + let out = run_export_deep_json_output(&ctx.node_bin, &ctx.cli_js, &updates)?; + anyhow::ensure!( + !out.status.success(), + "expected export-deep-json to reject Updates; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let out = run_export_jsonschema_output(&ctx.node_bin, &ctx.cli_js, &snapshot)?; + anyhow::ensure!( + !out.status.success(), + "expected export-jsonschema to reject Snapshot; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + // Truncated blobs should be rejected. 
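+ // Dropping the final byte changes the body covered by the xxHash32 checksum, so decoding must fail.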
+ if snapshot.len() > 1 { + let out = run_export_deep_json_output(&ctx.node_bin, &ctx.cli_js, &snapshot[..snapshot.len() - 1])?; + anyhow::ensure!( + !out.status.success(), + "expected export-deep-json to reject truncated Snapshot; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + } + if updates.len() > 1 { + let out = run_export_jsonschema_output(&ctx.node_bin, &ctx.cli_js, &updates[..updates.len() - 1])?; + anyhow::ensure!( + !out.status.success(), + "expected export-jsonschema to reject truncated Updates; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + } + + // Malformed blobs should be rejected. + let out = run_decode_updates_output(&ctx.node_bin, &ctx.cli_js, &[])?; + anyhow::ensure!( + !out.status.success(), + "expected decode-updates to reject empty input; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let out = run_transcode_output(&ctx.node_bin, &ctx.cli_js, b"not-a-loro-doc")?; + anyhow::ensure!( + !out.status.success(), + "expected transcode to reject garbage input; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + // Invalid JsonSchema JSON should be rejected. + let out = run_encode_jsonschema_output(&ctx.node_bin, &ctx.cli_js, "{")?; + anyhow::ensure!( + !out.status.success(), + "expected encode-jsonschema to reject invalid json; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + let out = run_encode_jsonschema_output(&ctx.node_bin, &ctx.cli_js, "{}")?; + anyhow::ensure!( + !out.status.success(), + "expected encode-jsonschema to reject missing fields; stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + Ok(()) +} + +#[test] +fn moon_export_jsonschema_multi_peer() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let doc_a = LoroDoc::new(); + doc_a.set_peer_id(1)?; + doc_a.get_map("m").insert("a", 1).unwrap(); + doc_a.commit(); + + let doc_b = LoroDoc::new(); + doc_b.set_peer_id(2)?; + doc_b.import(&doc_a.export(ExportMode::all_updates()).unwrap()) + .unwrap(); + doc_b.get_map("m").insert("b", 2).unwrap(); + doc_b.commit(); + let expected_b = doc_b.get_deep_value().to_json_value(); + + let updates_b = doc_b.export(ExportMode::all_updates()).unwrap(); + let json = run_export_jsonschema(&ctx.node_bin, &ctx.cli_js, &updates_b)?; + let schema: loro::JsonSchema = serde_json::from_str(&json)?; + + let mut peers = schema.peers.clone().unwrap_or_default(); + peers.sort(); + assert_eq!(peers, vec![1, 2]); + + let doc_c = LoroDoc::new(); + doc_c.import_json_updates(schema).unwrap(); + assert_eq!(doc_c.get_deep_value().to_json_value(), expected_b); + + Ok(()) +} + +#[test] +fn moon_golden_updates_jsonschema_matches_rust() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let seed = 42; + let doc = LoroDoc::new(); + apply_random_ops(&doc, seed, 200, 20)?; + assert_updates_jsonschema_matches_rust(&doc, ctx) +} + +#[test] +fn moon_golden_snapshot_deep_json_matches_rust() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let seed = 1337; + let doc = LoroDoc::new(); + apply_random_ops(&doc, seed, 200, 20)?; + assert_snapshot_deep_json_matches_rust(&doc, ctx) +} + +fn golden_random_updates(seed: u64, ops: usize, commit_every: usize, peers: 
&[u64]) -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + let doc = LoroDoc::new(); + apply_random_ops_with_peers(&doc, seed, ops, commit_every, peers)?; + assert_updates_jsonschema_matches_rust(&doc, ctx) +} + +fn golden_random_snapshot(seed: u64, ops: usize, commit_every: usize, peers: &[u64]) -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + let doc = LoroDoc::new(); + apply_random_ops_with_peers(&doc, seed, ops, commit_every, peers)?; + assert_snapshot_deep_json_matches_rust(&doc, ctx) +} + +#[test] +fn moon_curated_updates_jsonschema_matches_rust() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + let doc = LoroDoc::new(); + apply_curated_ops(&doc)?; + assert_updates_jsonschema_matches_rust(&doc, ctx) +} + +#[test] +fn moon_curated_snapshot_deep_json_matches_rust() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + let doc = LoroDoc::new(); + apply_curated_ops(&doc)?; + assert_snapshot_deep_json_matches_rust(&doc, ctx) +} + +#[test] +fn moon_golden_updates_seed_0() -> anyhow::Result<()> { + golden_random_updates(0, 200, 20, &[1]) +} + +#[test] +fn moon_golden_updates_seed_1() -> anyhow::Result<()> { + golden_random_updates(1, 200, 20, &[1]) +} + +#[test] +fn moon_golden_updates_seed_2() -> anyhow::Result<()> { + golden_random_updates(2, 200, 20, &[1]) +} + +#[test] +fn moon_golden_updates_seed_3() -> anyhow::Result<()> { + golden_random_updates(3, 200, 20, &[1]) +} + +#[test] +fn moon_golden_updates_multi_peer_seed_7() -> anyhow::Result<()> { + golden_random_updates(7, 250, 25, &[1, 2, 3]) +} + +#[test] +fn moon_golden_snapshot_seed_0() -> anyhow::Result<()> { + golden_random_snapshot(0, 200, 20, &[1]) +} + +#[test] +fn moon_golden_snapshot_seed_1() -> anyhow::Result<()> { + golden_random_snapshot(1, 200, 20, &[1]) +} + +#[test] +fn moon_golden_snapshot_seed_2() -> anyhow::Result<()> { + golden_random_snapshot(2, 200, 20, &[1]) +} + +#[test] +fn moon_golden_snapshot_seed_3() -> anyhow::Result<()> { + golden_random_snapshot(3, 200, 20, &[1]) +} + +#[test] +fn moon_golden_snapshot_multi_peer_seed_7() -> anyhow::Result<()> { + golden_random_snapshot(7, 250, 25, &[1, 2, 3]) +} + +#[cfg(feature = "counter")] +#[test] +fn moon_counter_snapshot_deep_json_matches_rust() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let doc = LoroDoc::new(); + let map = doc.get_map("m"); + let counter = map.insert_container("c", loro::LoroCounter::new())?; + counter.increment(1.0)?; + counter.decrement(0.5)?; + doc.set_next_commit_message("counter"); + doc.set_next_commit_timestamp(1 as Timestamp); + doc.commit(); + + assert_snapshot_deep_json_matches_rust(&doc, ctx) +} + +#[cfg(feature = "counter")] +#[test] +fn moon_counter_updates_jsonschema_matches_rust() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let doc = LoroDoc::new(); + let map = doc.get_map("m"); + let counter = map.insert_container("c", loro::LoroCounter::new())?; + counter.increment(1.0)?; + counter.decrement(0.5)?; + doc.set_next_commit_message("counter"); + doc.set_next_commit_timestamp(1 as Timestamp); + doc.commit(); + + assert_updates_jsonschema_matches_rust(&doc, ctx) +} + +#[cfg(feature = "counter")] +#[test] +fn moon_encode_jsonschema_counter() -> anyhow::Result<()> { + let Some(ctx) = moon_ctx() else { + return Ok(()); + }; + + let doc = LoroDoc::new(); + let map = doc.get_map("m"); + let counter = map.insert_container("c", 
loro::LoroCounter::new())?; + counter.increment(1.0)?; + counter.decrement(0.5)?; + doc.set_next_commit_message("counter"); + doc.set_next_commit_timestamp(1 as Timestamp); + doc.commit(); + let expected = doc.get_deep_value().to_json_value(); + + let start = VersionVector::default(); + let end = doc.oplog_vv(); + let schema = doc.export_json_updates(&start, &end); + let json = serde_json::to_string(&schema)?; + + let out_blob = run_encode_jsonschema(&ctx.node_bin, &ctx.cli_js, &json)?; + let doc2 = LoroDoc::new(); + doc2.import(&out_blob).unwrap(); + assert_eq!(doc2.get_deep_value().to_json_value(), expected); + + Ok(()) +} diff --git a/docs/encoding.md b/docs/encoding.md index 3d64ec6d0..37212a279 100644 --- a/docs/encoding.md +++ b/docs/encoding.md @@ -359,7 +359,10 @@ The VersionVector is a HashMap serialized with postcard. ### Frontiers Encoding -Frontiers is encoded as a sorted Vec<ID> using **postcard** format: +Frontiers is encoded as a Vec<ID> using **postcard** format. + +- Canonical encoding sorts IDs ascending by (PeerID, Counter) before serialization. +- Decoders should accept any order; the order does not change the semantics of Frontiers. ``` ┌─────────────────────────────────────────────────────────────────┐ │ Frontiers Encoding │ ├───────────────┬─────────────────────────────────────────────────┤ │ LEB128 │ Number of IDs (N) │ ├───────────────┼─────────────────────────────────────────────────┤ -│ For each ID (sorted): │ +│ For each ID: │ │ varint │ PeerID (u64, postcard encoding) │ │ varint │ Counter (i32, postcard zigzag encoding) │ └───────────────┴─────────────────────────────────────────────────┘ @@ -699,10 +702,15 @@ Loro uses a tagged value encoding system where each value is prefixed with a typ | 14 | ListMove | List move operation | | 15 | ListSet | List set operation | | 16 | RawTreeMove | Raw tree move (internal) | -| 0x80+ | Future | Unknown/future value types | +| 0x80 \| kind | Future | Unknown/future value types | **Source**: `crates/loro-internal/src/encoding/value.rs:39-161` +**Notes (Future)**: +- The encoding uses the high bit (`0x80`) as a marker and stores the future kind in the low 7 bits. +- Decoding uses `tag & 0x7F`, so kinds `0..=16` are reserved for known types; Future kinds must be `17..=127`. +- Payload is encoded like `Binary`: `LEB128(len) + len bytes` (opaque).
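+
+A minimal, self-contained sketch of these rules (our helper names, not the crate's API), showing how a decoder can carry a Future value as opaque bytes so it round-trips unchanged:
+
+```rust
+fn read_leb128(buf: &[u8], pos: &mut usize) -> u64 {
+    let (mut out, mut shift) = (0u64, 0u32);
+    loop {
+        let b = buf[*pos];
+        *pos += 1;
+        out |= u64::from(b & 0x7F) << shift;
+        if b & 0x80 == 0 {
+            return out;
+        }
+        shift += 7;
+    }
+}
+
+/// Returns `Some((kind, payload))` when the tag has the Future marker bit set.
+fn read_future(buf: &[u8], pos: &mut usize) -> Option<(u8, Vec<u8>)> {
+    let tag = buf[*pos];
+    *pos += 1;
+    if tag & 0x80 == 0 {
+        return None; // known value type (kind 0..=16)
+    }
+    let kind = tag & 0x7F; // future kinds occupy 17..=127
+    let len = read_leb128(buf, pos) as usize;
+    let payload = buf[*pos..*pos + len].to_vec(); // opaque, Binary-like body
+    *pos += len;
+    Some((kind, payload)) // re-encode as-is for forward compatibility
+}
+```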
+ ### Value Encoding Details #### Primitive Types @@ -799,6 +807,25 @@ Common patterns: **Source**: `crates/loro-internal/src/encoding/value.rs:480-485` (EncodedTreeMove struct) **Source**: `crates/loro-internal/src/encoding/value.rs:953-967` (read_tree_move) +#### RawTreeMove (internal) + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ RawTreeMove Encoding │ +├───────────────┬──────────────────────────────────────────────────┤ +│ LEB128 │ subject_peer_idx │ +│ LEB128 │ subject_cnt (encoded as usize; must fit i32) │ +│ LEB128 │ position_idx │ +│ 1 │ is_parent_null (u8 as bool) │ +│ LEB128 │ parent_peer_idx (only if !is_parent_null) │ +│ LEB128 │ parent_cnt (encoded as usize; must fit i32) │ +└───────────────┴──────────────────────────────────────────────────┘ +``` + +**Source**: `crates/loro-internal/src/encoding/value.rs:470-477` (RawTreeMove struct) +**Source**: `crates/loro-internal/src/encoding/value.rs:969-989` (read_raw_tree_move) +**Source**: `crates/loro-internal/src/encoding/value.rs:1122-1134` (write_raw_tree_move) + +#### ListMove ``` diff --git a/docs/moon-codec-fuzzing.md b/docs/moon-codec-fuzzing.md new file mode 100644 index 000000000..00c6a7a40 --- /dev/null +++ b/docs/moon-codec-fuzzing.md @@ -0,0 +1,156 @@ +# Moon Codec Fuzzing (Rust ↔ MoonBit) + +This repo contains Rust-driven fuzzers that exercise the MoonBit codec implementation by round-tripping +real Loro data through the Moon CLI (compiled to JS) and validating the result in Rust. + +These fuzzers are **not** `cargo-fuzz` targets. They are deterministic, seed-based test drivers that +produce reproducible artifacts on failure. + +## Prerequisites + +- Rust toolchain (use the repo’s `rust-toolchain`) +- Node.js (`node`) +- MoonBit (`moon`) + +Environment variables used by the drivers/tests: + +- `MOON_BIN`: path to the `moon` executable (default: `moon`) +- `NODE_BIN`: path to `node` (default: `node`) + +Example (common local setup): + +```sh +export MOON_BIN="$HOME/.moon/bin/moon" +export NODE_BIN="node" +``` + +## How the fuzzers work + +All fuzzers follow the same pattern: + +1. Generate a random-but-deterministic sequence of ops in Rust (`seed` controls randomness). +2. Produce a binary blob (Snapshot or Updates) and/or JSON schema from Rust. +3. Invoke the MoonBit CLI (`moon/cmd/loro_codec_cli`) compiled to JS and run with Node. +4. Validate the result back in Rust. +5. On mismatch, write a repro case to `<artifacts_dir>/case-<seed>/`. + +The Moon CLI is built automatically by each driver via: + +```sh +moon build --target js --release cmd/loro_codec_cli +``` + +## Fuzz drivers + +### 1) Snapshot decode fuzz: `moon_snapshot_fuzz` + +Purpose: Validate Moon’s snapshot decoding by comparing **deep JSON**. + +What it tests: + +- Rust generates a FastSnapshot (mode=3) blob. +- Moon decodes the snapshot and prints deep JSON (`export-deep-json`). +- Rust compares it with `doc.get_deep_value().to_json_value()`.
+ +Run: + +```sh +MOON_BIN="$HOME/.moon/bin/moon" NODE_BIN=node \ + cargo run -p loro --example moon_snapshot_fuzz -- \ + --seed 1 --iters 200 --ops 400 --commit-every 20 --peers 10 +``` + +Repro on failure: + +- The driver writes `snapshot.blob`, `expected.json`, and Moon outputs into: + `moon_snapshot_fuzz_artifacts/case-<seed>/` +- Re-run the exact failing seed with `--iters 1`: + +```sh +MOON_BIN="$HOME/.moon/bin/moon" NODE_BIN=node \ + cargo run -p loro --example moon_snapshot_fuzz -- \ + --seed <seed> --iters 1 --ops <ops> --commit-every <commit-every> --peers <peers> +``` + +### 2) JsonSchema → Updates encode fuzz: `moon_jsonschema_fuzz` + +Purpose: Validate Moon’s `encode-jsonschema` (JsonSchema JSON → binary FastUpdates mode=4). + +Why the oracle is “Rust updates” (not “original local doc”): + +- Counter state uses `f64` accumulation; floating-point sums are **not associative**. +- Different (but valid) deterministic application orders can produce tiny `f64` differences. +- To avoid false negatives, the fuzzer compares Moon’s encoded updates against **Rust’s encoded updates** + for the same `(start_vv -> end_vv)` range, by importing both and comparing the resulting state. + +What it tests: + +- Rust generates a document and chooses a deterministic `start_frontiers` (sometimes non-empty). +- Rust exports: + - `schema.json` via `export_json_updates(start_vv, end_vv)` + - `updates_rust.blob` via `ExportMode::Updates { from: start_vv }` + - optionally `base_snapshot.blob` via `ExportMode::SnapshotAt { version: start_frontiers }` +- Moon encodes `schema.json` into `updates_moon.blob`. +- Rust imports `base_snapshot + updates_rust` and `base_snapshot + updates_moon` and compares: + - deep JSON state + - `oplog_vv()` (operation coverage) + - selected richtext deltas (to catch mark/unmark regressions) + +Run: + +```sh +MOON_BIN="$HOME/.moon/bin/moon" NODE_BIN=node \ + cargo run -p loro --example moon_jsonschema_fuzz -- \ + --seed 1 --iters 300 --ops 400 --commit-every 20 --peers 10 +``` + +Repro on failure: + +- Look at `moon_jsonschema_fuzz_artifacts/case-<seed>/`. +- Re-run with the failing seed and `--iters 1`. + +## “Higher confidence” running modes + +For longer runs (recommended before merging codec changes): + +- More peers + more ops: + +```sh +MOON_BIN="$HOME/.moon/bin/moon" NODE_BIN=node \ + cargo run -p loro --example moon_jsonschema_fuzz -- \ + --seed 1000 --iters 200 --ops 1000 --commit-every 50 --peers 20 +``` + +- Use `--release` for speed: + +```sh +MOON_BIN="$HOME/.moon/bin/moon" NODE_BIN=node \ + cargo run -p loro --release --example moon_jsonschema_fuzz -- \ + --seed 1 --iters 2000 --ops 1000 --commit-every 50 --peers 20 +``` + +## Adding coverage (how to extend) + +When adding new fuzz ops, prefer: + +- Ops that mutate different container types (Map/List/Text/Tree/MovableList/Counter). +- Cross-peer edits (switch peer IDs between commits). +- Non-empty `start_frontiers` ranges (incremental import correctness). +- UTF-8/UTF-16 boundary behavior for Text. +- Large tables for varint boundaries (keys/peers >= 128). + +When a failure occurs: + +- Always keep the failing artifact directory. +- Turn it into a deterministic regression test if possible (a small, minimal seed/case). + +## Robustness (negative testing) + +In addition to semantic fuzzing, there are e2e tests (a quick manual spot-check is sketched below) that ensure the Moon CLI: + +- Rejects wrong document modes (e.g., decoding updates as snapshot). +- Rejects malformed/truncated inputs. +- Rejects invalid JsonSchema JSON for `encode-jsonschema`.
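A manual spot-check of the rejection behavior can look like the following. This is a hypothetical snippet, not one of the shipped e2e tests; `node_bin`/`cli_js` are assumed paths, and the only grounded fact is that the CLI exits with code 2 on decode errors.

```rust
use std::process::Command;

/// A truncated snapshot must make the CLI fail cleanly (non-zero exit,
/// code 2 for decode errors), never succeed or panic.
fn rejects_truncated_snapshot(node_bin: &str, cli_js: &str) -> anyhow::Result<()> {
    let blob = std::fs::read("snapshot.blob")?;
    std::fs::write("truncated.blob", &blob[..blob.len().min(16)])?;
    let status = Command::new(node_bin)
        .args([cli_js, "export-deep-json", "truncated.blob"])
        .status()?;
    anyhow::ensure!(!status.success(), "truncated input was accepted");
    Ok(())
}
```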
+ +These tests are meant to catch panics/crashes and “accepting garbage input”. + diff --git a/moon/README.md b/moon/README.md new file mode 100644 index 000000000..f69af213f --- /dev/null +++ b/moon/README.md @@ -0,0 +1,14 @@ +# MoonBit Loro Codec + +This directory contains a codec for the Loro binary encoding format, implemented in MoonBit (it corresponds to `docs/encoding.md`). + +## Directory layout + +- `moon/loro_codec/`: the core library (encode/decode, checksums, compression, SSTable, ChangeBlock, etc.) +- `moon/cmd/loro_codec_cli/`: the command-line tool (used for e2e transcoding and debugging) +- `moon/specs/`: implementation plans and data-structure design notes + +## Development conventions + +- Implement the foundational modules first (bytes/leb128/postcard/xxhash32/lz4), then SSTable and ChangeBlock. +- Rust ↔ Moon e2e interoperability is the final acceptance criterion. diff --git a/moon/SPEC_NOTES.md b/moon/SPEC_NOTES.md new file mode 100644 index 000000000..98ac545e2 --- /dev/null +++ b/moon/SPEC_NOTES.md @@ -0,0 +1,75 @@ +# MoonBit Loro Codec – SPEC NOTES + +This file records implementation-critical notes for the MoonBit codec under `moon/`, +as requested by `moon/specs/01-context-checklist.md`. + +It is intentionally **not** a full spec: the source of truth is `docs/encoding.md` +and the Rust implementation referenced below. + +## Endianness (must match Rust) + +- **Document mode** (`u16`) is **big-endian** (bytes `[20..22]` in the document header). +- **Document checksum** (`xxHash32`) is stored as **u32 little-endian** in bytes `[16..20]`, + and the checksum covers **bytes `[20..]`** (mode + body), not just body. +- **ChangeBlock key** is 12 bytes: `peer(u64 BE) + counter(i32 BE)`. +- **ID.to_bytes** (peer+counter) uses **big-endian** for both peer and counter. +- **Custom ValueEncoding**: + - `F64` is **big-endian** IEEE754. + - `I64`/`DeltaInt` use **SLEB128** (two’s complement sign extension), not zigzag. +- **postcard** uses **unsigned varint + zigzag** (different from SLEB128). + +## Integer encodings used + +- **ULEB128/SLEB128**: used in document bodies (FastUpdates block lengths), `keys` arena, + and the custom value encoding. +- **postcard varint + zigzag**: used by postcard itself and by serde_columnar. + +## ContainerType mappings (two tables) + +- **Binary ContainerID / ContainerWrapper kind byte** (`ContainerType::to_bytes` mapping): + `Map=0, List=1, Text=2, Tree=3, MovableList=4, Counter=5`. + +- **Historical mapping** (only for postcard `Option<ContainerID>` in wrapper.parent): + `Text=0, Map=1, List=2, MovableList=3, Tree=4, Counter=5`. + +See Rust: `crates/loro-internal/src/state/container_store/container_wrapper.rs`. + +## Unicode (RichText) + +- Text positions for snapshot decoding and JsonSchema use **Unicode scalar count** + (not UTF-16 code units). +- Moon implementation uses `count_utf8_codepoints(...)` when converting between + string lengths and the on-wire representation. + +## serde_columnar i128 + +- DeltaRle/DeltaOfDelta conceptually operate on i128 deltas. +- Moon implementation uses `BigInt` as the internal accumulator for i128-like behavior + (see `moon/loro_codec/serde_columnar_delta_rle.mbt`). + +## LZ4 Frame (SSTable compression) + +- SSTable blocks may be compressed using **LZ4 Frame**, as in Rust (`lz4_flex::frame`). +- Moon supports: + - decoding frames (`lz4_decompress_frame`) + - encoding frames (`lz4_compress_frame`) using block-independence and BD=64KB + - per-block compression fallback: if LZ4 frame output is larger than raw, encode as `CompressionType::None` + +## Forward/unknown handling + +- Custom ValueEncoding keeps unknown tags as opaque bytes (`Value::Future(tag, data)`), + enabling conservative round-tripping at the value layer.
+- JsonSchema import still rejects: + - `UnknownOp` (forward-compat op content) for non-Counter containers + - root container values (`🦜:cid:root-...`) because binary container values + reconstruct IDs from `op_id + container_type` and cannot represent roots. + +## Rust “truth” pointers (for debugging) + +- Document header/body: `crates/loro-internal/src/encoding.rs`, `.../encoding/fast_snapshot.rs` +- SSTable: `crates/kv-store/src/sstable.rs`, `crates/kv-store/src/block.rs`, `crates/kv-store/src/compress.rs` +- ChangeBlock: `crates/loro-internal/src/oplog/change_store/block_encode.rs`, + `crates/loro-internal/src/oplog/change_store/block_meta_encode.rs`, + `crates/loro-internal/src/encoding/outdated_encode_reordered.rs` +- Value encoding: `crates/loro-internal/src/encoding/value.rs` +- IDs / ContainerIDs: `crates/loro-common/src/lib.rs` + diff --git a/moon/cmd/loro_codec_cli/fs.mbt b/moon/cmd/loro_codec_cli/fs.mbt new file mode 100644 index 000000000..e9a11ecb6 --- /dev/null +++ b/moon/cmd/loro_codec_cli/fs.mbt @@ -0,0 +1,14 @@ +///| +pub fn read_file(path : String) -> Bytes { + read_file_internal(path) +} + +///| +pub fn write_file(path : String, data : Bytes) -> Unit { + write_file_internal(path, data) +} + +///| +pub fn exit_process(code : Int) -> Unit { + exit_process_internal(code) +} diff --git a/moon/cmd/loro_codec_cli/fs_js.mbt b/moon/cmd/loro_codec_cli/fs_js.mbt new file mode 100644 index 000000000..74b728824 --- /dev/null +++ b/moon/cmd/loro_codec_cli/fs_js.mbt @@ -0,0 +1,22 @@ +///| +extern "js" fn read_file_internal(path : String) -> Bytes = + #| function(path) { + #| const fs = require("node:fs"); + #| return fs.readFileSync(path); + #| } + +///| +extern "js" fn write_file_internal(path : String, data : Bytes) -> Unit = + #| function(path, data) { + #| const fs = require("node:fs"); + #| fs.writeFileSync(path, data); + #| } + +///| +extern "js" fn exit_process_internal(code : Int) -> Unit = + #| function(code) { + #| if (typeof process !== "undefined" && typeof process.exit === "function") { + #| process.exit(code); + #| } + #| throw new Error("process.exit is not available"); + #| } diff --git a/moon/cmd/loro_codec_cli/fs_native.mbt b/moon/cmd/loro_codec_cli/fs_native.mbt new file mode 100644 index 000000000..83fa381d9 --- /dev/null +++ b/moon/cmd/loro_codec_cli/fs_native.mbt @@ -0,0 +1,14 @@ +///| +fn read_file_internal(_path : String) -> Bytes { + abort("read_file is not supported on native target yet") +} + +///| +fn write_file_internal(_path : String, _data : Bytes) -> Unit { + abort("write_file is not supported on native target yet") +} + +///| +fn exit_process_internal(_code : Int) -> Unit { + abort("exit_process is not supported on native target yet") +} diff --git a/moon/cmd/loro_codec_cli/fs_wasm.mbt b/moon/cmd/loro_codec_cli/fs_wasm.mbt new file mode 100644 index 000000000..297d454a7 --- /dev/null +++ b/moon/cmd/loro_codec_cli/fs_wasm.mbt @@ -0,0 +1,14 @@ +///| +fn read_file_internal(_path : String) -> Bytes { + abort("read_file is not supported on wasm/wasm-gc target") +} + +///| +fn write_file_internal(_path : String, _data : Bytes) -> Unit { + abort("write_file is not supported on wasm/wasm-gc target") +} + +///| +fn exit_process_internal(_code : Int) -> Unit { + abort("exit_process is not supported on wasm/wasm-gc target") +} diff --git a/moon/cmd/loro_codec_cli/main.mbt b/moon/cmd/loro_codec_cli/main.mbt new file mode 100644 index 000000000..bc41a6441 --- /dev/null +++ b/moon/cmd/loro_codec_cli/main.mbt @@ -0,0 +1,142 @@ +///| +fn usage() -> Unit { + 
println("loro-codec (moonbit) v" + @loro_codec.version()) + println("Usage:") + println(" loro-codec transcode ") + println(" loro-codec decode-updates ") + println(" loro-codec export-jsonschema ") + println(" loro-codec encode-jsonschema ") + println(" loro-codec export-deep-json ") +} + +///| +fn find_cmd_index(args : Array[String]) -> Int? { + let mut i = 0 + while i < args.length() { + match args[i] { + "transcode" => return Some(i) + "decode-updates" => return Some(i) + "export-jsonschema" => return Some(i) + "encode-jsonschema" => return Some(i) + "export-deep-json" => return Some(i) + _ => () + } + i = i + 1 + } + None +} + +///| +fn main { + let args = @env.args() + match find_cmd_index(args) { + None => { + usage() + exit_process(1) + } + Some(i) => { + let cmd = args[i] + match cmd { + "transcode" => { + if i + 2 >= args.length() { + usage() + exit_process(1) + } + let input_path = args[i + 1] + let output_path = args[i + 2] + let input = read_file(input_path) + let output = @loro_codec.transcode_document(input, true) catch { + @loro_codec.DecodeError(msg) => { + println("decode error: " + msg) + exit_process(2) + b"" + } + } + write_file(output_path, output) + } + "decode-updates" => { + if i + 1 >= args.length() { + usage() + exit_process(1) + } + let input_path = args[i + 1] + let input = read_file(input_path) + let json = @loro_codec.decode_fast_updates_changes_json(input, true) catch { + @loro_codec.DecodeError(msg) => { + println("decode error: " + msg) + exit_process(2) + "" + } + } + println(json) + } + "export-jsonschema" => { + if i + 1 >= args.length() { + usage() + exit_process(1) + } + let input_path = args[i + 1] + let input = read_file(input_path) + let json = @loro_codec.export_json_schema_from_fast_updates( + input, true, + ) catch { + @loro_codec.DecodeError(msg) => { + println("decode error: " + msg) + exit_process(2) + "" + } + } + println(json) + } + "encode-jsonschema" => { + if i + 2 >= args.length() { + usage() + exit_process(1) + } + let input_path = args[i + 1] + let output_path = args[i + 2] + let input = read_file(input_path) + let json = @encoding/utf8.decode(input) catch { + @encoding/utf8.Malformed(_) => { + println("decode error: invalid utf8 input json") + exit_process(2) + "" + } + } + let output = @loro_codec.encode_fast_updates_from_json_schema( + json, true, + ) catch { + @loro_codec.DecodeError(msg) => { + println("decode error: " + msg) + exit_process(2) + b"" + } + } + write_file(output_path, output) + } + "export-deep-json" => { + if i + 1 >= args.length() { + usage() + exit_process(1) + } + let input_path = args[i + 1] + let input = read_file(input_path) + let json = @loro_codec.export_deep_json_from_fast_snapshot( + input, true, + ) catch { + @loro_codec.DecodeError(msg) => { + println("decode error: " + msg) + exit_process(2) + "" + } + } + println(json) + } + _ => { + usage() + exit_process(1) + } + } + } + } +} diff --git a/moon/cmd/loro_codec_cli/moon.pkg.json b/moon/cmd/loro_codec_cli/moon.pkg.json new file mode 100644 index 000000000..19adc0def --- /dev/null +++ b/moon/cmd/loro_codec_cli/moon.pkg.json @@ -0,0 +1,19 @@ +{ + "is-main": true, + "import": [ + "loro-dev/loro/loro_codec" + ], + "targets": { + "fs_js.mbt": [ + "js" + ], + "fs_wasm.mbt": [ + "wasm", + "wasm-gc" + ], + "fs_native.mbt": [ + "native", + "llvm" + ] + } +} diff --git a/moon/loro_codec/bytes.mbt b/moon/loro_codec/bytes.mbt new file mode 100644 index 000000000..b07d99f86 --- /dev/null +++ b/moon/loro_codec/bytes.mbt @@ -0,0 +1,190 @@ +///| +pub struct 
BytesReader { + buf : BytesView + mut pos : Int +} + +///| +pub fn BytesReader::new(bytes : Bytes) -> BytesReader { + { buf: bytes[:], pos: 0 } +} + +///| +pub fn BytesReader::from_view(view : BytesView) -> BytesReader { + { buf: view, pos: 0 } +} + +///| +pub fn BytesReader::remaining(self : BytesReader) -> Int { + self.buf.length() - self.pos +} + +///| +pub fn BytesReader::remaining_view(self : BytesReader) -> BytesView { + self.buf[self.pos:self.buf.length()] +} + +///| +fn BytesReader::require(self : BytesReader, n : Int) -> Unit raise DecodeError { + if n < 0 { + raise DecodeError("invalid read length") + } + if self.pos + n > self.buf.length() { + raise DecodeError("unexpected eof") + } +} + +///| +pub fn BytesReader::read_u8(self : BytesReader) -> Byte raise DecodeError { + self.require(1) + let b = self.buf[self.pos] + self.pos = self.pos + 1 + b +} + +///| +pub fn BytesReader::read_exact( + self : BytesReader, + n : Int, +) -> BytesView raise DecodeError { + self.require(n) + let start = self.pos + self.pos = self.pos + n + self.buf[start:start + n] +} + +///| +pub fn BytesReader::skip(self : BytesReader, n : Int) -> Unit raise DecodeError { + let _ = self.read_exact(n) + +} + +///| +pub fn BytesReader::read_u16_le(self : BytesReader) -> UInt raise DecodeError { + let b0 = self.read_u8().to_uint() + let b1 = self.read_u8().to_uint() + b0 | (b1 << 8) +} + +///| +pub fn BytesReader::read_u16_be(self : BytesReader) -> UInt raise DecodeError { + let b0 = self.read_u8().to_uint() + let b1 = self.read_u8().to_uint() + (b0 << 8) | b1 +} + +///| +pub fn BytesReader::read_u32_le(self : BytesReader) -> UInt raise DecodeError { + let b0 = self.read_u8().to_uint() + let b1 = self.read_u8().to_uint() + let b2 = self.read_u8().to_uint() + let b3 = self.read_u8().to_uint() + b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) +} + +///| +pub fn BytesReader::read_u32_be(self : BytesReader) -> UInt raise DecodeError { + let b0 = self.read_u8().to_uint() + let b1 = self.read_u8().to_uint() + let b2 = self.read_u8().to_uint() + let b3 = self.read_u8().to_uint() + (b0 << 24) | (b1 << 16) | (b2 << 8) | b3 +} + +///| +pub fn BytesReader::read_u64_le(self : BytesReader) -> UInt64 raise DecodeError { + let mut v : UInt64 = 0 + for i in 0..<8 { + let b = self.read_u8().to_uint64() + v = v | (b << (8 * i)) + } + v +} + +///| +pub fn BytesReader::read_u64_be(self : BytesReader) -> UInt64 raise DecodeError { + let mut v : UInt64 = 0 + for i in 0..<8 { + let b = self.read_u8().to_uint64() + v = (v << 8) | b + } + v +} + +///| +pub struct BytesWriter { + buf : @buffer.Buffer +} + +///| +pub fn BytesWriter::new() -> BytesWriter { + { buf: @buffer.new() } +} + +///| +pub fn BytesWriter::to_bytes(self : BytesWriter) -> Bytes { + self.buf.to_bytes() +} + +///| +pub fn BytesWriter::write_u8(self : BytesWriter, value : Byte) -> Unit { + self.buf.write_byte(value) +} + +///| +pub fn BytesWriter::write_bytes(self : BytesWriter, value : Bytes) -> Unit { + self.buf.write_bytes(value) +} + +///| +pub fn BytesWriter::write_bytesview( + self : BytesWriter, + value : BytesView, +) -> Unit { + self.buf.write_bytesview(value) +} + +///| +pub fn BytesWriter::write_u16_le(self : BytesWriter, value : UInt) -> Unit { + self.write_u8((value & 0xFF).to_byte()) + self.write_u8(((value >> 8) & 0xFF).to_byte()) +} + +///| +pub fn BytesWriter::write_u16_be(self : BytesWriter, value : UInt) -> Unit { + self.write_u8(((value >> 8) & 0xFF).to_byte()) + self.write_u8((value & 0xFF).to_byte()) +} + +///| +pub fn BytesWriter::write_u32_le(self : 
BytesWriter, value : UInt) -> Unit { + self.write_u8((value & 0xFF).to_byte()) + self.write_u8(((value >> 8) & 0xFF).to_byte()) + self.write_u8(((value >> 16) & 0xFF).to_byte()) + self.write_u8(((value >> 24) & 0xFF).to_byte()) +} + +///| +pub fn BytesWriter::write_u32_be(self : BytesWriter, value : UInt) -> Unit { + self.write_u8(((value >> 24) & 0xFF).to_byte()) + self.write_u8(((value >> 16) & 0xFF).to_byte()) + self.write_u8(((value >> 8) & 0xFF).to_byte()) + self.write_u8((value & 0xFF).to_byte()) +} + +///| +pub fn BytesWriter::write_u64_le(self : BytesWriter, value : UInt64) -> Unit { + let mut value = value + for _i in 0..<8 { + self.write_u8((value & 0xFF).to_byte()) + value = value >> 8 + } +} + +///| +pub fn BytesWriter::write_u64_be(self : BytesWriter, value : UInt64) -> Unit { + for i in 0..<8 { + let shift = 56 - 8 * i + self.write_u8(((value >> shift) & 0xFF).to_byte()) + } +} diff --git a/moon/loro_codec/bytes_test.mbt b/moon/loro_codec/bytes_test.mbt new file mode 100644 index 000000000..7c88760e8 --- /dev/null +++ b/moon/loro_codec/bytes_test.mbt @@ -0,0 +1,28 @@ +///| +test "bytes reader/writer roundtrip" { + let w = BytesWriter::new() + w.write_u8(0xAB) + w.write_u16_le(0x1234) + w.write_u16_be(0xABCD) + w.write_u32_le(0x89ABCDEF) + w.write_u32_be(0x01020304) + w.write_u64_le(0x0123456789ABCDEF) + w.write_u64_be(0x0123456789ABCDEF) + let bytes = w.to_bytes() + let r = BytesReader::new(bytes) + assert_eq(try! r.read_u8(), 0xAB) + assert_eq(try! r.read_u16_le(), 0x1234) + assert_eq(try! r.read_u16_be(), 0xABCD) + assert_eq(try! r.read_u32_le(), 0x89ABCDEF) + assert_eq(try! r.read_u32_be(), 0x01020304) + assert_eq(try! r.read_u64_le(), 0x0123456789ABCDEF) + assert_eq(try! r.read_u64_be(), 0x0123456789ABCDEF) + assert_eq(r.remaining(), 0) +} + +///| +test "bytes reader eof" { + let r = BytesReader::new(b"") + let b = r.read_u8() catch { DecodeError(_) => b'\xFF' } + assert_eq(b, b'\xFF') +} diff --git a/moon/loro_codec/change.mbt b/moon/loro_codec/change.mbt new file mode 100644 index 000000000..a3725c053 --- /dev/null +++ b/moon/loro_codec/change.mbt @@ -0,0 +1,50 @@ +///| +pub struct Change { + id : ID + timestamp : Int64 + deps : Array[ID] + lamport : UInt + msg : String? + ops : Array[Op] +} derive(Eq, Show) + +///| +pub fn Change::new( + id : ID, + timestamp : Int64, + deps : Array[ID], + lamport : UInt, + msg : String?, +) -> Change { + { id, timestamp, deps, lamport, msg, ops: [] } +} + +///| +pub fn Change::id(self : Change) -> ID { + self.id +} + +///| +pub fn Change::timestamp(self : Change) -> Int64 { + self.timestamp +} + +///| +pub fn Change::deps(self : Change) -> Array[ID] { + self.deps +} + +///| +pub fn Change::lamport(self : Change) -> UInt { + self.lamport +} + +///| +pub fn Change::msg(self : Change) -> String? { + self.msg +} + +///| +pub fn Change::ops(self : Change) -> Array[Op] { + self.ops +} diff --git a/moon/loro_codec/change_block.mbt b/moon/loro_codec/change_block.mbt new file mode 100644 index 000000000..b3ad401bc --- /dev/null +++ b/moon/loro_codec/change_block.mbt @@ -0,0 +1,10 @@ +// ChangeBlock codec (FastUpdates). 
+// +// Implementation is split into cohesive files: +// - change_block_encoded_block.mbt +// - change_block_header.mbt +// - change_block_meta.mbt +// - change_block_arena.mbt +// - change_block_ops.mbt +// - change_block_decode.mbt +// - change_block_encode.mbt diff --git a/moon/loro_codec/change_block_arena.mbt b/moon/loro_codec/change_block_arena.mbt new file mode 100644 index 000000000..162395573 --- /dev/null +++ b/moon/loro_codec/change_block_arena.mbt @@ -0,0 +1,78 @@ +///| +pub fn decode_keys(bytes : BytesView) -> Array[String] raise DecodeError { + let r = BytesReader::from_view(bytes) + let out : Array[String] = [] + while r.remaining() > 0 { + let len_u64 = r.read_uleb128_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("keys: key too long") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("keys: invalid key length") + } + let key_bytes = r.read_exact(len) + let key = @encoding/utf8.decode(key_bytes) catch { + @encoding/utf8.Malformed(_) => raise DecodeError("keys: invalid utf8") + } + out.push(key) + } + out +} + +///| +pub fn decode_container_arena( + bytes : BytesView, + peers : Array[UInt64], + keys : Array[String], +) -> Array[ContainerID] raise DecodeError { + let r = BytesReader::from_view(bytes) + let n_u64 = r.read_varint_u64() + if n_u64 > 0x7FFF_FFFFUL { + raise DecodeError("container_arena: too many containers") + } + let n = n_u64.to_int() + if n < 0 { + raise DecodeError("container_arena: invalid len") + } + let out : Array[ContainerID] = [] + for _i in 0..<n { + let _field_num = r.read_varint_u64() // postcard struct field count (always 4) + let is_root = match r.read_u8() { + b'\x00' => false + b'\x01' => true + _ => raise DecodeError("container_arena: invalid bool") + } + let kind = container_type_from_u8(r.read_u8()) + let peer_idx_u64 = r.read_varint_u64() + if peer_idx_u64 > 0x7FFF_FFFFUL { + raise DecodeError("container_arena: peer_idx too large") + } + let peer_idx = peer_idx_u64.to_int() + let key_idx_or_counter_i64 = r.read_varint_i64() + if key_idx_or_counter_i64 < -2147483648L || + key_idx_or_counter_i64 > 2147483647L { + raise DecodeError("container_arena: i32 overflow") + } + let key_idx_or_counter = key_idx_or_counter_i64.to_int() + if is_root { + if key_idx_or_counter < 0 || key_idx_or_counter >= keys.length() { + raise DecodeError("container_arena: invalid root key idx") + } + out.push(ContainerID::root(keys[key_idx_or_counter], kind)) + } else { + if peer_idx < 0 || peer_idx >= peers.length() { + raise DecodeError("container_arena: invalid peer idx") + } + out.push(ContainerID::normal(peers[peer_idx], key_idx_or_counter, kind)) + } + } + if r.remaining() != 0 { + raise DecodeError("container_arena: trailing bytes") + } + out +} diff --git a/moon/loro_codec/change_block_decode.mbt b/moon/loro_codec/change_block_decode.mbt new file mode 100644 index 000000000..30051764f --- /dev/null +++ b/moon/loro_codec/change_block_decode.mbt @@ -0,0 +1,235 @@ +///| +pub fn decode_change_block( + bytes : BytesView, +) -> Array[Change] raise DecodeError { + let doc = decode_encoded_block(bytes) + let header = decode_changes_header( + doc.header()[:], + doc.n_changes(), + doc.counter_start(), + doc.counter_len(), + doc.lamport_start(), + doc.lamport_len(), + ) + let meta = decode_changes_meta(doc.change_meta()[:], doc.n_changes()) + let keys = decode_keys(doc.keys()[:]) + let cids = decode_container_arena(doc.cids()[:], header.peers(), keys) + let positions = decode_position_arena_v2(doc.positions()[:]) + let encoded_ops = decode_encoded_ops(doc.ops()[:]) + let delete_start_ids = decode_delete_start_ids(doc.delete_start_ids()[:]) + let n_u64 =
doc.n_changes().to_uint64() + if n_u64 > 0x7FFF_FFFFUL { + raise DecodeError("change_block: too many changes") + } + let n = n_u64.to_int() + let changes : Array[Change] = [] + for i in 0..<n { + changes.push( + Change::new( + ID::new(header.peer(), header.counters()[i]), + meta.timestamps()[i], + header.deps()[i], + header.lamports()[i], + meta.commit_msgs()[i], + ), + ) + } + let peer = header.peer() + let mut change_idx = 0 + let mut counter_i64 = header.counters()[0].to_int64() + let mut del_idx = 0 + let mut values_rest = doc.values()[:] + for row in encoded_ops { + if row.container_index > 0x7FFF_FFFFU { + raise DecodeError("change_block: container_index too large") + } + let cid_i = row.container_index.reinterpret_as_int() + if cid_i < 0 || cid_i >= cids.length() { + raise DecodeError("change_block: invalid container_index") + } + let cid = cids[cid_i] + let tag = (row.value_type & 0xFF).to_byte() + let (value, rest) = decode_value_content(tag, values_rest) + values_rest = rest + if counter_i64 < -2147483648L || counter_i64 > 2147483647L { + raise DecodeError("change_block: counter overflow") + } + let counter = counter_i64.to_int() + let op_id = ID::new(peer, counter) + let (content, next_del_idx) = decode_op_content( + cid, + row.prop, + row.len, + value, + header.peers(), + keys, + positions, + delete_start_ids, + del_idx, + op_id, + ) + del_idx = next_del_idx + if change_idx < 0 || change_idx >= changes.length() { + raise DecodeError("change_block: change index overflow") + } + changes[change_idx].ops().push(Op::new(cid, counter, row.len, content)) + counter_i64 = counter_i64 + row.len.to_int64() + if change_idx + 1 < header.counters().length() { + let next_boundary = header.counters()[change_idx + 1].to_int64() + if counter_i64 > next_boundary { + raise DecodeError("change_block: op len overflow change boundary") + } + if counter_i64 == next_boundary { + change_idx = change_idx + 1 + } + } + } + if values_rest.length() != 0 { + raise DecodeError("change_block: trailing value bytes") + } + if del_idx != delete_start_ids.length() { + raise DecodeError("change_block: unused delete_start_ids") + } + changes +} + +///| +pub struct DecodedChangeBlock { + peers : Array[UInt64] + keys : Array[String] + cids : Array[ContainerID] + positions : Array[Bytes] + changes : Array[Change] +} + +///| +pub fn DecodedChangeBlock::peers(self : DecodedChangeBlock) -> Array[UInt64] { + self.peers +} + +///| +pub fn DecodedChangeBlock::keys(self : DecodedChangeBlock) -> Array[String] { + self.keys +} + +///| +pub fn DecodedChangeBlock::cids( + self : DecodedChangeBlock, +) -> Array[ContainerID] { + self.cids +} + +///| +pub fn DecodedChangeBlock::positions(self : DecodedChangeBlock) -> Array[Bytes] { + self.positions +} + +///| +pub fn DecodedChangeBlock::changes(self : DecodedChangeBlock) -> Array[Change] { + self.changes +} + +///| +pub fn decode_change_block_full( + bytes : BytesView, +) -> DecodedChangeBlock raise DecodeError { + let doc = decode_encoded_block(bytes) + let header = decode_changes_header( + doc.header()[:], + doc.n_changes(), + doc.counter_start(), + doc.counter_len(), + doc.lamport_start(), + doc.lamport_len(), + ) + let keys = decode_keys(doc.keys()[:]) + let cids = decode_container_arena(doc.cids()[:], header.peers(), keys) + let positions = decode_position_arena_v2(doc.positions()[:]) + let changes = decode_change_block(bytes) + { peers: header.peers(), keys, cids, positions, changes } +} + +///| +fn change_atom_len_u64(c : Change) -> UInt64 { + let mut sum : UInt64 = 0 + for op in c.ops() { + sum = sum + op.len().to_uint64() + } + sum +} + +///| +fn init_u64_index(xs : Array[UInt64]) -> @hashmap.HashMap[UInt64, UInt64] { + let m = @hashmap.new(capacity=xs.length()) + for i in 0..<xs.length() { + m.set(xs[i], i.to_uint64()) + } + m +} + +///| +fn init_string_index(xs : Array[String]) -> @hashmap.HashMap[String, UInt64] { + let m = @hashmap.new(capacity=xs.length()) + for i in 0..<xs.length() { + m.set(xs[i], i.to_uint64()) + } + m +} + +///| +fn register_peer( + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], + peer : UInt64, +) -> UInt64 { + match peer_to_idx.get(peer) { + Some(idx) => idx + None => { + let idx = peers.length().to_uint64() + peers.push(peer) + peer_to_idx.set(peer, idx) + idx + } + } +} + +///| +fn register_key( + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + key : String, +) -> UInt64 { + match key_to_idx.get(key) { + Some(idx) => idx + None => { + let idx = keys.length().to_uint64() + keys.push(key) + key_to_idx.set(key, idx) + idx + } + } +} + +///| +fn register_cid(cids : Array[ContainerID], cid : ContainerID) -> UInt { + for i in 0..<cids.length() { + if cids[i] == cid { + return i.reinterpret_as_uint() + } + } + cids.push(cid) + (cids.length() - 1).reinterpret_as_uint() +} + +///| +fn register_position(positions : Array[Bytes], pos : Bytes) -> UInt64 { + for i in 0..<positions.length() { + if positions[i] == pos { + return i.to_uint64() + } + } + positions.push(pos) + (positions.length() - 1).to_uint64() +} diff --git a/moon/loro_codec/change_block_encode.mbt b/moon/loro_codec/change_block_encode.mbt new file mode 100644 --- /dev/null +++ b/moon/loro_codec/change_block_encode.mbt +///| +pub fn encode_change_block( + block : DecodedChangeBlock, +) -> Bytes raise EncodeError { + let changes = block.changes() + if changes.length() == 0 { + raise EncodeError("change_block: empty changes") + } + + // Mutable working tables (can append new items if needed). + let peers : Array[UInt64] = [] + for p in block.peers() { + peers.push(p) + } + let peer_to_idx = init_u64_index(peers) + let keys : Array[String] = [] + for k in block.keys() { + keys.push(k) + } + let key_to_idx = init_string_index(keys) + let cids : Array[ContainerID] = [] + for cid in block.cids() { + cids.push(cid) + } + let positions : Array[Bytes] = [] + for p in block.positions() { + positions.push(p) + } + + // Encode ops/values first; this may append to peers/keys/cids/positions. + let (encoded_ops, delete_start_ids, values) = encode_ops_and_values( + changes, peers, peer_to_idx, keys, key_to_idx, cids, positions, + ) + + // Encode container arena next; this may append to peers/keys. + let cids_bytes = encode_container_arena_from_table( + cids, peers, peer_to_idx, keys, key_to_idx, + ) + let keys_bytes = encode_keys_from_table(keys) + let positions_bytes = encode_position_arena_v2(positions) + let ops_bytes = encode_encoded_ops(encoded_ops) + let delete_start_ids_bytes = encode_delete_start_ids(delete_start_ids) + + // Encode header/meta last; this may append to peers (via deps). + let header = encode_changes_header_from_changes(changes, peers, peer_to_idx) + let change_meta = encode_changes_meta_from_changes(changes) + + // Derive block-level ranges.
+ let first = changes[0] + let last = changes[changes.length() - 1] + let counter_start = first.id().counter() + let mut counter_len_u64 : UInt64 = 0 + for c in changes { + counter_len_u64 = counter_len_u64 + change_atom_len_u64(c) + } + if counter_start < 0 || counter_start > 2147483647 { + raise EncodeError("change_block: counter_start overflow") + } + if counter_len_u64 > 0xFFFF_FFFFUL { + raise EncodeError("change_block: counter_len overflow") + } + let lamport_start = first.lamport() + let last_len = change_atom_len_u64(last) + let lamport_end_u64 = last.lamport().to_uint64() + last_len + if lamport_end_u64 < lamport_start.to_uint64() { + raise EncodeError("change_block: invalid lamport range") + } + let lamport_len_u64 = lamport_end_u64 - lamport_start.to_uint64() + if lamport_len_u64 > 0xFFFF_FFFFUL { + raise EncodeError("change_block: lamport_len overflow") + } + let out = { + counter_start: counter_start.reinterpret_as_uint(), + counter_len: counter_len_u64.to_uint(), + lamport_start, + lamport_len: lamport_len_u64.to_uint(), + n_changes: changes.length().reinterpret_as_uint(), + header, + change_meta, + cids: cids_bytes, + keys: keys_bytes, + positions: positions_bytes, + ops: ops_bytes, + delete_start_ids: delete_start_ids_bytes, + values, + } + encode_encoded_block(out) +} diff --git a/moon/loro_codec/change_block_encode_header_meta.mbt b/moon/loro_codec/change_block_encode_header_meta.mbt new file mode 100644 index 000000000..235c58dc5 --- /dev/null +++ b/moon/loro_codec/change_block_encode_header_meta.mbt @@ -0,0 +1,82 @@ +///| +fn encode_changes_header_from_changes( + changes : Array[Change], + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> Bytes raise EncodeError { + if changes.length() == 0 { + raise EncodeError("change_block: empty changes") + } + let peer0 = changes[0].id().peer() + if peers.length() == 0 { + peers.push(peer0) + peer_to_idx.set(peer0, 0UL) + } else if peers[0] != peer0 { + raise EncodeError("change_block: peers[0] must be block peer") + } + let n = changes.length() + let dep_on_self : Array[Bool] = [] + let dep_lens : Array[UInt64] = [] + let dep_peer_idxs : Array[UInt64] = [] + let dep_counters : Array[Int64] = [] + for c in changes { + let mut on_self = false + for dep in c.deps() { + if dep.peer() == peer0 { + on_self = true + } else { + dep_peer_idxs.push(register_peer(peers, peer_to_idx, dep.peer())) + dep_counters.push(dep.counter().to_int64()) + } + } + dep_on_self.push(on_self) + let dep_len = if on_self { + (c.deps().length() - 1).to_uint64() + } else { + c.deps().length().to_uint64() + } + dep_lens.push(dep_len) + } + let w = BytesWriter::new() + w.write_uleb128_u64(peers.length().to_uint64()) + for p in peers { + w.write_u64_le(p) + } + for i in 0..<(n - 1) { + let atom_len = change_atom_len_u64(changes[i]) + w.write_uleb128_u64(atom_len) + } + w.write_bytes(encode_bool_rle(dep_on_self)) + w.write_bytes(encode_any_rle_usize(dep_lens)) + w.write_bytes(encode_any_rle_usize(dep_peer_idxs)) + w.write_bytes(encode_delta_of_delta_i64(dep_counters)) + let lamports : Array[Int64] = [] + for i in 0..<(n - 1) { + lamports.push(changes[i].lamport().to_int64()) + } + w.write_bytes(encode_delta_of_delta_i64(lamports)) + w.to_bytes() +} + +///| +fn encode_changes_meta_from_changes(changes : Array[Change]) -> Bytes { + let timestamps : Array[Int64] = [] + let lens_u32 : Array[UInt] = [] + let msgs_w = BytesWriter::new() + for c in changes { + timestamps.push(c.timestamp()) + match c.msg() { + None => lens_u32.push(0) + Some(s) 
=> { + let b = @encoding/utf8.encode(s[:]) + lens_u32.push(b.length().reinterpret_as_uint()) + msgs_w.write_bytes(b) + } + } + } + let w = BytesWriter::new() + w.write_bytes(encode_delta_of_delta_i64(timestamps)) + w.write_bytes(encode_any_rle_u32(lens_u32)) + w.write_bytes(msgs_w.to_bytes()) + w.to_bytes() +} diff --git a/moon/loro_codec/change_block_encode_ops_values.mbt b/moon/loro_codec/change_block_encode_ops_values.mbt new file mode 100644 index 000000000..5f0874c03 --- /dev/null +++ b/moon/loro_codec/change_block_encode_ops_values.mbt @@ -0,0 +1,162 @@ +///| +fn encode_ops_and_values( + changes : Array[Change], + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + cids : Array[ContainerID], + positions : Array[Bytes], +) -> (Array[EncodedOpRow], Array[EncodedDeleteStartIdRow], Bytes) raise EncodeError { + let ops : Array[EncodedOpRow] = [] + let del_ids : Array[EncodedDeleteStartIdRow] = [] + let values_w = BytesWriter::new() + for c in changes { + for op in c.ops() { + let cid_idx = register_cid(cids, op.container()) + let (prop, value, maybe_del) = match op.content() { + OpContent::List(ListOp::Insert(pos, items)) => { + let v = Value::LoroValue(LoroValue::List(items)) + (pos.reinterpret_as_int(), v, Option::None) + } + OpContent::List(ListOp::Delete(pos, del_len, start_id)) => + (pos, Value::DeleteSeq, Option::Some((start_id, del_len))) + OpContent::MovableList(MovableListOp::Insert(pos, items)) => { + let v = Value::LoroValue(LoroValue::List(items)) + (pos.reinterpret_as_int(), v, Option::None) + } + OpContent::MovableList(MovableListOp::Delete(pos, del_len, start_id)) => + (pos, Value::DeleteSeq, Option::Some((start_id, del_len))) + OpContent::MovableList(MovableListOp::Move(from, to, elem_id)) => { + let from_idx = register_peer(peers, peer_to_idx, elem_id.peer()) + let v = Value::ListMove({ + from: from.to_uint64(), + from_idx, + lamport: elem_id.lamport().to_uint64(), + }) + (to.reinterpret_as_int(), v, Option::None) + } + OpContent::MovableList(MovableListOp::Set(elem_id, value)) => { + let peer_idx = register_peer(peers, peer_to_idx, elem_id.peer()) + let v = Value::ListSet({ peer_idx, lamport: elem_id.lamport(), value }) + (0, v, Option::None) + } + OpContent::Map(MapOp::Insert(key, value)) => { + let key_idx = register_key(keys, key_to_idx, key) + if key_idx > 0x7FFF_FFFFUL { + raise EncodeError("op: map key idx too large") + } + (key_idx.to_int(), Value::LoroValue(value), Option::None) + } + OpContent::Map(MapOp::Delete(key)) => { + let key_idx = register_key(keys, key_to_idx, key) + if key_idx > 0x7FFF_FFFFUL { + raise EncodeError("op: map key idx too large") + } + (key_idx.to_int(), Value::DeleteOnce, Option::None) + } + OpContent::Text(TextOp::Insert(pos, s)) => + (pos.reinterpret_as_int(), Value::Str(s), Option::None) + OpContent::Text(TextOp::Delete(pos, del_len, start_id)) => + (pos, Value::DeleteSeq, Option::Some((start_id, del_len))) + OpContent::Text(TextOp::Mark(start, end, key, value, info)) => { + let key_idx = register_key(keys, key_to_idx, key) + if key_idx > 0x7FFF_FFFFUL { + raise EncodeError("op: mark key idx too large") + } + let len_u64 = (end - start).to_uint64() + let v = Value::MarkStart({ info, len: len_u64, key_idx, value }) + (start.reinterpret_as_int(), v, Option::None) + } + OpContent::Text(TextOp::MarkEnd) => (0, Value::Null, Option::None) + OpContent::Tree(TreeOp::Create(target, parent, fi)) => { + let subject_peer_idx = register_peer( + peers, + 
peer_to_idx, + target.peer(), + ) + let pos_idx = register_position(positions, fi.bytes()) + let (is_parent_null, parent_peer_idx, parent_cnt) = match parent { + Option::None => (true, 0UL, 0) + Option::Some(p) => + (false, register_peer(peers, peer_to_idx, p.peer()), p.counter()) + } + let v = Value::RawTreeMove({ + subject_peer_idx, + subject_cnt: target.counter(), + position_idx: pos_idx, + is_parent_null, + parent_peer_idx, + parent_cnt, + }) + (0, v, Option::None) + } + OpContent::Tree(TreeOp::Move(target, parent, fi)) => { + let subject_peer_idx = register_peer( + peers, + peer_to_idx, + target.peer(), + ) + let pos_idx = register_position(positions, fi.bytes()) + let (is_parent_null, parent_peer_idx, parent_cnt) = match parent { + Option::None => (true, 0UL, 0) + Option::Some(p) => + (false, register_peer(peers, peer_to_idx, p.peer()), p.counter()) + } + let v = Value::RawTreeMove({ + subject_peer_idx, + subject_cnt: target.counter(), + position_idx: pos_idx, + is_parent_null, + parent_peer_idx, + parent_cnt, + }) + (0, v, Option::None) + } + OpContent::Tree(TreeOp::Delete(target)) => { + let subject_peer_idx = register_peer( + peers, + peer_to_idx, + target.peer(), + ) + let deleted_root_peer = 0xFFFF_FFFF_FFFF_FFFFUL + let deleted_root_cnt = 2147483647 + let parent_peer_idx = register_peer( + peers, peer_to_idx, deleted_root_peer, + ) + let v = Value::RawTreeMove({ + subject_peer_idx, + subject_cnt: target.counter(), + position_idx: 0UL, + is_parent_null: false, + parent_peer_idx, + parent_cnt: deleted_root_cnt, + }) + (0, v, Option::None) + } + OpContent::Future(FutureOp::Unknown(prop, raw)) => + (prop, raw, Option::None) + } + match maybe_del { + Option::None => () + Option::Some((start_id, signed_len)) => { + let peer_idx = register_peer(peers, peer_to_idx, start_id.peer()) + del_ids.push({ + peer_idx, + counter: start_id.counter(), + len: signed_len, + }) + } + } + let (tag, content) = encode_value_content(value) + values_w.write_bytes(content) + ops.push({ + container_index: cid_idx, + prop, + value_type: tag.to_uint(), + len: op.len(), + }) + } + } + (ops, del_ids, values_w.to_bytes()) +} diff --git a/moon/loro_codec/change_block_encode_tables.mbt b/moon/loro_codec/change_block_encode_tables.mbt new file mode 100644 index 000000000..5527b6838 --- /dev/null +++ b/moon/loro_codec/change_block_encode_tables.mbt @@ -0,0 +1,48 @@ +///| +fn encode_keys_from_table(keys : Array[String]) -> Bytes { + let w = BytesWriter::new() + for key in keys { + let b = @encoding/utf8.encode(key[:]) + w.write_uleb128_u64(b.length().to_uint64()) + w.write_bytes(b) + } + w.to_bytes() +} + +///| +fn encode_container_arena_from_table( + cids : Array[ContainerID], + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], +) -> Bytes raise EncodeError { + let w = BytesWriter::new() + w.write_varint_u64(cids.length().to_uint64()) + for cid in cids { + w.write_varint_u64(4UL) + match cid { + ContainerID::Root(name, kind) => { + w.write_u8(b'\x01') + w.write_u8(container_type_to_u8(kind)) + w.write_varint_u64(0UL) + let idx = register_key(keys, key_to_idx, name) + if idx > 0x7FFF_FFFFUL { + raise EncodeError("container_arena: root key idx too large") + } + w.write_varint_i64(idx.reinterpret_as_int64()) + } + ContainerID::Normal(peer, counter, kind) => { + w.write_u8(b'\x00') + w.write_u8(container_type_to_u8(kind)) + let idx = register_peer(peers, peer_to_idx, peer) + if idx > 0x7FFF_FFFFUL { + raise 
EncodeError("container_arena: peer idx too large") + } + w.write_varint_u64(idx) + w.write_varint_i64(counter.to_int64()) + } + } + } + w.to_bytes() +} diff --git a/moon/loro_codec/change_block_encoded_block.mbt b/moon/loro_codec/change_block_encoded_block.mbt new file mode 100644 index 000000000..98b9e71ce --- /dev/null +++ b/moon/loro_codec/change_block_encoded_block.mbt @@ -0,0 +1,171 @@ +///| +pub struct EncodedBlock { + counter_start : UInt + counter_len : UInt + lamport_start : UInt + lamport_len : UInt + n_changes : UInt + header : Bytes + change_meta : Bytes + cids : Bytes + keys : Bytes + positions : Bytes + ops : Bytes + delete_start_ids : Bytes + values : Bytes +} + +///| +pub fn EncodedBlock::counter_start(self : EncodedBlock) -> UInt { + self.counter_start +} + +///| +pub fn EncodedBlock::counter_len(self : EncodedBlock) -> UInt { + self.counter_len +} + +///| +pub fn EncodedBlock::lamport_start(self : EncodedBlock) -> UInt { + self.lamport_start +} + +///| +pub fn EncodedBlock::lamport_len(self : EncodedBlock) -> UInt { + self.lamport_len +} + +///| +pub fn EncodedBlock::n_changes(self : EncodedBlock) -> UInt { + self.n_changes +} + +///| +pub fn EncodedBlock::header(self : EncodedBlock) -> Bytes { + self.header +} + +///| +pub fn EncodedBlock::change_meta(self : EncodedBlock) -> Bytes { + self.change_meta +} + +///| +pub fn EncodedBlock::cids(self : EncodedBlock) -> Bytes { + self.cids +} + +///| +pub fn EncodedBlock::keys(self : EncodedBlock) -> Bytes { + self.keys +} + +///| +pub fn EncodedBlock::positions(self : EncodedBlock) -> Bytes { + self.positions +} + +///| +pub fn EncodedBlock::ops(self : EncodedBlock) -> Bytes { + self.ops +} + +///| +pub fn EncodedBlock::delete_start_ids(self : EncodedBlock) -> Bytes { + self.delete_start_ids +} + +///| +pub fn EncodedBlock::values(self : EncodedBlock) -> Bytes { + self.values +} + +///| +fn read_postcard_u32_block(r : BytesReader) -> UInt raise DecodeError { + let v = r.read_varint_u64() + if v > 0xFFFF_FFFFUL { + raise DecodeError("postcard: u32 overflow") + } + v.to_uint() +} + +///| +fn write_postcard_u32_block(w : BytesWriter, v : UInt) -> Unit { + w.write_varint_u64(v.to_uint64()) +} + +///| +fn read_postcard_bytes(r : BytesReader) -> Bytes raise DecodeError { + let len_u64 = r.read_varint_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("postcard: bytes too large") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("postcard: invalid bytes length") + } + r.read_exact(len).to_bytes() +} + +///| +fn write_postcard_bytes(w : BytesWriter, b : Bytes) -> Unit { + w.write_varint_u64(b.length().to_uint64()) + w.write_bytes(b) +} + +///| +pub fn decode_encoded_block( + bytes : BytesView, +) -> EncodedBlock raise DecodeError { + let r = BytesReader::from_view(bytes) + let counter_start = read_postcard_u32_block(r) + let counter_len = read_postcard_u32_block(r) + let lamport_start = read_postcard_u32_block(r) + let lamport_len = read_postcard_u32_block(r) + let n_changes = read_postcard_u32_block(r) + let header = read_postcard_bytes(r) + let change_meta = read_postcard_bytes(r) + let cids = read_postcard_bytes(r) + let keys = read_postcard_bytes(r) + let positions = read_postcard_bytes(r) + let ops = read_postcard_bytes(r) + let delete_start_ids = read_postcard_bytes(r) + let values = read_postcard_bytes(r) + if r.remaining() != 0 { + raise DecodeError("postcard: trailing bytes") + } + { + counter_start, + counter_len, + lamport_start, + lamport_len, + n_changes, + header, + 
change_meta, + cids, + keys, + positions, + ops, + delete_start_ids, + values, + } +} + +///| +pub fn encode_encoded_block(block : EncodedBlock) -> Bytes { + let w = BytesWriter::new() + write_postcard_u32_block(w, block.counter_start) + write_postcard_u32_block(w, block.counter_len) + write_postcard_u32_block(w, block.lamport_start) + write_postcard_u32_block(w, block.lamport_len) + write_postcard_u32_block(w, block.n_changes) + write_postcard_bytes(w, block.header) + write_postcard_bytes(w, block.change_meta) + write_postcard_bytes(w, block.cids) + write_postcard_bytes(w, block.keys) + write_postcard_bytes(w, block.positions) + write_postcard_bytes(w, block.ops) + write_postcard_bytes(w, block.delete_start_ids) + write_postcard_bytes(w, block.values) + w.to_bytes() +} diff --git a/moon/loro_codec/change_block_encoded_block_test.mbt b/moon/loro_codec/change_block_encoded_block_test.mbt new file mode 100644 index 000000000..2b9650255 --- /dev/null +++ b/moon/loro_codec/change_block_encoded_block_test.mbt @@ -0,0 +1,21 @@ +///| +test "change_block: postcard encoded block roundtrip" { + // u32 fields: 1,2,10,2,1; then 8 bytes fields with varint len. + let encoded = b"\x01\x02\x0A\x02\x01\x03hdr\x04meta\x01c\x01k\x01p\x01o\x01d\x01v" + let decoded = try! decode_encoded_block(encoded[:]) + assert_eq(decoded.counter_start(), 1) + assert_eq(decoded.counter_len(), 2) + assert_eq(decoded.lamport_start(), 10) + assert_eq(decoded.lamport_len(), 2) + assert_eq(decoded.n_changes(), 1) + assert_eq(decoded.header(), b"hdr") + assert_eq(decoded.change_meta(), b"meta") + assert_eq(decoded.cids(), b"c") + assert_eq(decoded.keys(), b"k") + assert_eq(decoded.positions(), b"p") + assert_eq(decoded.ops(), b"o") + assert_eq(decoded.delete_start_ids(), b"d") + assert_eq(decoded.values(), b"v") + let encoded2 = encode_encoded_block(decoded) + assert_eq(encoded2, encoded) +} diff --git a/moon/loro_codec/change_block_header.mbt b/moon/loro_codec/change_block_header.mbt new file mode 100644 index 000000000..aad1c132b --- /dev/null +++ b/moon/loro_codec/change_block_header.mbt @@ -0,0 +1,182 @@ +///| +pub struct ChangesHeader { + peer : UInt64 + peers : Array[UInt64] + counters : Array[Int] + lengths : Array[Int] + lamports : Array[UInt] + deps : Array[Array[ID]] +} + +///| +pub fn ChangesHeader::peer(self : ChangesHeader) -> UInt64 { + self.peer +} + +///| +pub fn ChangesHeader::peers(self : ChangesHeader) -> Array[UInt64] { + self.peers +} + +///| +pub fn ChangesHeader::counters(self : ChangesHeader) -> Array[Int] { + self.counters +} + +///| +pub fn ChangesHeader::lengths(self : ChangesHeader) -> Array[Int] { + self.lengths +} + +///| +pub fn ChangesHeader::lamports(self : ChangesHeader) -> Array[UInt] { + self.lamports +} + +///| +pub fn ChangesHeader::deps(self : ChangesHeader) -> Array[Array[ID]] { + self.deps +} + +///| +pub fn decode_changes_header( + bytes : BytesView, + n_changes : UInt, + counter_start : UInt, + counter_len : UInt, + lamport_start : UInt, + lamport_len : UInt, +) -> ChangesHeader raise DecodeError { + let n_u64 = n_changes.to_uint64() + if n_u64 > 0x7FFF_FFFFUL { + raise DecodeError("change_header: too many changes") + } + let n = n_u64.to_int() + if n <= 0 { + raise DecodeError("change_header: empty block") + } + let first_counter = counter_start.reinterpret_as_int() + let counter_len_i = counter_len.reinterpret_as_int() + if counter_len_i < 0 { + raise DecodeError("change_header: invalid counter_len") + } + let r = BytesReader::from_view(bytes) + let peer_num_u64 = r.read_uleb128_u64() + 
if peer_num_u64 > 0x7FFF_FFFFUL { + raise DecodeError("change_header: too many peers") + } + let peer_num = peer_num_u64.to_int() + if peer_num <= 0 { + raise DecodeError("change_header: empty peer table") + } + let peers : Array[UInt64] = [] + for _i in 0..<peer_num { + peers.push(r.read_u64_le()) + } + let lengths : Array[Int] = [] + let mut sum : Int = 0 + for _i in 0..<(n - 1) { + let len_u64 = r.read_uleb128_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("change_header: atom_len too large") + } + let len_i = len_u64.to_int() + if len_i < 0 { + raise DecodeError("change_header: invalid atom_len") + } + sum = sum + len_i + lengths.push(len_i) + } + let last_len = counter_len_i - sum + if last_len < 0 { + raise DecodeError("change_header: invalid atom_len sum") + } + lengths.push(last_len) + let (dep_self, rest1) = bool_rle_take_n_finalize(r.remaining_view(), n) + let (dep_lens_u64, rest2) = any_rle_take_n_finalize_usize(rest1, n) + let deps_len : Array[Int] = [] + let mut other_dep_num : Int = 0 + for x in dep_lens_u64 { + if x > 0x7FFF_FFFFUL { + raise DecodeError("change_header: dep_len too large") + } + let xi = x.to_int() + if xi < 0 { + raise DecodeError("change_header: invalid dep_len") + } + other_dep_num = other_dep_num + xi + deps_len.push(xi) + } + let (dep_peers_u64, rest3) = any_rle_take_n_finalize_usize( + rest2, other_dep_num, + ) + let dep_peers : Array[Int] = [] + for x in dep_peers_u64 { + if x > 0x7FFF_FFFFUL { + raise DecodeError("change_header: dep_peer_idx too large") + } + dep_peers.push(x.to_int()) + } + let (dep_counters_i64, rest4) = delta_of_delta_take_n_finalize_i64( + rest3, other_dep_num, + ) + let dep_counters : Array[Int] = [] + for x in dep_counters_i64 { + if x < 0L || x > 2147483647L { + raise DecodeError("change_header: dep counter overflow") + } + dep_counters.push(x.to_int()) + } + let deps : Array[Array[ID]] = [] + let mut this_counter = first_counter + let mut dep_idx = 0 + for i in 0..<n { + let ids : Array[ID] = [] + if dep_self[i] { + ids.push(ID::new(peers[0], this_counter - 1)) + } + for _j in 0..<deps_len[i] { + if dep_idx >= dep_peers.length() || dep_idx >= dep_counters.length() { + raise DecodeError("change_header: deps underflow") + } + let peer_idx = dep_peers[dep_idx] + if peer_idx < 0 || peer_idx >= peers.length() { + raise DecodeError("change_header: invalid dep peer idx") + } + ids.push(ID::new(peers[peer_idx], dep_counters[dep_idx])) + dep_idx = dep_idx + 1 + } + deps.push(ids) + this_counter = this_counter + lengths[i] + } + if dep_idx != dep_peers.length() || dep_idx != dep_counters.length() { + raise DecodeError("change_header: deps trailing") + } + let counters : Array[Int] = [] + let mut cur = first_counter + for i in 0..<n { + counters.push(cur) + cur = cur + lengths[i] + } + let (lamports_i64, rest5) = delta_of_delta_take_n_finalize_i64( + rest4, n - 1, + ) + let lamports : Array[UInt] = [] + for x in lamports_i64 { + if x < 0L || x > 0xFFFF_FFFFL { + raise DecodeError("change_header: lamport overflow") + } + lamports.push(x.reinterpret_as_uint64().to_uint()) + } + let last_len_u64 = lengths[n - 1].to_uint64() + let last_lamport_u64 = lamport_start.to_uint64() + lamport_len.to_uint64() + if last_lamport_u64 < last_len_u64 { + raise DecodeError("change_header: invalid lamport range") + } + let last_lamport = (last_lamport_u64 - last_len_u64).to_uint() + lamports.push(last_lamport) + if rest5.length() != 0 { + raise DecodeError("change_header: trailing bytes") + } + { peer: peers[0], peers, counters, lengths, lamports, deps } +} diff --git a/moon/loro_codec/change_block_meta.mbt b/moon/loro_codec/change_block_meta.mbt new file mode 100644 index 000000000..e63307b1f --- /dev/null +++ b/moon/loro_codec/change_block_meta.mbt @@ -0,0 +1,67 @@ +///| +pub struct ChangesMeta { + timestamps : Array[Int64] + commit_msgs : Array[String?] +} + +///| +pub fn ChangesMeta::timestamps(self : ChangesMeta) -> Array[Int64] { + self.timestamps +} + +///| +pub fn ChangesMeta::commit_msgs(self : ChangesMeta) -> Array[String?]
{ + self.commit_msgs +} + +///| +pub fn decode_changes_meta( + bytes : BytesView, + n_changes : UInt, +) -> ChangesMeta raise DecodeError { + let n_u64 = n_changes.to_uint64() + if n_u64 > 0x7FFF_FFFFUL { + raise DecodeError("change_meta: too many changes") + } + let n = n_u64.to_int() + if n <= 0 { + raise DecodeError("change_meta: empty block") + } + let (timestamps, rest1) = delta_of_delta_take_n_finalize_i64(bytes, n) + let (lens_u32, rest2) = any_rle_take_n_finalize_u32(rest1, n) + let mut total : Int = 0 + let lens : Array[Int] = [] + for x in lens_u32 { + let xi = x.reinterpret_as_int() + if xi < 0 { + raise DecodeError("change_meta: invalid msg len") + } + total = total + xi + lens.push(xi) + } + if total < 0 || total > rest2.length() { + raise DecodeError("change_meta: invalid msg bytes length") + } + let msgs : Array[String?] = [] + let mut offset = 0 + for len in lens { + if len == 0 { + msgs.push(None) + continue + } + let end = offset + len + if end < 0 || end > rest2.length() { + raise DecodeError("change_meta: msg bytes overflow") + } + let s = @encoding/utf8.decode(rest2[offset:end]) catch { + @encoding/utf8.Malformed(_) => + raise DecodeError("change_meta: invalid utf8 msg") + } + msgs.push(Some(s)) + offset = end + } + if offset != total { + raise DecodeError("change_meta: msg bytes trailing") + } + { timestamps, commit_msgs: msgs } +} diff --git a/moon/loro_codec/change_block_minimal_fixture_test.mbt b/moon/loro_codec/change_block_minimal_fixture_test.mbt new file mode 100644 index 000000000..e3d2e12b9 --- /dev/null +++ b/moon/loro_codec/change_block_minimal_fixture_test.mbt @@ -0,0 +1,110 @@ +///| +fn build_minimal_map_insert_encoded_block(peer0 : UInt64) -> Bytes { + // --- header (n_changes = 1) --- + let header_w = BytesWriter::new() + header_w.write_uleb128_u64(1UL) // peer_num + header_w.write_u64_le(peer0) + header_w.write_bytes(encode_bool_rle([false])) // dep_on_self + header_w.write_bytes(encode_any_rle_usize([0UL])) // dep lens + header_w.write_bytes(encode_delta_of_delta_i64([])) // dep counters (empty) + header_w.write_bytes(encode_delta_of_delta_i64([])) // lamports (empty, n-1=0) + let header_bytes = header_w.to_bytes() + + // --- meta (n_changes = 1) --- + let meta_w = BytesWriter::new() + meta_w.write_bytes(encode_delta_of_delta_i64([1234L])) // timestamp + meta_w.write_bytes(encode_any_rle_u32([0])) // no commit msg + let meta_bytes = meta_w.to_bytes() + + // --- keys --- + let keys_w = BytesWriter::new() + let root = @encoding/utf8.encode("root"[:]) + keys_w.write_uleb128_u64(root.length().to_uint64()) + keys_w.write_bytes(root) + let k = @encoding/utf8.encode("a"[:]) + keys_w.write_uleb128_u64(k.length().to_uint64()) + keys_w.write_bytes(k) + let keys_bytes = keys_w.to_bytes() + + // --- container arena (1 root map, name=keys[0]) --- + let arena_w = BytesWriter::new() + arena_w.write_varint_u64(1UL) + arena_w.write_varint_u64(4UL) // field count + arena_w.write_u8(b'\x01') // is_root + arena_w.write_u8(container_type_to_u8(ContainerType::map())) + arena_w.write_varint_u64(0UL) + arena_w.write_varint_i64(0L) + let cids_bytes = arena_w.to_bytes() + + // --- ops (1 row) --- + let ops_col0 = encode_delta_rle_u32([0]) // container_index + let ops_col1 = encode_delta_rle_i32([1]) // prop = key idx + let ops_col2 = encode_rle_u8([11]) // value_type = LoroValue + let ops_col3 = encode_rle_u32([1]) // len = 1 + let ops_bytes = encode_columnar_vec_wrapped([ + ops_col0, ops_col1, ops_col2, ops_col3, + ]) + + // --- values: Value::LoroValue(I64(10)) content only 
(no tag byte) --- + // LoroValue::I64 => [0x03, sleb128(10)=0x0A] + let values_bytes = b"\x03\x0A" + + // postcard EncodedBlock + let w = BytesWriter::new() + w.write_varint_u64(10UL) // counter_start + w.write_varint_u64(1UL) // counter_len + w.write_varint_u64(100UL) // lamport_start + w.write_varint_u64(1UL) // lamport_len + w.write_varint_u64(1UL) // n_changes + for + part in [ + header_bytes, meta_bytes, cids_bytes, keys_bytes, b"", ops_bytes, b"", values_bytes, + ] { + w.write_varint_u64(part.length().to_uint64()) + w.write_bytes(part) + } + w.to_bytes() +} + +///| +test "change_block: decode full block with ops/values" { + let peer0 = 0x0102030405060708UL + let encoded = build_minimal_map_insert_encoded_block(peer0) + let decoded = try! decode_change_block(encoded[:]) + assert_eq(decoded.length(), 1) + let c = decoded[0] + assert_eq(c.id(), ID::new(peer0, 10)) + assert_eq(c.timestamp(), 1234L) + assert_eq(c.deps().length(), 0) + assert_eq(c.lamport(), 100) + assert_eq(c.msg(), None) + assert_eq(c.ops().length(), 1) + let op = c.ops()[0] + assert_eq(op.counter(), 10) + assert_eq(op.len(), 1) + assert_eq(op.container(), ContainerID::root("root", ContainerType::map())) + match op.content() { + OpContent::Map(MapOp::Insert(key, v)) => { + assert_eq(key, "a") + match v { + LoroValue::I64(x) => assert_eq(x, 10L) + _ => fail("unexpected map value") + } + } + _ => fail("unexpected op content") + } +} + +///| +test "change_block: decode -> encode -> decode (semantic) roundtrip" { + // Reuse the minimal block fixture (one change with one Map insert op). + let peer0 = 0x0102030405060708UL + let encoded = build_minimal_map_insert_encoded_block(peer0) + let decoded = try! decode_change_block_full(encoded[:]) + let encoded2 = encode_change_block(decoded) catch { + EncodeError(msg) => fail(msg) + } + let changes1 = try! decode_change_block(encoded[:]) + let changes2 = try! decode_change_block(encoded2[:]) + assert_eq(changes2, changes1) +} diff --git a/moon/loro_codec/change_block_ops.mbt b/moon/loro_codec/change_block_ops.mbt new file mode 100644 index 000000000..58cf0232e --- /dev/null +++ b/moon/loro_codec/change_block_ops.mbt @@ -0,0 +1,5 @@ +// Encoded ops in ChangeBlock. 
+// +// Implementation is split into cohesive files: +// - change_block_ops_tables.mbt +// - change_block_ops_decode_content.mbt diff --git a/moon/loro_codec/change_block_ops_decode_content.mbt b/moon/loro_codec/change_block_ops_decode_content.mbt new file mode 100644 index 000000000..cc1a0b5ec --- /dev/null +++ b/moon/loro_codec/change_block_ops_decode_content.mbt @@ -0,0 +1,239 @@ +///| +fn is_deleted_tree_root(peer : UInt64, counter : Int) -> Bool { + peer == 0xFFFF_FFFF_FFFF_FFFFUL && counter == 2147483647 +} + +///| +fn decode_delete_seq_start_id( + delete_start_ids : Array[EncodedDeleteStartIdRow], + del_idx : Int, + peers : Array[UInt64], +) -> (ID, Int64, Int) raise DecodeError { + if del_idx < 0 || del_idx >= delete_start_ids.length() { + raise DecodeError("op: delete_start_ids underflow") + } + let del = delete_start_ids[del_idx] + if del.peer_idx > 0x7FFF_FFFFUL { + raise DecodeError("op: delete_start peer_idx too large") + } + let peer_i = del.peer_idx.to_int() + if peer_i < 0 || peer_i >= peers.length() { + raise DecodeError("op: delete_start invalid peer_idx") + } + (ID::new(peers[peer_i], del.counter), del.len, del_idx + 1) +} + +///| +fn decode_op_content( + cid : ContainerID, + prop : Int, + _len : UInt, + value : Value, + peers : Array[UInt64], + keys : Array[String], + positions : Array[Bytes], + delete_start_ids : Array[EncodedDeleteStartIdRow], + del_idx : Int, + op_id : ID, +) -> (OpContent, Int) raise DecodeError { + let kind = match cid { + ContainerID::Root(_, k) => k + ContainerID::Normal(_, _, k) => k + } + match kind { + ContainerType::Map => { + if prop < 0 || prop >= keys.length() { + raise DecodeError("op: invalid map key idx") + } + let key = keys[prop] + match value { + Value::DeleteOnce => (OpContent::Map(MapOp::Delete(key)), del_idx) + Value::LoroValue(v) => (OpContent::Map(MapOp::Insert(key, v)), del_idx) + _ => raise DecodeError("op: invalid map value kind") + } + } + ContainerType::Text => + match value { + Value::Str(s) => { + if prop < 0 { + raise DecodeError("op: invalid text insert pos") + } + ( + OpContent::Text(TextOp::Insert(prop.reinterpret_as_uint(), s)), + del_idx, + ) + } + Value::DeleteSeq => { + let (start_id, del_len, next_del_idx) = decode_delete_seq_start_id( + delete_start_ids, del_idx, peers, + ) + ( + OpContent::Text(TextOp::Delete(prop, del_len, start_id)), + next_del_idx, + ) + } + Value::MarkStart(m) => { + if prop < 0 { + raise DecodeError("op: invalid mark start") + } + if m.len > 0xFFFF_FFFFUL { + raise DecodeError("op: mark len overflow") + } + if m.key_idx > 0x7FFF_FFFFUL { + raise DecodeError("op: mark key_idx too large") + } + let key_i = m.key_idx.to_int() + if key_i < 0 || key_i >= keys.length() { + raise DecodeError("op: invalid mark key idx") + } + let start_u = prop.reinterpret_as_uint() + let end_u = start_u + m.len.to_uint() + ( + OpContent::Text( + TextOp::Mark(start_u, end_u, keys[key_i], m.value, m.info), + ), + del_idx, + ) + } + Value::Null => (OpContent::Text(TextOp::MarkEnd), del_idx) + _ => raise DecodeError("op: invalid text value kind") + } + ContainerType::List => + match value { + Value::LoroValue(LoroValue::List(items)) => { + if prop < 0 { + raise DecodeError("op: invalid list insert pos") + } + ( + OpContent::List(ListOp::Insert(prop.reinterpret_as_uint(), items)), + del_idx, + ) + } + Value::DeleteSeq => { + let (start_id, del_len, next_del_idx) = decode_delete_seq_start_id( + delete_start_ids, del_idx, peers, + ) + ( + OpContent::List(ListOp::Delete(prop, del_len, start_id)), + next_del_idx, + ) + } 
+ _ => raise DecodeError("op: invalid list value kind") + } + ContainerType::MovableList => + match value { + Value::LoroValue(LoroValue::List(items)) => { + if prop < 0 { + raise DecodeError("op: invalid movable_list insert pos") + } + ( + OpContent::MovableList( + MovableListOp::Insert(prop.reinterpret_as_uint(), items), + ), + del_idx, + ) + } + Value::DeleteSeq => { + let (start_id, del_len, next_del_idx) = decode_delete_seq_start_id( + delete_start_ids, del_idx, peers, + ) + ( + OpContent::MovableList( + MovableListOp::Delete(prop, del_len, start_id), + ), + next_del_idx, + ) + } + Value::ListMove(m) => { + if m.from > 0xFFFF_FFFFUL { + raise DecodeError("op: movable_list move from overflow") + } + if m.lamport > 0xFFFF_FFFFUL { + raise DecodeError("op: movable_list move lamport overflow") + } + if m.from_idx > 0x7FFF_FFFFUL { + raise DecodeError("op: movable_list move peer_idx too large") + } + let peer_i = m.from_idx.to_int() + if peer_i < 0 || peer_i >= peers.length() { + raise DecodeError("op: movable_list move invalid peer_idx") + } + if prop < 0 { + raise DecodeError("op: movable_list move invalid to") + } + let elem = IdLp::new(peers[peer_i], m.lamport.to_uint()) + ( + OpContent::MovableList( + MovableListOp::Move( + m.from.to_uint(), + prop.reinterpret_as_uint(), + elem, + ), + ), + del_idx, + ) + } + Value::ListSet(s) => { + if s.peer_idx > 0x7FFF_FFFFUL { + raise DecodeError("op: movable_list set peer_idx too large") + } + let peer_i = s.peer_idx.to_int() + if peer_i < 0 || peer_i >= peers.length() { + raise DecodeError("op: movable_list set invalid peer_idx") + } + let elem = IdLp::new(peers[peer_i], s.lamport) + (OpContent::MovableList(MovableListOp::Set(elem, s.value)), del_idx) + } + _ => raise DecodeError("op: invalid movable_list value kind") + } + ContainerType::Tree => + match value { + Value::RawTreeMove(t) => { + if t.subject_peer_idx > 0x7FFF_FFFFUL || + t.parent_peer_idx > 0x7FFF_FFFFUL { + raise DecodeError("op: tree peer_idx too large") + } + let subject_peer_i = t.subject_peer_idx.to_int() + if subject_peer_i < 0 || subject_peer_i >= peers.length() { + raise DecodeError("op: tree invalid subject peer_idx") + } + let subject = ID::new(peers[subject_peer_i], t.subject_cnt) + let parent : ID? = if t.is_parent_null { + None + } else { + let parent_peer_i = t.parent_peer_idx.to_int() + if parent_peer_i < 0 || parent_peer_i >= peers.length() { + raise DecodeError("op: tree invalid parent peer_idx") + } + Some(ID::new(peers[parent_peer_i], t.parent_cnt)) + } + match parent { + Some(p) => + if is_deleted_tree_root(p.peer(), p.counter()) { + return (OpContent::Tree(TreeOp::Delete(subject)), del_idx) + } + None => () + } + if t.position_idx > 0x7FFF_FFFFUL { + raise DecodeError("op: tree position_idx too large") + } + let pos_i = t.position_idx.to_int() + if pos_i < 0 || pos_i >= positions.length() { + raise DecodeError("op: tree invalid position_idx") + } + let fi = FractionalIndex::new(positions[pos_i]) + let is_create = subject.peer() == op_id.peer() && + subject.counter() == op_id.counter() + if is_create { + (OpContent::Tree(TreeOp::Create(subject, parent, fi)), del_idx) + } else { + (OpContent::Tree(TreeOp::Move(subject, parent, fi)), del_idx) + } + } + _ => raise DecodeError("op: invalid tree value kind") + } + _ => + // Counter/Unknown container types: keep as opaque future op. 
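+      // Keeping the raw (prop, value) pair means re-encoding can round-trip
+      // ops this decoder does not model (e.g. Counter increments), as
+      // exercised by transcode_document below.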
+      (OpContent::Future(FutureOp::Unknown(prop, value)), del_idx)
+  }
+}
diff --git a/moon/loro_codec/change_block_ops_tables.mbt b/moon/loro_codec/change_block_ops_tables.mbt
new file mode 100644
index 000000000..3e5394b5a
--- /dev/null
+++ b/moon/loro_codec/change_block_ops_tables.mbt
@@ -0,0 +1,111 @@
+///|
+pub struct EncodedOpRow {
+  container_index : UInt
+  prop : Int
+  value_type : UInt
+  len : UInt
+} derive(Eq, Show)
+
+///|
+pub fn decode_encoded_ops(
+  bytes : BytesView,
+) -> Array[EncodedOpRow] raise DecodeError {
+  if bytes.length() == 0 {
+    return []
+  }
+  let cols = decode_columnar_vec_maybe_wrapped(bytes)
+  if cols.length() != 4 {
+    raise DecodeError("encoded_ops: invalid column count")
+  }
+  let container_indices = decode_delta_rle_u32(cols[0])
+  let props = decode_delta_rle_i32(cols[1])
+  let value_types = decode_rle_u8(cols[2])
+  let lens = decode_rle_u32(cols[3])
+  let n = container_indices.length()
+  if props.length() != n || value_types.length() != n || lens.length() != n {
+    raise DecodeError("encoded_ops: column length mismatch")
+  }
+  let out : Array[EncodedOpRow] = []
+  for i in 0..<n {
+    out.push({
+      container_index: container_indices[i],
+      prop: props[i],
+      value_type: value_types[i],
+      len: lens[i],
+    })
+  }
+  out
+}
+
+///|
+pub fn encode_encoded_ops(ops : Array[EncodedOpRow]) -> Bytes {
+  if ops.length() == 0 {
+    return encode_columnar_vec_wrapped([b"", b"", b"", b""])
+  }
+  let container_indices : Array[UInt] = []
+  let props : Array[Int] = []
+  let value_types : Array[UInt] = []
+  let lens : Array[UInt] = []
+  for op in ops {
+    container_indices.push(op.container_index)
+    props.push(op.prop)
+    value_types.push(op.value_type)
+    lens.push(op.len)
+  }
+  let col0 = encode_delta_rle_u32(container_indices)
+  let col1 = encode_delta_rle_i32(props)
+  let col2 = encode_rle_u8(value_types)
+  let col3 = encode_rle_u32(lens)
+  encode_columnar_vec_wrapped([col0, col1, col2, col3])
+}
+
+///|
+pub struct EncodedDeleteStartIdRow {
+  peer_idx : UInt64
+  counter : Int
+  len : Int64
+} derive(Eq, Show)
+
+///|
+pub fn decode_delete_start_ids(
+  bytes : BytesView,
+) -> Array[EncodedDeleteStartIdRow] raise DecodeError {
+  if bytes.length() == 0 {
+    return []
+  }
+  let cols = decode_columnar_vec_maybe_wrapped(bytes)
+  if cols.length() != 3 {
+    raise DecodeError("delete_start_ids: invalid column count")
+  }
+  let peer_idxs = decode_delta_rle_usize(cols[0])
+  let counters = decode_delta_rle_i32(cols[1])
+  let lens = decode_delta_rle_isize(cols[2])
+  let n = peer_idxs.length()
+  if counters.length() != n || lens.length() != n {
+    raise DecodeError("delete_start_ids: column length mismatch")
+  }
+  let out : Array[EncodedDeleteStartIdRow] = []
+  for i in 0..<n {
+    out.push({ peer_idx: peer_idxs[i], counter: counters[i], len: lens[i] })
+  }
+  out
+}
+
+///|
+pub fn encode_delete_start_ids(ids : Array[EncodedDeleteStartIdRow]) -> Bytes {
+  if ids.length() == 0 {
+    return b""
+  }
+  let peer_idxs : Array[UInt64] = []
+  let counters : Array[Int] = []
+  let lens : Array[Int64] = []
+  for id in ids {
+    peer_idxs.push(id.peer_idx)
+    counters.push(id.counter)
+    lens.push(id.len)
+  }
+  let col0 = encode_delta_rle_usize(peer_idxs)
+  let col1 = encode_delta_rle_i32(counters)
+  let col2 = encode_delta_rle_isize(lens)
+  encode_columnar_vec_wrapped([col0, col1, col2])
+}
diff --git a/moon/loro_codec/changes_json.mbt b/moon/loro_codec/changes_json.mbt
new file mode 100644
index 000000000..1200988e9
--- /dev/null
+++ b/moon/loro_codec/changes_json.mbt
@@ -0,0 +1,7 @@
+// Debug-only JSON renderer for decoded FastUpdates changes.
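+//
+// Illustrative output shape (values are made up):
+//   { "mode": 4,
+//     "changes": [ { "id": "0@42", "timestamp": "0", "lamport": "0",
+//                    "msg": null, "deps": [], "ops": [...] } ] }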
+// +// Implementation is split into cohesive files: +// - changes_json_helpers.mbt +// - changes_json_value.mbt +// - changes_json_op.mbt +// - changes_json_change.mbt diff --git a/moon/loro_codec/changes_json_change.mbt b/moon/loro_codec/changes_json_change.mbt new file mode 100644 index 000000000..fa284be03 --- /dev/null +++ b/moon/loro_codec/changes_json_change.mbt @@ -0,0 +1,44 @@ +///| +fn change_json(change : Change, keys : Array[String]) -> Json { + let deps : Array[String] = [] + for d in change.deps() { + deps.push(id_string(d)) + } + let ops : Array[Json] = [] + for op in change.ops() { + ops.push(op_json(op, keys)) + } + { + "id": id_string(change.id()), + "timestamp": change.timestamp().to_string(), + "lamport": change.lamport().to_uint64().to_string(), + "msg": match change.msg() { + None => Json::null() + Some(m) => Json::string(m) + }, + "deps": deps, + "ops": Json::array(ops), + } +} + +///| +pub fn decode_fast_updates_changes_json( + bytes : Bytes, + validate : Bool, +) -> String raise DecodeError { + let doc = parse_document(bytes, validate) + if doc.mode() != 4 { + raise DecodeError("decode-updates: not a FastUpdates (mode=4) document") + } + let blocks = parse_fast_updates_body(doc.body_view()) + let changes : Array[Json] = [] + for b in blocks { + let decoded = decode_change_block_full(b) + let keys = decoded.keys() + for c in decoded.changes() { + changes.push(change_json(c, keys)) + } + } + let root : Json = { "mode": 4, "changes": Json::array(changes) } + root.stringify(indent=2) +} diff --git a/moon/loro_codec/changes_json_helpers.mbt b/moon/loro_codec/changes_json_helpers.mbt new file mode 100644 index 000000000..04865b375 --- /dev/null +++ b/moon/loro_codec/changes_json_helpers.mbt @@ -0,0 +1,67 @@ +///| +fn id_string(id : ID) -> String { + id.counter().to_string() + "@" + id.peer().to_string() +} + +///| +fn idlp_string(id : IdLp) -> String { + "L" + id.lamport().to_string() + "@" + id.peer().to_string() +} + +///| +fn container_type_string(kind : ContainerType) -> String { + match kind { + ContainerType::Map => "Map" + ContainerType::List => "List" + ContainerType::Text => "Text" + ContainerType::Tree => "Tree" + ContainerType::MovableList => "MovableList" + ContainerType::Counter => "Counter" + ContainerType::Unknown(k) => "Unknown(" + k.to_string() + ")" + } +} + +///| +fn container_id_string(cid : ContainerID) -> String { + match cid { + ContainerID::Root(name, kind) => + "cid:root-" + name + ":" + container_type_string(kind) + ContainerID::Normal(peer, counter, kind) => + "cid:" + + counter.to_string() + + "@" + + peer.to_string() + + ":" + + container_type_string(kind) + } +} + +///| +fn bytes_hex_upper(bytes : Bytes) -> String { + let sb = StringBuilder::new() + let hex : Array[Char] = [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + ] + for b in bytes { + let u = b.to_uint() + let hi = ((u >> 4) & 0xF).reinterpret_as_int() + let lo = (u & 0xF).reinterpret_as_int() + sb.write_char(hex[hi]) + sb.write_char(hex[lo]) + } + sb.to_string() +} + +///| +fn fractional_index_json(fi : FractionalIndex) -> Json { + Json::string(bytes_hex_upper(fi.bytes())) +} + +///| +fn binary_json(bytes : Bytes) -> Json { + let out : Array[Json] = [] + for b in bytes { + out.push(Json::number(b.to_uint().to_double())) + } + Json::array(out) +} diff --git a/moon/loro_codec/changes_json_op.mbt b/moon/loro_codec/changes_json_op.mbt new file mode 100644 index 000000000..2fac6f8e4 --- /dev/null +++ b/moon/loro_codec/changes_json_op.mbt @@ -0,0 
+1,144 @@ +///| +fn op_content_json(content : OpContent, keys : Array[String]) -> Json { + match content { + OpContent::Map(MapOp::Insert(k, v)) => + { + "Map": { "Insert": { "key": k, "value": loro_value_json(v, keys, 0) } }, + } + OpContent::Map(MapOp::Delete(k)) => { "Map": { "Delete": { "key": k } } } + OpContent::List(ListOp::Insert(pos, values)) => { + let out : Array[Json] = [] + for v in values { + out.push(loro_value_json(v, keys, 0)) + } + { + "List": { + "Insert": { + "pos": pos.reinterpret_as_int(), + "values": Json::array(out), + }, + }, + } + } + OpContent::List(ListOp::Delete(pos, len, start_id)) => + { + "List": { + "Delete": { + "pos": pos, + "len": len.to_string(), + "start_id": id_string(start_id), + }, + }, + } + OpContent::MovableList(MovableListOp::Insert(pos, values)) => { + let out : Array[Json] = [] + for v in values { + out.push(loro_value_json(v, keys, 0)) + } + { + "MovableList": { + "Insert": { + "pos": pos.reinterpret_as_int(), + "values": Json::array(out), + }, + }, + } + } + OpContent::MovableList(MovableListOp::Delete(pos, len, start_id)) => + { + "MovableList": { + "Delete": { + "pos": pos, + "len": len.to_string(), + "start_id": id_string(start_id), + }, + }, + } + OpContent::MovableList(MovableListOp::Move(from, to, elem_id)) => + { + "MovableList": { + "Move": { + "from": from.reinterpret_as_int(), + "to": to.reinterpret_as_int(), + "elem_id": idlp_string(elem_id), + }, + }, + } + OpContent::MovableList(MovableListOp::Set(elem_id, value)) => + { + "MovableList": { + "Set": { + "elem_id": idlp_string(elem_id), + "value": loro_value_json(value, keys, 0), + }, + }, + } + OpContent::Text(TextOp::Insert(pos, text)) => + { + "Text": { "Insert": { "pos": pos.reinterpret_as_int(), "text": text } }, + } + OpContent::Text(TextOp::Delete(pos, len, start_id)) => + { + "Text": { + "Delete": { + "pos": pos, + "len": len.to_string(), + "start_id": id_string(start_id), + }, + }, + } + OpContent::Text(TextOp::Mark(start, end, key, value, info)) => + { + "Text": { + "Mark": { + "start": start.reinterpret_as_int(), + "end": end.reinterpret_as_int(), + "key": key, + "value": loro_value_json(value, keys, 0), + "info": info.to_uint().reinterpret_as_int(), + }, + }, + } + OpContent::Text(TextOp::MarkEnd) => { "Text": { "MarkEnd": Json::null() } } + OpContent::Tree(TreeOp::Create(target, parent, fi)) => + { + "Tree": { + "Create": { + "target": id_string(target), + "parent": match parent { + None => Json::null() + Some(p) => Json::string(id_string(p)) + }, + "fractional_index": fractional_index_json(fi), + }, + }, + } + OpContent::Tree(TreeOp::Move(target, parent, fi)) => + { + "Tree": { + "Move": { + "target": id_string(target), + "parent": match parent { + None => Json::null() + Some(p) => Json::string(id_string(p)) + }, + "fractional_index": fractional_index_json(fi), + }, + }, + } + OpContent::Tree(TreeOp::Delete(target)) => + { "Tree": { "Delete": { "target": id_string(target) } } } + OpContent::Future(FutureOp::Unknown(prop, _raw)) => + { "Future": { "Unknown": { "prop": prop } } } + } +} + +///| +fn op_json(op : Op, keys : Array[String]) -> Json { + { + "container": container_id_string(op.container()), + "counter": op.counter(), + "len": op.len().to_uint64().to_string(), + "content": op_content_json(op.content(), keys), + } +} diff --git a/moon/loro_codec/changes_json_value.mbt b/moon/loro_codec/changes_json_value.mbt new file mode 100644 index 000000000..774ee78d6 --- /dev/null +++ b/moon/loro_codec/changes_json_value.mbt @@ -0,0 +1,38 @@ +///| +fn loro_value_json(v : 
LoroValue, keys : Array[String], depth : Int) -> Json { + if depth > 1024 { + return Json::string("") + } + match v { + LoroValue::Null => Json::null() + LoroValue::True => Json::boolean(true) + LoroValue::False => Json::boolean(false) + LoroValue::I64(x) => Json::string(x.to_string()) + LoroValue::F64(x) => Json::number(x) + LoroValue::Str(s) => Json::string(s) + LoroValue::Binary(b) => binary_json(b) + LoroValue::List(items) => { + let out : Array[Json] = [] + for it in items { + out.push(loro_value_json(it, keys, depth + 1)) + } + Json::array(out) + } + LoroValue::Map(items) => { + let obj = Map::new(capacity=items.length()) + for pair in items { + let (key_idx, it) = pair + let mut key = "" + if key_idx <= 0x7FFF_FFFFUL { + let i = key_idx.to_int() + if i >= 0 && i < keys.length() { + key = keys[i] + } + } + obj[key] = loro_value_json(it, keys, depth + 1) + } + Json::object(obj) + } + LoroValue::ContainerType(ct) => Json::number(ct.to_uint().to_double()) + } +} diff --git a/moon/loro_codec/container_id.mbt b/moon/loro_codec/container_id.mbt new file mode 100644 index 000000000..1e1dc673b --- /dev/null +++ b/moon/loro_codec/container_id.mbt @@ -0,0 +1,7 @@ +///| +/// Container ID & type codecs. +/// +/// This file is only an entrypoint. See: +/// - `container_type.mbt` +/// - `container_id_bytes.mbt` +/// - `container_id_postcard.mbt` diff --git a/moon/loro_codec/container_id_bytes.mbt b/moon/loro_codec/container_id_bytes.mbt new file mode 100644 index 000000000..9e1256cdf --- /dev/null +++ b/moon/loro_codec/container_id_bytes.mbt @@ -0,0 +1,80 @@ +///| +pub enum ContainerID { + Root(String, ContainerType) + Normal(UInt64, Int, ContainerType) +} derive(Eq, Show) + +///| +pub fn ContainerID::root(name : String, kind : ContainerType) -> ContainerID { + ContainerID::Root(name, kind) +} + +///| +pub fn ContainerID::normal( + peer : UInt64, + counter : Int, + kind : ContainerType, +) -> ContainerID { + ContainerID::Normal(peer, counter, kind) +} + +///| +pub fn ContainerID::to_bytes(self : ContainerID) -> Bytes { + let w = BytesWriter::new() + match self { + ContainerID::Root(name, kind) => { + let first = container_type_to_u8(kind).to_uint() | 0x80 + w.write_u8((first & 0xFF).to_byte()) + let name_bytes = @encoding/utf8.encode(name[:]) + w.write_uleb128_u64(name_bytes.length().to_uint64()) + w.write_bytes(name_bytes) + } + ContainerID::Normal(peer, counter, kind) => { + w.write_u8(container_type_to_u8(kind)) + w.write_u64_le(peer) + w.write_u32_le(counter.reinterpret_as_uint()) + } + } + w.to_bytes() +} + +///| +pub fn container_id_from_bytes( + bytes : BytesView, +) -> ContainerID raise DecodeError { + if bytes.length() < 1 { + raise DecodeError("container_id: empty bytes") + } + let r = BytesReader::from_view(bytes) + let first = r.read_u8().to_uint() + let kind = container_type_from_u8((first & 0x7F).to_byte()) + let is_root = (first & 0x80) != 0 + if is_root { + let name_len_u64 = r.read_uleb128_u64() + if name_len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("container_id: name too long") + } + let name_len = name_len_u64.to_int() + if name_len < 0 || name_len > r.remaining() { + raise DecodeError("container_id: invalid name length") + } + let name_bytes = r.read_exact(name_len) + let name = @encoding/utf8.decode(name_bytes) catch { + @encoding/utf8.Malformed(_) => + raise DecodeError("container_id: invalid utf8 name") + } + if r.remaining() != 0 { + raise DecodeError("container_id: trailing bytes") + } + return ContainerID::Root(name, kind) + } + if r.remaining() != 12 { + raise 
DecodeError("container_id: invalid normal length") + } + let peer = r.read_u64_le() + let counter = r.read_u32_le().reinterpret_as_int() + if r.remaining() != 0 { + raise DecodeError("container_id: trailing bytes") + } + ContainerID::Normal(peer, counter, kind) +} diff --git a/moon/loro_codec/container_id_postcard.mbt b/moon/loro_codec/container_id_postcard.mbt new file mode 100644 index 000000000..27cf7f0fd --- /dev/null +++ b/moon/loro_codec/container_id_postcard.mbt @@ -0,0 +1,77 @@ +///| +pub fn postcard_decode_option_container_id( + bytes : BytesView, +) -> (ContainerID?, BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let opt_tag = r.read_varint_u64() + match opt_tag { + 0UL => return (None, bytes[bytes.length() - r.remaining():]) + 1UL => () + _ => raise DecodeError("postcard: invalid Option tag") + } + let cid_tag = r.read_varint_u64() + match cid_tag { + 0UL => { + // Root: (name: String, container_type: ContainerType as historical u8) + let name_len_u64 = r.read_varint_u64() + if name_len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("postcard: name too long") + } + let name_len = name_len_u64.to_int() + if name_len < 0 || name_len > r.remaining() { + raise DecodeError("postcard: invalid name length") + } + let name_bytes = r.read_exact(name_len) + let name = @encoding/utf8.decode(name_bytes) catch { + @encoding/utf8.Malformed(_) => + raise DecodeError("postcard: invalid utf8 name") + } + let kind = container_type_from_historical_u8(r.read_u8()) + return ( + Some(ContainerID::Root(name, kind)), + bytes[bytes.length() - r.remaining():], + ) + } + 1UL => { + // Normal: (peer: u64 varint, counter: i32 zigzag varint, container_type as historical u8) + let peer = r.read_varint_u64() + let counter = r.read_varint_i64().to_int() + let kind = container_type_from_historical_u8(r.read_u8()) + return ( + Some(ContainerID::Normal(peer, counter, kind)), + bytes[bytes.length() - r.remaining():], + ) + } + _ => raise DecodeError("postcard: invalid ContainerID tag") + } +} + +///| +pub fn postcard_encode_option_container_id(cid : ContainerID?) -> Bytes { + let w = BytesWriter::new() + match cid { + None => { + w.write_varint_u64(0) + return w.to_bytes() + } + Some(cid) => { + w.write_varint_u64(1) + match cid { + ContainerID::Root(name, kind) => { + w.write_varint_u64(0) + let name_bytes = @encoding/utf8.encode(name[:]) + w.write_varint_u64(name_bytes.length().to_uint64()) + w.write_bytes(name_bytes) + w.write_u8(container_type_to_historical_u8(kind)) + } + ContainerID::Normal(peer, counter, kind) => { + w.write_varint_u64(1) + w.write_varint_u64(peer) + w.write_varint_i64(counter.to_int64()) + w.write_u8(container_type_to_historical_u8(kind)) + } + } + w.to_bytes() + } + } +} diff --git a/moon/loro_codec/container_id_test.mbt b/moon/loro_codec/container_id_test.mbt new file mode 100644 index 000000000..fa722ebc3 --- /dev/null +++ b/moon/loro_codec/container_id_test.mbt @@ -0,0 +1,63 @@ +///| +test "container_id to_bytes/from_bytes root" { + let cid = ContainerID::root("root", ContainerType::map()) + let bytes = cid.to_bytes() + assert_eq(bytes, b"\x80\x04root") + let decoded = try! 
 container_id_from_bytes(bytes[:])
+  match decoded {
+    ContainerID::Root(name, kind) => {
+      assert_eq(name, "root")
+      assert_eq(kind, ContainerType::map())
+    }
+    _ => assert_true(false)
+  }
+}
+
+///|
+test "container_id to_bytes/from_bytes normal" {
+  let cid = ContainerID::normal(7, 42, ContainerType::list())
+  let bytes = cid.to_bytes()
+  assert_eq(bytes, b"\x01\x07\x00\x00\x00\x00\x00\x00\x00\x2A\x00\x00\x00")
+  let decoded = try! container_id_from_bytes(bytes[:])
+  match decoded {
+    ContainerID::Normal(peer, counter, kind) => {
+      assert_eq(peer, 7)
+      assert_eq(counter, 42)
+      assert_eq(kind, ContainerType::list())
+    }
+    _ => assert_true(false)
+  }
+}
+
+///|
+test "postcard option matches rust" {
+  // These bytes are generated by Rust `postcard::to_allocvec` on Option<ContainerID>.
+  assert_eq(postcard_encode_option_container_id(None), b"\x00")
+  let root_map = ContainerID::root("root", ContainerType::map())
+  assert_eq(
+    postcard_encode_option_container_id(Some(root_map)),
+    b"\x01\x00\x04root\x01",
+  )
+  let root_text = ContainerID::root("t", ContainerType::text())
+  assert_eq(
+    postcard_encode_option_container_id(Some(root_text)),
+    b"\x01\x00\x01t\x00",
+  )
+  let normal_list = ContainerID::normal(7, 42, ContainerType::list())
+  assert_eq(
+    postcard_encode_option_container_id(Some(normal_list)),
+    b"\x01\x01\x07\x54\x02",
+  )
+  let (decoded, rest) = try! postcard_decode_option_container_id(
+    b"\x01\x01\x07\x54\x02"[:],
+  )
+  assert_eq(rest.length(), 0)
+  match decoded {
+    Some(ContainerID::Normal(peer, counter, kind)) => {
+      assert_eq(peer, 7)
+      assert_eq(counter, 42)
+      assert_eq(kind, ContainerType::list())
+    }
+    _ => assert_true(false)
+  }
+}
diff --git a/moon/loro_codec/container_type.mbt b/moon/loro_codec/container_type.mbt
new file mode 100644
index 000000000..3e90389b6
--- /dev/null
+++ b/moon/loro_codec/container_type.mbt
@@ -0,0 +1,102 @@
+///|
+pub enum ContainerType {
+  Text
+  Map
+  List
+  MovableList
+  Tree
+  Counter
+  Unknown(UInt)
+} derive(Eq, Show)
+
+// NOTE: MoonBit makes enum constructors read-only across packages. Expose explicit
+// constructor functions for blackbox tests and future callers.
+
+///|
+pub fn ContainerType::text() -> ContainerType {
+  ContainerType::Text
+}
+
+///|
+pub fn ContainerType::map() -> ContainerType {
+  ContainerType::Map
+}
+
+///|
+pub fn ContainerType::list() -> ContainerType {
+  ContainerType::List
+}
+
+///|
+pub fn ContainerType::movable_list() -> ContainerType {
+  ContainerType::MovableList
+}
+
+///|
+pub fn ContainerType::tree() -> ContainerType {
+  ContainerType::Tree
+}
+
+///|
+pub fn ContainerType::counter() -> ContainerType {
+  ContainerType::Counter
+}
+
+///|
+pub fn ContainerType::unknown(kind : UInt) -> ContainerType {
+  ContainerType::Unknown(kind)
+}
+
+///|
+pub fn container_type_to_u8(t : ContainerType) -> Byte {
+  match t {
+    ContainerType::Map => b'\x00'
+    ContainerType::List => b'\x01'
+    ContainerType::Text => b'\x02'
+    ContainerType::Tree => b'\x03'
+    ContainerType::MovableList => b'\x04'
+    ContainerType::Counter => b'\x05'
+    ContainerType::Unknown(k) => (k & 0xFF).to_byte()
+  }
+}
+
+///|
+pub fn container_type_from_u8(b : Byte) -> ContainerType {
+  match b.to_uint() {
+    0 => ContainerType::Map
+    1 => ContainerType::List
+    2 => ContainerType::Text
+    3 => ContainerType::Tree
+    4 => ContainerType::MovableList
+    5 => ContainerType::Counter
+    x => ContainerType::Unknown(x)
+  }
+}
+
+// Used by serde (postcard) for historical reasons.
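+// The two tables differ: the current wire table above encodes Map=0, List=1,
+// Text=2, Tree=3, MovableList=4, Counter=5, while the historical postcard
+// table below encodes Text=0, Map=1, List=2, MovableList=3, Tree=4, Counter=5
+// (e.g. Map is 0x00 in one and 0x01 in the other).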
+ +///| +pub fn container_type_to_historical_u8(t : ContainerType) -> Byte { + match t { + ContainerType::Text => b'\x00' + ContainerType::Map => b'\x01' + ContainerType::List => b'\x02' + ContainerType::MovableList => b'\x03' + ContainerType::Tree => b'\x04' + ContainerType::Counter => b'\x05' + ContainerType::Unknown(k) => (k & 0xFF).to_byte() + } +} + +///| +pub fn container_type_from_historical_u8(b : Byte) -> ContainerType { + match b.to_uint() { + 0 => ContainerType::Text + 1 => ContainerType::Map + 2 => ContainerType::List + 3 => ContainerType::MovableList + 4 => ContainerType::Tree + 5 => ContainerType::Counter + x => ContainerType::Unknown(x) + } +} diff --git a/moon/loro_codec/container_wrapper.mbt b/moon/loro_codec/container_wrapper.mbt new file mode 100644 index 000000000..a42a5a149 --- /dev/null +++ b/moon/loro_codec/container_wrapper.mbt @@ -0,0 +1,58 @@ +///| +pub struct ContainerWrapper { + kind : ContainerType + depth : UInt64 + parent : ContainerID? + payload : BytesView +} + +///| +pub fn ContainerWrapper::kind(self : ContainerWrapper) -> ContainerType { + self.kind +} + +///| +pub fn ContainerWrapper::depth(self : ContainerWrapper) -> UInt64 { + self.depth +} + +///| +pub fn ContainerWrapper::parent(self : ContainerWrapper) -> ContainerID? { + self.parent +} + +///| +pub fn ContainerWrapper::payload_view(self : ContainerWrapper) -> BytesView { + self.payload +} + +///| +pub fn parse_container_wrapper( + bytes : BytesView, +) -> ContainerWrapper raise DecodeError { + if bytes.length() < 1 { + raise DecodeError("container_wrapper: empty bytes") + } + let r = BytesReader::from_view(bytes) + let kind = container_type_from_u8(r.read_u8()) + let depth = r.read_uleb128_u64() + let (parent, rest) = try! postcard_decode_option_container_id( + bytes[bytes.length() - r.remaining():], + ) + { kind, depth, parent, payload: rest } +} + +///| +pub fn encode_container_wrapper( + kind : ContainerType, + depth : UInt64, + parent : ContainerID?, + payload : Bytes, +) -> Bytes { + let w = BytesWriter::new() + w.write_u8(container_type_to_u8(kind)) + w.write_uleb128_u64(depth) + w.write_bytes(postcard_encode_option_container_id(parent)) + w.write_bytes(payload) + w.to_bytes() +} diff --git a/moon/loro_codec/container_wrapper_test.mbt b/moon/loro_codec/container_wrapper_test.mbt new file mode 100644 index 000000000..dabebede6 --- /dev/null +++ b/moon/loro_codec/container_wrapper_test.mbt @@ -0,0 +1,23 @@ +///| +test "container wrapper encode/parse header" { + let payload = b"PAYLOAD" + let parent = Some(ContainerID::normal(7, 42, ContainerType::list())) + let bytes = encode_container_wrapper( + ContainerType::text(), + 3, + parent, + payload, + ) + let parsed = try! 
 parse_container_wrapper(bytes[:])
+  assert_eq(parsed.kind(), ContainerType::text())
+  assert_eq(parsed.depth(), 3)
+  match parsed.parent() {
+    Some(ContainerID::Normal(peer, counter, kind)) => {
+      assert_eq(peer, 7)
+      assert_eq(counter, 42)
+      assert_eq(kind, ContainerType::list())
+    }
+    _ => assert_true(false)
+  }
+  assert_eq(parsed.payload_view().to_bytes(), payload)
+}
diff --git a/moon/loro_codec/deep_json_snapshot.mbt b/moon/loro_codec/deep_json_snapshot.mbt
new file mode 100644
index 000000000..d9ea8e623
--- /dev/null
+++ b/moon/loro_codec/deep_json_snapshot.mbt
@@ -0,0 +1,419 @@
+///|
+fn container_id_hex_key(cid : ContainerID) -> String {
+  bytes_hex_upper(cid.to_bytes())
+}
+
+///|
+fn container_id_hex_key_normal(
+  peer : UInt64,
+  counter : Int,
+  kind : ContainerType,
+) -> String {
+  let w = BytesWriter::new()
+  w.write_u8(container_type_to_u8(kind))
+  w.write_u64_le(peer)
+  w.write_u32_le(counter.reinterpret_as_uint())
+  bytes_hex_upper(w.to_bytes())
+}
+
+///|
+fn common_value_json(
+  entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+  v : CommonValue,
+  validate : Bool,
+  depth : Int,
+) -> Json raise DecodeError {
+  if depth > 1024 {
+    raise DecodeError("snapshot: value too deep")
+  }
+  match v {
+    CommonValue::Null => Json::null()
+    CommonValue::Bool(b) => Json::boolean(b)
+    CommonValue::Double(d) => Json::number(d)
+    CommonValue::I64(x) => jsonschema_number_i64(x)
+    CommonValue::String(s) => Json::string(s)
+    CommonValue::Binary(b) => binary_json(b)
+    CommonValue::List(items) => {
+      let out : Array[Json] = []
+      for i in 0..<items.length() {
+        out.push(common_value_json(entries, items[i], validate, depth + 1))
+      }
+      Json::array(out)
+    }
+    CommonValue::Map(items) => {
+      let obj = Map::new(capacity=items.length())
+      for pair in items {
+        let (k, vv) = pair
+        obj[k] = common_value_json(entries, vv, validate, depth + 1)
+      }
+      Json::object(obj)
+    }
+    CommonValue::Container(cid) => {
+      let key = container_id_hex_key(cid)
+      container_json_from_key(entries, key, validate, depth + 1)
+    }
+  }
+}
+
+///|
+fn container_json_map(
+  entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+  payload : Bytes,
+  validate : Bool,
+  depth : Int,
+) -> Json raise DecodeError {
+  let (values, rest1) = postcard_take_map_string_common_value(payload[:])
+  let (_deleted_keys, rest2) = postcard_take_vec_string(rest1)
+  let (_peers, _meta_bytes) = take_peer_table(rest2)
+
+  let obj = Map::new(capacity=values.length())
+  for pair in values {
+    let (k, vv) = pair
+    obj[k] = common_value_json(entries, vv, validate, depth + 1)
+  }
+  Json::object(obj)
+}
+
+///|
+fn container_json_list(
+  entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+  payload : Bytes,
+  validate : Bool,
+  depth : Int,
+) -> Json raise DecodeError {
+  let (values, _rest1) = postcard_take_vec_common_value(payload[:])
+  let out : Array[Json] = []
+  for i in 0..<values.length() {
+    out.push(common_value_json(entries, values[i], validate, depth + 1))
+  }
+  Json::array(out)
+}
+
+///|
+fn container_json_movable_list(
+  entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+  payload : Bytes,
+  validate : Bool,
+  depth : Int,
+) -> Json raise DecodeError {
+  let (values, _rest1) = postcard_take_vec_common_value(payload[:])
+  let out : Array[Json] = []
+  for i in 0..<values.length() {
+    out.push(common_value_json(entries, values[i], validate, depth + 1))
+  }
+  Json::array(out)
+}
+
+///|
+fn container_json_text(payload : Bytes) -> Json raise DecodeError {
+  let (text, _rest1) = postcard_take_string(payload[:])
+  Json::string(text)
+}
+
+///|
+fn container_json_counter(payload : Bytes) -> Json raise DecodeError {
+  let r = BytesReader::new(payload)
+  if r.remaining() != 8 {
+    raise DecodeError("counter_state: invalid payload length")
+  }
+  let bits = r.read_u64_le()
+  Json::number(bits.reinterpret_as_double())
+}
+
+///|
+fn tree_nodes_json(
+  entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+  payload : Bytes,
+  validate : Bool,
+  depth : Int,
+) -> Json raise DecodeError {
+  if depth > 256 {
+    raise DecodeError("tree: too deep")
+  }
+  let (peers, rest1) = take_peer_table(payload[:])
+  let r = BytesReader::from_view(rest1)
+  let n_fields = r.read_varint_u64()
+  if n_fields != 4UL {
+    raise DecodeError("tree: invalid EncodedTree field count")
+  }
+
+  // node_ids
+  let node_ids_view = r.remaining_view()
+  let (node_id_cols, rest_after_node_ids) = take_columnar_vec(node_ids_view)
+  r.skip(node_ids_view.length() - rest_after_node_ids.length())
+  if node_id_cols.length() != 2 {
+    raise DecodeError("tree: invalid node_id column count")
+  }
+  let node_peer_idx = decode_delta_rle_usize(node_id_cols[0])
+  let node_counter = decode_delta_rle_i32(node_id_cols[1])
+  if node_peer_idx.length() != node_counter.length() {
+    raise DecodeError("tree: node_id column length mismatch")
+  }
+
+  // nodes
+  let nodes_view = r.remaining_view()
+  let (node_cols, rest_after_nodes) = take_columnar_vec(nodes_view)
+  r.skip(nodes_view.length() - rest_after_nodes.length())
+  if node_cols.length() != 5 {
+    raise DecodeError("tree: invalid node column count")
+  }
+  let parent_idx_plus_two = decode_delta_rle_usize(node_cols[0])
+  let last_set_peer_idx = decode_delta_rle_usize(node_cols[1])
+  let last_set_counter = decode_delta_rle_i32(node_cols[2])
+  let last_set_lamport_sub = decode_delta_rle_i32(node_cols[3])
+  let fractional_idx_idx = decode_postcard_vec_usize(node_cols[4])
+  let n_nodes = node_peer_idx.length()
+  if parent_idx_plus_two.length() != n_nodes ||
+    last_set_peer_idx.length() != n_nodes ||
+    last_set_counter.length() != n_nodes ||
+    last_set_lamport_sub.length() != n_nodes ||
+    fractional_idx_idx.length() != n_nodes {
+    raise DecodeError("tree: node column length mismatch")
+  }
+
+  let frac_view = r.remaining_view()
+  let (fractional_indexes_bytes, rest_after_frac) = postcard_take_bytes(
+    frac_view,
+  )
+  r.skip(frac_view.length() - rest_after_frac.length())
+  let reserved_view = r.remaining_view()
+  let (_reserved_bytes, rest_after_reserved) = postcard_take_bytes(reserved_view)
+  r.skip(reserved_view.length() - rest_after_reserved.length())
+  if r.remaining() != 0 {
+    raise DecodeError("tree: trailing bytes")
+  }
+  let positions = decode_position_arena_v2(fractional_indexes_bytes[:])
+
+  // Alive nodes are encoded first. Deleted nodes follow (and the first deleted node has parent=Deleted).
+  let mut alive_n = n_nodes
+  for i in 0..<n_nodes {
+    if parent_idx_plus_two[i] == 1UL {
+      alive_n = i
+      break
+    }
+  }
+  let id_strs : Array[String] = []
+  let parent_idx : Array[Int?] = []
+  let fi_hex : Array[String] = []
+  let meta_key : Array[String] = []
+  for i in 0..<alive_n {
+    let peer_idx_u64 = node_peer_idx[i]
+    if validate {
+      if peer_idx_u64 > 0x7FFF_FFFFUL ||
+        peer_idx_u64.to_int() < 0 ||
+        peer_idx_u64.to_int() >= peers.length() {
+        raise DecodeError("tree: node peer_idx out of range")
+      }
+    }
+    let peer = peers[peer_idx_u64.to_int()]
+    let counter = node_counter[i]
+    id_strs.push(counter.to_string() + "@" + peer.to_string())
+    let p = parent_idx_plus_two[i]
+    if p == 0UL {
+      parent_idx.push(None)
+    } else if p >= 2UL {
+      let idx = (p - 2UL).to_int()
+      if validate && (idx < 0 || idx >= alive_n) {
+        raise DecodeError("tree: invalid parent index")
+      }
+      parent_idx.push(Some(idx))
+    } else {
+      // parent=Deleted should not appear for alive nodes.
+      raise DecodeError("tree: unexpected deleted parent in alive nodes")
+    }
+    let fi_idx = fractional_idx_idx[i].to_int()
+    if validate && (fi_idx < 0 || fi_idx >= positions.length()) {
+      raise DecodeError("tree: invalid fractional_index_idx")
+    }
+    fi_hex.push(bytes_hex_upper(positions[fi_idx]))
+    meta_key.push(container_id_hex_key_normal(peer, counter, ContainerType::map()))
+  }
+
+  // children lists
+  let children : Array[Array[Int]] = []
+  for _i in 0..<alive_n {
+    children.push([])
+  }
+  let roots : Array[Int] = []
+  for i in 0..<alive_n {
+    match parent_idx[i] {
+      None => roots.push(i)
+      Some(p) => children[p].push(i)
+    }
+  }
+
+  // sort siblings by fractional_index, and assign index field
+  let index_arr : Array[Int] = []
+  for _i in 0..<alive_n {
+    index_arr.push(0)
+  }
+  roots.sort_by_key(i => fi_hex[i])
+  for i in 0..<roots.length() {
+    index_arr[roots[i]] = i
+  }
+  for lst in children {
+    lst.sort_by_key(i => fi_hex[i])
+    for i in 0..<lst.length() {
+      index_arr[lst[i]] = i
+    }
+  }
+
+  fn node_json(
+    entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+    id_strs : Array[String],
+    parent_idx : Array[Int?],
+    fi_hex : Array[String],
+    meta_key : Array[String],
+    index_arr : Array[Int],
+    children : Array[Array[Int]],
+    i : Int,
+    validate : Bool,
+    depth : Int,
+  ) -> Json raise DecodeError {
+    let obj = Map::new(capacity=6)
+    obj["parent"] = match parent_idx[i] {
+      None => Json::null()
+      Some(p) => Json::string(id_strs[p])
+    }
+    obj["meta"] = container_json_from_key(
+      entries,
+      meta_key[i],
+      validate,
+      depth + 1,
+    )
+    obj["id"] = Json::string(id_strs[i])
+    obj["index"] = jsonschema_number_int(index_arr[i])
+    let out_children : Array[Json] = []
+    for c in children[i] {
+      out_children.push(
+        node_json(
+          entries,
+          id_strs,
+          parent_idx,
+          fi_hex,
+          meta_key,
+          index_arr,
+          children,
+          c,
+          validate,
+          depth + 1,
+        ),
+      )
+    }
+    obj["children"] = Json::array(out_children)
+    obj["fractional_index"] = Json::string(fi_hex[i])
+    Json::object(obj)
+  }
+
+  let out_roots : Array[Json] = []
+  for i in roots {
+    out_roots.push(
+      node_json(
+        entries,
+        id_strs,
+        parent_idx,
+        fi_hex,
+        meta_key,
+        index_arr,
+        children,
+        i,
+        validate,
+        depth + 1,
+      ),
+    )
+  }
+  Json::array(out_roots)
+}
+
+///|
+fn container_json_from_key(
+  entries : @hashmap.HashMap[String, (ContainerType, Bytes)],
+  key : String,
+  validate : Bool,
+  depth : Int,
+) -> Json raise DecodeError {
+  if depth > 2048 {
+    raise DecodeError("snapshot: container nesting too deep")
+  }
+  match entries.get(key) {
+    None => raise DecodeError("snapshot: missing container state")
+    Some((kind, payload)) =>
+      match kind {
+        ContainerType::Map => container_json_map(entries, payload, validate, depth)
+        ContainerType::List =>
+          container_json_list(entries, payload, validate, depth)
+        ContainerType::Text => container_json_text(payload)
+        ContainerType::Tree => tree_nodes_json(entries, payload, validate, depth)
+        ContainerType::MovableList =>
+          container_json_movable_list(entries, payload, validate, depth)
+        ContainerType::Counter => container_json_counter(payload)
+        ContainerType::Unknown(_) =>
+          raise DecodeError("snapshot: unsupported container type")
+      }
+  }
+}
+
+///|
+fn deep_json_from_state_kv_store(
+  state_bytes : Bytes,
+  validate : Bool,
+) -> Json raise DecodeError {
+  // For shallow_root_state_bytes in non-shallow snapshots.
+  if state_bytes.length() == 0 {
+    return Json::object(Map::new(capacity=0))
+  }
+  // Special sentinel for empty state.
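+  // (a single b'E' byte; the fastsnapshot tests in document_test.mbt encode an
+  // empty state the same way)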
+  if state_bytes.length() == 1 && state_bytes[0] == b'E' {
+    return Json::object(Map::new(capacity=0))
+  }
+
+  let kvs = sstable_import_all(state_bytes, validate)
+  let entries : @hashmap.HashMap[String, (ContainerType, Bytes)] = @hashmap.new(
+    capacity=if kvs.length() < 16 { 16 } else { kvs.length() },
+  )
+  let roots : Array[(String, String)] = []
+  for kv in kvs {
+    let (k, v) = kv
+    if k.length() == 2 && k[0] == b'f' && k[1] == b'r' {
+      continue
+    }
+    let cid = container_id_from_bytes(k[:])
+    let key = bytes_hex_upper(k)
+    let wrapper = parse_container_wrapper(v[:])
+    entries.set(key, (wrapper.kind(), wrapper.payload_view().to_bytes()))
+    match cid {
+      ContainerID::Root(name, _kind) => roots.push((name, key))
+      ContainerID::Normal(_, _, _) => ()
+    }
+  }
+
+  let obj = Map::new(capacity=roots.length())
+  for pair in roots {
+    let (name, key) = pair
+    obj[name] = container_json_from_key(entries, key, validate, 0)
+  }
+  Json::object(obj)
+}
+
+///|
+/// Export a FastSnapshot (mode=3) document blob into deep JSON format,
+/// matching Rust `doc.get_deep_value().to_json_value()`.
+pub fn export_deep_json_from_fast_snapshot(
+  bytes : Bytes,
+  validate : Bool,
+) -> String raise DecodeError {
+  let doc = parse_document(bytes, validate)
+  if doc.mode() != 3 {
+    raise DecodeError("deep-json: not a FastSnapshot (mode=3) document")
+  }
+  let parsed = parse_fast_snapshot_body(doc.body_view())
+  let root = deep_json_from_state_kv_store(
+    parsed.state_bytes_view().to_bytes(),
+    validate,
+  )
+  root.stringify(indent=2)
+}
diff --git a/moon/loro_codec/document.mbt b/moon/loro_codec/document.mbt
new file mode 100644
index 000000000..2439e9d44
--- /dev/null
+++ b/moon/loro_codec/document.mbt
@@ -0,0 +1,219 @@
+///|
+pub struct ParsedDocument {
+  mode : UInt
+  body : BytesView
+}
+
+///|
+pub fn ParsedDocument::mode(self : ParsedDocument) -> UInt {
+  self.mode
+}
+
+///|
+pub fn ParsedDocument::body(self : ParsedDocument) -> Bytes {
+  self.body.to_bytes()
+}
+
+///|
+pub fn ParsedDocument::body_view(self : ParsedDocument) -> BytesView {
+  self.body
+}
+
+///|
+pub struct FastSnapshotBody {
+  oplog_bytes : BytesView
+  state_bytes : BytesView
+  shallow_root_state_bytes : BytesView
+}
+
+///|
+pub fn FastSnapshotBody::oplog_bytes_view(self : FastSnapshotBody) -> BytesView {
+  self.oplog_bytes
+}
+
+///|
+pub fn FastSnapshotBody::state_bytes_view(self : FastSnapshotBody) -> BytesView {
+  self.state_bytes
+}
+
+///|
+pub fn FastSnapshotBody::shallow_root_state_bytes_view(
+  self : FastSnapshotBody,
+) -> BytesView {
+  self.shallow_root_state_bytes
+}
+
+///|
+const MAGIC_BYTES : Bytes = b"loro"
+
+///|
+const MIN_HEADER_SIZE : Int = 22
+
+///|
+pub fn parse_document(
+  bytes : Bytes,
+  check_checksum : Bool,
+) -> ParsedDocument raise DecodeError {
+  if bytes.length() < MIN_HEADER_SIZE {
+    raise DecodeError("invalid document: too short")
+  }
+  if bytes[0:4] != MAGIC_BYTES[:] {
+    raise DecodeError("invalid document: magic mismatch")
+  }
+
+  // checksum field is bytes[4..20], but only last 4 bytes are used for FastSnapshot/FastUpdates.
+  let stored_checksum = BytesReader::from_view(bytes[16:20]).read_u32_le()
+  let mode = BytesReader::from_view(bytes[20:22]).read_u16_be()
+  let body = bytes[22:]
+  if check_checksum {
+    // IMPORTANT: checksum covers bytes[20..] (mode + body), not just body.
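+    // Full header layout: bytes[0..4] = b"loro" magic, bytes[4..16] unused
+    // (zero-padded on encode), bytes[16..20] = xxhash32 (LE),
+    // bytes[20..22] = mode (u16 BE), bytes[22..] = body.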
+ let checksum_body = bytes[20:] + let expected = xxhash32(checksum_body, LORO_XXH32_SEED) + if expected != stored_checksum { + raise DecodeError("invalid document: checksum mismatch") + } + } + { mode, body } +} + +///| +pub fn parse_fast_snapshot_body( + body : BytesView, +) -> FastSnapshotBody raise DecodeError { + let r = BytesReader::from_view(body) + let oplog_len = r.read_u32_le().reinterpret_as_int() + if oplog_len < 0 || oplog_len > r.remaining() { + raise DecodeError("fastsnapshot: invalid oplog length") + } + let oplog_bytes = r.read_exact(oplog_len) + let state_len = r.read_u32_le().reinterpret_as_int() + if state_len < 0 || state_len > r.remaining() { + raise DecodeError("fastsnapshot: invalid state length") + } + let state_bytes = r.read_exact(state_len) + let shallow_len = r.read_u32_le().reinterpret_as_int() + if shallow_len < 0 || shallow_len > r.remaining() { + raise DecodeError("fastsnapshot: invalid shallow_root_state length") + } + let shallow_root_state_bytes = r.read_exact(shallow_len) + if r.remaining() != 0 { + raise DecodeError("fastsnapshot: trailing bytes") + } + { oplog_bytes, state_bytes, shallow_root_state_bytes } +} + +///| +pub fn parse_fast_updates_body( + body : BytesView, +) -> Array[BytesView] raise DecodeError { + let r = BytesReader::from_view(body) + let blocks : Array[BytesView] = [] + while r.remaining() > 0 { + let len_u64 = r.read_uleb128_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("fastupdates: block too large") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("fastupdates: invalid block length") + } + blocks.push(r.read_exact(len)) + } + blocks +} + +///| +pub fn encode_fast_snapshot_body( + oplog : Bytes, + state : Bytes, + shallow_root_state : Bytes, +) -> Bytes { + let w = BytesWriter::new() + w.write_u32_le(oplog.length().reinterpret_as_uint()) + w.write_bytes(oplog) + w.write_u32_le(state.length().reinterpret_as_uint()) + w.write_bytes(state) + w.write_u32_le(shallow_root_state.length().reinterpret_as_uint()) + w.write_bytes(shallow_root_state) + w.to_bytes() +} + +///| +pub fn encode_fast_updates_body(blocks : Array[Bytes]) -> Bytes { + let w = BytesWriter::new() + for b in blocks { + w.write_uleb128_u64(b.length().to_uint64()) + w.write_bytes(b) + } + w.to_bytes() +} + +///| +pub fn encode_document(mode : UInt, body : Bytes) -> Bytes raise DecodeError { + // checksum covers bytes[20..] 
= mode(u16 BE) + body + let checksum_input = BytesWriter::new() + checksum_input.write_u16_be(mode) + checksum_input.write_bytes(body) + let checksum = xxhash32(checksum_input.to_bytes()[:], LORO_XXH32_SEED) + let w = BytesWriter::new() + w.write_bytes(MAGIC_BYTES) + for _i in 0..<12 { + w.write_u8(0x00) + } + w.write_u32_le(checksum) + w.write_u16_be(mode) + w.write_bytes(body) + w.to_bytes() +} + +///| +pub fn transcode_document( + bytes : Bytes, + validate : Bool, +) -> Bytes raise DecodeError { + let doc = parse_document(bytes, true) + match doc.mode() { + 3 => { + let parsed = parse_fast_snapshot_body(doc.body_view()) + let oplog = if validate { + transcode_oplog_kv_store(parsed.oplog_bytes_view().to_bytes(), true) + } else { + parsed.oplog_bytes_view().to_bytes() + } + let state = if validate { + transcode_state_kv_store(parsed.state_bytes_view().to_bytes(), true) + } else { + parsed.state_bytes_view().to_bytes() + } + let shallow_root_state = if validate { + transcode_state_kv_store( + parsed.shallow_root_state_bytes_view().to_bytes(), + true, + ) + } else { + parsed.shallow_root_state_bytes_view().to_bytes() + } + let body = encode_fast_snapshot_body(oplog, state, shallow_root_state) + encode_document(3, body) + } + 4 => { + let blocks = parse_fast_updates_body(doc.body_view()) + let out_blocks : Array[Bytes] = [] + for b in blocks { + let decoded = decode_change_block_full(b) + let encoded = encode_change_block(decoded) catch { + EncodeError(msg) => + raise DecodeError("fastupdates: encode change block failed: " + msg) + } + if validate { + let _ = decode_change_block(encoded[:]) + + } + out_blocks.push(encoded) + } + let body = encode_fast_updates_body(out_blocks) + encode_document(4, body) + } + _ => raise DecodeError("unsupported document mode") + } +} diff --git a/moon/loro_codec/document_test.mbt b/moon/loro_codec/document_test.mbt new file mode 100644 index 000000000..a031992a5 --- /dev/null +++ b/moon/loro_codec/document_test.mbt @@ -0,0 +1,155 @@ +///| +test "parse_document checksum covers mode+body" { + let mode : UInt = 3 + let body : Bytes = b"\x01\x02\x03" + let checksum_input = BytesWriter::new() + checksum_input.write_u16_be(mode) + checksum_input.write_bytes(body) + let checksum = try! xxhash32(checksum_input.to_bytes()[:], LORO_XXH32_SEED) + + // Construct: magic(4) + checksum(16) + mode(2) + body + let w = BytesWriter::new() + w.write_bytes(b"loro") + for _i in 0..<12 { + w.write_u8(0x00) + } + // write checksum to bytes[16..20] LE + w.write_u32_le(checksum) + w.write_u16_be(mode) + w.write_bytes(body) + let doc = try! parse_document(w.to_bytes(), true) + assert_eq(doc.mode(), mode) + assert_eq(doc.body(), body) +} + +///| +test "parse_document rejects bad checksum" { + let w = BytesWriter::new() + w.write_bytes(b"loro") + for _i in 0..<16 { + w.write_u8(0x00) + } + w.write_u16_be(3) + w.write_u8(0x01) + let res = try? parse_document(w.to_bytes(), true) + match res { + Ok(_) => assert_eq(true, false) + Err(_) => () + } +} + +///| +test "parse fastsnapshot body" { + let w = BytesWriter::new() + w.write_u32_le(3) + w.write_bytes(b"op!") + w.write_u32_le(1) + w.write_bytes(b"E") + w.write_u32_le(0) + let parsed = try! 
parse_fast_snapshot_body(w.to_bytes()[:]) + assert_eq(parsed.oplog_bytes_view().to_bytes(), b"op!") + assert_eq(parsed.state_bytes_view().to_bytes(), b"E") + assert_eq(parsed.shallow_root_state_bytes_view().length(), 0) +} + +///| +test "parse fastupdates body" { + let w = BytesWriter::new() + w.write_uleb128_u64(3) + w.write_bytes(b"abc") + w.write_uleb128_u64(1) + w.write_bytes(b"Z") + let blocks = try! parse_fast_updates_body(w.to_bytes()[:]) + assert_eq(blocks.length(), 2) + assert_eq(blocks[0].to_bytes(), b"abc") + assert_eq(blocks[1].to_bytes(), b"Z") +} + +///| +test "encode_document roundtrip" { + let mode : UInt = 4 + let body : Bytes = b"\x00\x01\x02" + let encoded = try! encode_document(mode, body) + let parsed = try! parse_document(encoded, true) + assert_eq(parsed.mode(), mode) + assert_eq(parsed.body(), body) +} + +///| +fn make_minimal_change_block_bytes() -> Bytes { + let peer0 = 0x0102030405060708UL + let header_w = BytesWriter::new() + header_w.write_uleb128_u64(1UL) // peer_num + header_w.write_u64_le(peer0) + header_w.write_bytes(encode_bool_rle([false])) // dep_on_self + header_w.write_bytes(encode_any_rle_usize([0UL])) // dep lens + header_w.write_bytes(encode_delta_of_delta_i64([])) // dep counters (empty) + header_w.write_bytes(encode_delta_of_delta_i64([])) // lamports (empty) + let header_bytes = header_w.to_bytes() + let meta_w = BytesWriter::new() + meta_w.write_bytes(encode_delta_of_delta_i64([1234L])) // timestamp + meta_w.write_bytes(encode_any_rle_u32([0])) // no commit msg + let meta_bytes = meta_w.to_bytes() + let keys_w = BytesWriter::new() + let root = @encoding/utf8.encode("root"[:]) + keys_w.write_uleb128_u64(root.length().to_uint64()) + keys_w.write_bytes(root) + let k = @encoding/utf8.encode("a"[:]) + keys_w.write_uleb128_u64(k.length().to_uint64()) + keys_w.write_bytes(k) + let keys_bytes = keys_w.to_bytes() + let arena_w = BytesWriter::new() + arena_w.write_varint_u64(1UL) + arena_w.write_varint_u64(4UL) // field count + arena_w.write_u8(b'\x01') // is_root + arena_w.write_u8(container_type_to_u8(ContainerType::map())) + arena_w.write_varint_u64(0UL) + arena_w.write_varint_i64(0L) + let cids_bytes = arena_w.to_bytes() + let ops_col0 = encode_delta_rle_u32([0]) // container_index + let ops_col1 = encode_delta_rle_i32([1]) // prop = key idx + let ops_col2 = encode_rle_u8([11]) // value_type = LoroValue + let ops_col3 = encode_rle_u32([1]) // len = 1 + let ops_bytes = encode_columnar_vec_wrapped([ + ops_col0, ops_col1, ops_col2, ops_col3, + ]) + let values_bytes = b"\x03\x0A" + let w = BytesWriter::new() + w.write_varint_u64(10UL) // counter_start + w.write_varint_u64(1UL) // counter_len + w.write_varint_u64(100UL) // lamport_start + w.write_varint_u64(1UL) // lamport_len + w.write_varint_u64(1UL) // n_changes + for + part in [ + header_bytes, meta_bytes, cids_bytes, keys_bytes, b"", ops_bytes, b"", values_bytes, + ] { + w.write_varint_u64(part.length().to_uint64()) + w.write_bytes(part) + } + w.to_bytes() +} + +///| +test "transcode_document preserves fastsnapshot" { + let vv : VersionVector = [(1UL, 0)] + let vv_bytes = postcard_encode_version_vector(vv) + let fr_bytes = postcard_encode_frontiers([]) + // Ensure keys are sorted: "fr" < "vv" + let oplog = sstable_export_all([(b"fr", fr_bytes), (b"vv", vv_bytes)], 4096) catch { + EncodeError(_) => fail("failed to build oplog sstable") + } + let body = encode_fast_snapshot_body(oplog, b"E", b"") + let doc = try! encode_document(3, body) + let out = try! 
transcode_document(doc, true) + assert_eq(out, doc) +} + +///| +test "transcode_document validates fastupdates blocks" { + let block = make_minimal_change_block_bytes() + let body = encode_fast_updates_body([block]) + let doc = try! encode_document(4, body) + let out = try! transcode_document(doc, true) + assert_eq(out, doc) +} diff --git a/moon/loro_codec/errors.mbt b/moon/loro_codec/errors.mbt new file mode 100644 index 000000000..b0781501e --- /dev/null +++ b/moon/loro_codec/errors.mbt @@ -0,0 +1,9 @@ +///| +pub(all) suberror DecodeError { + DecodeError(String) +} derive(Show, Eq) + +///| +pub(all) suberror EncodeError { + EncodeError(String) +} derive(Show, Eq) diff --git a/moon/loro_codec/id.mbt b/moon/loro_codec/id.mbt new file mode 100644 index 000000000..5175e4532 --- /dev/null +++ b/moon/loro_codec/id.mbt @@ -0,0 +1,42 @@ +///| +pub struct ID { + peer : UInt64 + counter : Int +} derive(Eq, Show) + +///| +pub fn ID::new(peer : UInt64, counter : Int) -> ID { + { peer, counter } +} + +///| +pub fn ID::peer(self : ID) -> UInt64 { + self.peer +} + +///| +pub fn ID::counter(self : ID) -> Int { + self.counter +} + +///| +pub fn ID::to_change_block_key(self : ID) -> Bytes { + let w = BytesWriter::new() + w.write_u64_be(self.peer) + w.write_u32_be(self.counter.reinterpret_as_uint()) + w.to_bytes() +} + +///| +pub fn id_from_change_block_key(bytes : BytesView) -> ID raise DecodeError { + if bytes.length() != 12 { + raise DecodeError("id: invalid change block key length") + } + let r = BytesReader::from_view(bytes) + let peer = r.read_u64_be() + let counter = r.read_u32_be().reinterpret_as_int() + if r.remaining() != 0 { + raise DecodeError("id: trailing bytes") + } + { peer, counter } +} diff --git a/moon/loro_codec/id_test.mbt b/moon/loro_codec/id_test.mbt new file mode 100644 index 000000000..d3101f28a --- /dev/null +++ b/moon/loro_codec/id_test.mbt @@ -0,0 +1,9 @@ +///| +test "change block key encode/decode" { + let id = ID::new(0x0102030405060708, -1) + let key = id.to_change_block_key() + assert_eq(key, b"\x01\x02\x03\x04\x05\x06\x07\x08\xFF\xFF\xFF\xFF") + let parsed = try! 
id_from_change_block_key(key[:]) + assert_eq(parsed.peer(), 0x0102030405060708) + assert_eq(parsed.counter(), -1) +} diff --git a/moon/loro_codec/json_schema_common.mbt b/moon/loro_codec/json_schema_common.mbt new file mode 100644 index 000000000..6a2f32ed8 --- /dev/null +++ b/moon/loro_codec/json_schema_common.mbt @@ -0,0 +1,12 @@ +///| +const LORO_CONTAINER_ID_PREFIX : String = "🦜:" + +///| +fn id_inc(id : ID, delta : Int) -> ID raise DecodeError { + let mut counter64 = id.counter().to_int64() + counter64 = counter64 + delta.to_int64() + if counter64 < -2147483648L || counter64 > 2147483647L { + raise DecodeError("jsonschema: id counter overflow") + } + ID::new(id.peer(), counter64.to_int()) +} diff --git a/moon/loro_codec/json_schema_export.mbt b/moon/loro_codec/json_schema_export.mbt new file mode 100644 index 000000000..00d0507fa --- /dev/null +++ b/moon/loro_codec/json_schema_export.mbt @@ -0,0 +1,12 @@ +// ----------------------------------------------------------------------------- +// FastUpdates (decode) -> JsonSchema (export) +// ----------------------------------------------------------------------------- +// +// Implementation is split into cohesive files: +// - json_schema_common.mbt +// - json_schema_export_helpers.mbt +// - json_schema_export_hex.mbt +// - json_schema_export_numbers.mbt +// - json_schema_export_values.mbt +// - json_schema_export_ops.mbt +// - json_schema_export_changes.mbt diff --git a/moon/loro_codec/json_schema_export_changes.mbt b/moon/loro_codec/json_schema_export_changes.mbt new file mode 100644 index 000000000..6cac61051 --- /dev/null +++ b/moon/loro_codec/json_schema_export_changes.mbt @@ -0,0 +1,133 @@ +///| +const JSON_SCHEMA_VERSION : Int = 1 + +///| +fn change_json_schema( + change : Change, + keys : Array[String], + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> Json raise DecodeError { + let deps : Array[String] = [] + for d in change.deps() { + deps.push(id_string_with_peer_index(d, peers, peer_to_idx)) + } + let ops : Array[Json] = [] + for op in change.ops() { + ops.push(op_json_schema(op, keys, change.id().peer(), peers, peer_to_idx)) + } + { + "id": id_string_with_peer_index(change.id(), peers, peer_to_idx), + "timestamp": jsonschema_number_i64(change.timestamp()), + "deps": deps, + "lamport": jsonschema_number_u64(change.lamport().to_uint64()), + "msg": match change.msg() { + None => Json::null() + Some(m) => Json::string(m) + }, + "ops": Json::array(ops), + } +} + +///| +/// Export a FastUpdates (mode=4) document blob into JSON schema format as described in +/// `docs/JsonSchema.md` (peer-compressed). +pub fn export_json_schema_from_fast_updates( + bytes : Bytes, + validate : Bool, +) -> String raise DecodeError { + let doc = parse_document(bytes, validate) + if doc.mode() != 4 { + raise DecodeError("jsonschema: not a FastUpdates (mode=4) document") + } + + // Decode blocks first so we can detect which deps are outside the exported set. + let blocks = parse_fast_updates_body(doc.body_view()) + let decoded_blocks : Array[DecodedChangeBlock] = [] + for b in blocks { + decoded_blocks.push(decode_change_block_full(b)) + } + + // Determine the per-peer exported starting counter (like start_vv[peer]). + // FastUpdates always include a contiguous counter range per peer, so deps are external iff + // `dep.counter < start_counter` or the peer isn't exported at all. 
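+  // Example: if peer 7's exported range starts at counter 10 and some change
+  // depends on 9@7, that dep is external and start_version["7"] becomes 9
+  // (the max external dep counter seen for that peer).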
+ let start_counter_by_peer : @hashmap.HashMap[UInt64, Int] = @hashmap.new( + capacity=64, + ) + for blk in decoded_blocks { + for c in blk.changes() { + let id = c.id() + let peer = id.peer() + let counter = id.counter() + match start_counter_by_peer.get(peer) { + None => start_counter_by_peer.set(peer, counter) + Some(old) => + if counter < old { + start_counter_by_peer.set(peer, counter) + } + } + } + } + + // Derive `start_version` by collecting deps that are outside the exported set. + let external_deps_max : @hashmap.HashMap[UInt64, Int] = @hashmap.new( + capacity=64, + ) + for blk in decoded_blocks { + for c in blk.changes() { + for dep in c.deps() { + let is_external = match start_counter_by_peer.get(dep.peer()) { + None => true + Some(start_counter) => dep.counter() < start_counter + } + if is_external { + match external_deps_max.get(dep.peer()) { + None => external_deps_max.set(dep.peer(), dep.counter()) + Some(old) => + if dep.counter() > old { + external_deps_max.set(dep.peer(), dep.counter()) + } + } + } + } + } + } + let peers : Array[UInt64] = [] + let peer_to_idx : @hashmap.HashMap[UInt64, UInt64] = @hashmap.new(capacity=64) + // Rust `export_json_updates` orders changes by lamport. FastUpdates blocks are + // typically grouped by peer/counter, so normalize here to match Rust output. + let change_entries : Array[(UInt64, Array[String], Change)] = [] + for blk in decoded_blocks { + let keys = blk.keys() + for c in blk.changes() { + change_entries.push((c.lamport().to_uint64(), keys, c)) + } + } + change_entries.sort_by_key(entry => { + let (lamport, _keys, _c) = entry + lamport + }) + let changes : Array[Json] = [] + for entry in change_entries { + let (_lamport, keys, c) = entry + changes.push(change_json_schema(c, keys, peers, peer_to_idx)) + } + let peers_json : Array[Json] = [] + for p in peers { + peers_json.push(Json::string(p.to_string())) + } + let start_version : Map[String, Json] = Map::new( + capacity=external_deps_max.length(), + ) + for pair in external_deps_max.to_array() { + let (peer, counter) = pair + start_version[peer.to_string()] = jsonschema_number_int(counter) + } + let root : Json = { + "schema_version": JSON_SCHEMA_VERSION, + "start_version": Json::object(start_version), + "peers": Json::array(peers_json), + "changes": Json::array(changes), + } + root.stringify(indent=2) +} diff --git a/moon/loro_codec/json_schema_export_helpers.mbt b/moon/loro_codec/json_schema_export_helpers.mbt new file mode 100644 index 000000000..cf0d9f098 --- /dev/null +++ b/moon/loro_codec/json_schema_export_helpers.mbt @@ -0,0 +1,70 @@ +///| +fn jsonschema_register_peer( + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], + peer : UInt64, +) -> UInt64 { + match peer_to_idx.get(peer) { + Some(idx) => idx + None => { + let idx = peers.length().to_uint64() + peers.push(peer) + peer_to_idx.set(peer, idx) + idx + } + } +} + +///| +fn id_string_with_peer_index( + id : ID, + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> String { + let idx = jsonschema_register_peer(peers, peer_to_idx, id.peer()) + id.counter().to_string() + "@" + idx.to_string() +} + +///| +fn idlp_string_with_peer_index( + idlp : IdLp, + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> String { + let idx = jsonschema_register_peer(peers, peer_to_idx, idlp.peer()) + "L" + idlp.lamport().to_uint64().to_string() + "@" + idx.to_string() +} + +///| +fn jsonschema_container_type_string(kind : ContainerType) -> String { + match kind { + 
ContainerType::Map => "Map" + ContainerType::List => "List" + ContainerType::Text => "Text" + ContainerType::Tree => "Tree" + ContainerType::MovableList => "MovableList" + ContainerType::Counter => "Counter" + ContainerType::Unknown(k) => "Unknown(" + k.to_string() + ")" + } +} + +///| +fn container_id_string_with_peer_index( + cid : ContainerID, + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> String { + match cid { + ContainerID::Root(name, kind) => + "cid:root-" + name + ":" + jsonschema_container_type_string(kind) + ContainerID::Normal(peer, counter, kind) => { + let idx = jsonschema_register_peer(peers, peer_to_idx, peer) + "cid:" + + counter.to_string() + + "@" + + idx.to_string() + + ":" + + jsonschema_container_type_string(kind) + } + } +} diff --git a/moon/loro_codec/json_schema_export_hex.mbt b/moon/loro_codec/json_schema_export_hex.mbt new file mode 100644 index 000000000..bdf499ed0 --- /dev/null +++ b/moon/loro_codec/json_schema_export_hex.mbt @@ -0,0 +1,24 @@ +///| +fn jsonschema_bytes_hex_upper(bytes : Bytes) -> String { + let sb = StringBuilder::new() + let hex : Array[Char] = [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + ] + for b in bytes { + let u = b.to_uint() + let hi = ((u >> 4) & 0xF).reinterpret_as_int() + let lo = (u & 0xF).reinterpret_as_int() + sb.write_char(hex[hi]) + sb.write_char(hex[lo]) + } + sb.to_string() +} + +///| +fn jsonschema_binary_json(bytes : Bytes) -> Json { + let out : Array[Json] = [] + for b in bytes { + out.push(Json::number(b.to_uint().to_double())) + } + Json::array(out) +} diff --git a/moon/loro_codec/json_schema_export_numbers.mbt b/moon/loro_codec/json_schema_export_numbers.mbt new file mode 100644 index 000000000..bd5689f20 --- /dev/null +++ b/moon/loro_codec/json_schema_export_numbers.mbt @@ -0,0 +1,14 @@ +///| +fn jsonschema_number_i64(i : Int64) -> Json { + Json::number(i.to_double(), repr=i.to_string()) +} + +///| +fn jsonschema_number_u64(u : UInt64) -> Json { + Json::number(u.to_double(), repr=u.to_string()) +} + +///| +fn jsonschema_number_int(i : Int) -> Json { + Json::number(i.to_double(), repr=i.to_string()) +} diff --git a/moon/loro_codec/json_schema_export_ops.mbt b/moon/loro_codec/json_schema_export_ops.mbt new file mode 100644 index 000000000..b9f42ad60 --- /dev/null +++ b/moon/loro_codec/json_schema_export_ops.mbt @@ -0,0 +1,212 @@ +///| +fn op_content_json_schema( + container : ContainerID, + content : OpContent, + keys : Array[String], + op_id : ID, + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> Json raise DecodeError { + match content { + OpContent::Text(TextOp::Insert(pos, text)) => + { "type": "insert", "pos": pos.reinterpret_as_int(), "text": text } + OpContent::Text(TextOp::Delete(pos, len, start_id)) => + { + "type": "delete", + "pos": pos, + "len": jsonschema_number_i64(len), + "start_id": id_string_with_peer_index(start_id, peers, peer_to_idx), + } + OpContent::Text(TextOp::Mark(start, end, style_key, style_value, info)) => + { + "type": "mark", + "start": start.reinterpret_as_int(), + "end": end.reinterpret_as_int(), + "style_key": style_key, + "style_value": loro_value_json_schema( + style_value, keys, op_id, peers, peer_to_idx, 0, + ), + "info": info.to_uint().reinterpret_as_int(), + } + OpContent::Text(TextOp::MarkEnd) => { "type": "mark_end" } + OpContent::List(ListOp::Insert(pos, values)) => { + let out : Array[Json] = [] + for i in 0.. 
+ { + "type": "delete", + "pos": pos, + "len": jsonschema_number_i64(len), + "start_id": id_string_with_peer_index(start_id, peers, peer_to_idx), + } + OpContent::MovableList(MovableListOp::Insert(pos, values)) => { + let out : Array[Json] = [] + for i in 0.. + { + "type": "delete", + "pos": pos, + "len": jsonschema_number_i64(len), + "start_id": id_string_with_peer_index(start_id, peers, peer_to_idx), + } + OpContent::MovableList(MovableListOp::Move(from, to, elem_id)) => + { + "type": "move", + "from": from.reinterpret_as_int(), + "to": to.reinterpret_as_int(), + "elem_id": idlp_string_with_peer_index(elem_id, peers, peer_to_idx), + } + OpContent::MovableList(MovableListOp::Set(elem_id, value)) => + { + "type": "set", + "elem_id": idlp_string_with_peer_index(elem_id, peers, peer_to_idx), + "value": loro_value_json_schema( + value, keys, op_id, peers, peer_to_idx, 0, + ), + } + OpContent::Map(MapOp::Insert(key, value)) => + { + "type": "insert", + "key": key, + "value": loro_value_json_schema( + value, keys, op_id, peers, peer_to_idx, 0, + ), + } + OpContent::Map(MapOp::Delete(key)) => { "type": "delete", "key": key } + OpContent::Tree(TreeOp::Create(target, parent, fi)) => + { + "type": "create", + "target": id_string_with_peer_index(target, peers, peer_to_idx), + "parent": match parent { + None => Json::null() + Some(p) => + Json::string(id_string_with_peer_index(p, peers, peer_to_idx)) + }, + "fractional_index": jsonschema_bytes_hex_upper(fi.bytes()), + } + OpContent::Tree(TreeOp::Move(target, parent, fi)) => + { + "type": "move", + "target": id_string_with_peer_index(target, peers, peer_to_idx), + "parent": match parent { + None => Json::null() + Some(p) => + Json::string(id_string_with_peer_index(p, peers, peer_to_idx)) + }, + "fractional_index": jsonschema_bytes_hex_upper(fi.bytes()), + } + OpContent::Tree(TreeOp::Delete(target)) => + { + "type": "delete", + "target": id_string_with_peer_index(target, peers, peer_to_idx), + } + OpContent::Future(FutureOp::Unknown(prop, raw)) => { + let kind = match container { + ContainerID::Root(_, k) => k + ContainerID::Normal(_, _, k) => k + } + match kind { + ContainerType::Counter => + match raw { + // Match Rust JsonSchema `FutureOp::Counter(OwnedValue::F64(..))`. + Value::F64(x) => { + let s = x.to_string() + let repr = if s.contains(".") || s.contains("e") || s.contains("E") { + s + } else { + s + ".0" + } + { + "type": "counter", + "prop": prop, + "value_type": "f64", + "value": Json::number(x, repr=repr), + } + } + Value::I64(x) => + { + "type": "counter", + "prop": prop, + "value_type": "f64", + "value": Json::number(x.to_double(), repr=x.to_string() + ".0"), + } + _ => + raise DecodeError( + "jsonschema: Counter op must have f64 raw value", + ) + } + _ => + // Best-effort: keep unknown ops opaque. This is only used when container type is Unknown. 
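+          // Note that the raw payload is dropped here (value is null below),
+          // so these ops are lossy through this exporter by design.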
+ { + "type": "unknown", + "prop": prop, + "value_type": "unknown", + "value": Json::null(), + } + } + } + } +} + +///| +fn op_json_schema( + op : Op, + keys : Array[String], + change_peer : UInt64, + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], +) -> Json raise DecodeError { + let op_id = ID::new(change_peer, op.counter()) + { + "container": container_id_string_with_peer_index( + op.container(), + peers, + peer_to_idx, + ), + "content": op_content_json_schema( + op.container(), + op.content(), + keys, + op_id, + peers, + peer_to_idx, + ), + "counter": jsonschema_number_int(op.counter()), + } +} diff --git a/moon/loro_codec/json_schema_export_values.mbt b/moon/loro_codec/json_schema_export_values.mbt new file mode 100644 index 000000000..9a1a2deee --- /dev/null +++ b/moon/loro_codec/json_schema_export_values.mbt @@ -0,0 +1,72 @@ +///| +fn loro_value_json_schema( + v : LoroValue, + keys : Array[String], + id : ID, + peers : Array[UInt64], + peer_to_idx : @hashmap.HashMap[UInt64, UInt64], + depth : Int, +) -> Json raise DecodeError { + if depth > 1024 { + raise DecodeError("jsonschema: value too deep") + } + match v { + LoroValue::Null => Json::null() + LoroValue::True => Json::boolean(true) + LoroValue::False => Json::boolean(false) + LoroValue::I64(x) => jsonschema_number_i64(x) + LoroValue::F64(x) => Json::number(x) + LoroValue::Str(s) => Json::string(s) + LoroValue::Binary(b) => jsonschema_binary_json(b) + LoroValue::List(items) => { + let out : Array[Json] = [] + for i in 0.. { + let obj = Map::new(capacity=items.length()) + for pair in items { + let (key_idx, value) = pair + let mut key = "" + if key_idx <= 0x7FFF_FFFFUL { + let i = key_idx.to_int() + if i >= 0 && i < keys.length() { + key = keys[i] + } + } + obj[key] = loro_value_json_schema( + value, + keys, + id, + peers, + peer_to_idx, + depth + 1, + ) + } + Json::object(obj) + } + LoroValue::ContainerType(ct) => { + let kind = container_type_from_u8(ct) + let peer_idx = jsonschema_register_peer(peers, peer_to_idx, id.peer()) + let cid = "cid:" + + id.counter().to_string() + + "@" + + peer_idx.to_string() + + ":" + + jsonschema_container_type_string(kind) + Json::string(LORO_CONTAINER_ID_PREFIX + cid) + } + } +} diff --git a/moon/loro_codec/json_schema_import.mbt b/moon/loro_codec/json_schema_import.mbt new file mode 100644 index 000000000..174bfb521 --- /dev/null +++ b/moon/loro_codec/json_schema_import.mbt @@ -0,0 +1,11 @@ +// ----------------------------------------------------------------------------- +// JsonSchema -> FastUpdates (encode) +// ----------------------------------------------------------------------------- +// +// Implementation is split into cohesive files: +// - json_schema_import_parse.mbt +// - json_schema_import_ids.mbt +// - json_schema_import_hex.mbt +// - json_schema_import_values.mbt +// - json_schema_import_ops.mbt +// - json_schema_import_changes.mbt diff --git a/moon/loro_codec/json_schema_import_changes.mbt b/moon/loro_codec/json_schema_import_changes.mbt new file mode 100644 index 000000000..17fec18aa --- /dev/null +++ b/moon/loro_codec/json_schema_import_changes.mbt @@ -0,0 +1,187 @@ +///| +fn jsonschema_import_parse_change( + v : Json, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> Change raise DecodeError { + let obj = jsonschema_import_expect_object(v, "change") + let id_str = jsonschema_import_req_string(obj, "id", "change.id") + let id = jsonschema_import_parse_id(id_str, peers, "change.id") + let timestamp = 
jsonschema_import_req_i64( + obj, "timestamp", "change.timestamp", + ) + let lamport_u32 = jsonschema_import_req_u32(obj, "lamport", "change.lamport") + let msg : String? = match obj.get("msg") { + None => None + Some(Json::Null) => None + Some(vs) => Some(jsonschema_import_expect_string(vs, "change.msg")) + } + let deps_json = jsonschema_import_req_array(obj, "deps", "change.deps") + let deps : Array[ID] = [] + for i in 0.. Array[Change] raise DecodeError { + if changes.length() == 0 { + raise DecodeError("jsonschema_import: empty changes") + } + let peer0 = changes[0].id().peer() + for c in changes { + if c.id().peer() != peer0 { + raise DecodeError("jsonschema_import: mixed peers in one block") + } + } + // Sort by change start counter. + changes.sort_by_key(c => c.id().counter()) + let mut expected = changes[0].id().counter() + for c in changes { + if c.id().counter() != expected { + raise DecodeError("jsonschema_import: change counters not contiguous") + } + let ops = c.ops() + ops.sort_by_key(op => op.counter()) + let mut op_expected = expected + let mut atom_len : Int64 = 0 + for op in ops { + if op.counter() != op_expected { + raise DecodeError("jsonschema_import: op counters not contiguous") + } + let l = op.len().to_int64() + atom_len = atom_len + l + op_expected = op_expected + l.to_int() + } + expected = expected + atom_len.to_int() + } + changes +} + +///| +/// Encode a JsonSchema JSON string into a FastUpdates (mode=4) document blob. +/// +/// This is intended for tooling/tests (JsonSchema <-> binary). It only supports +/// the subset of JsonSchema that corresponds to current containers (Map/List/Text/Tree/MovableList) +/// and does not support UnknownOp yet. +pub fn encode_fast_updates_from_json_schema( + json : String, + validate : Bool, +) -> Bytes raise DecodeError { + let root = @json.parse(json[:]) catch { + _ => raise DecodeError("jsonschema_import: invalid json") + } + let obj = jsonschema_import_expect_object(root, "root") + // peers: optional; when present, ids use peer indices. + let peers : Array[UInt64]? = match obj.get("peers") { + None => None + Some(Json::Null) => None + Some(v) => { + let arr = jsonschema_import_expect_array(v, "peers") + let ps : Array[UInt64] = [] + for i in 0.. { + let arr : Array[Change] = [] + arr.push(c) + by_peer.set(p, arr) + } + Some(arr) => arr.push(c) + } + } + let peer_entries = by_peer.to_array() + let peer_ids : Array[UInt64] = [] + for pair in peer_entries { + let (p, _arr) = pair + peer_ids.push(p) + } + peer_ids.sort() + let blocks : Array[Bytes] = [] + for peer in peer_ids { + let arr = match by_peer.get(peer) { + None => [] + Some(x) => x + } + let sorted_changes = jsonschema_import_sort_and_validate_changes(arr) + let block : DecodedChangeBlock = { + // Ensure the block peer is always at peers[0], as required by ChangeBlock encoding, + // even when this peer references containers created by other peers. 
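+      // E.g. a block owned by peer 7 starts with `peers == [7]` even when its
+      // ops reference containers created by peer 9; any such foreign peers
+      // are registered at later indices by the block encoder.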
+ peers: [peer], + keys, + cids: [], + positions: [], + changes: sorted_changes, + } + let b = encode_change_block(block) catch { + EncodeError(msg) => + raise DecodeError( + "jsonschema_import: encode change block failed: " + msg, + ) + } + if validate { + let _ = decode_change_block(b[:]) + + } + blocks.push(b) + } + let body = encode_fast_updates_body(blocks) + encode_document(4, body) +} diff --git a/moon/loro_codec/json_schema_import_hex.mbt b/moon/loro_codec/json_schema_import_hex.mbt new file mode 100644 index 000000000..0105fa6b0 --- /dev/null +++ b/moon/loro_codec/json_schema_import_hex.mbt @@ -0,0 +1,26 @@ +///| +fn jsonschema_import_hex_nibble(c : UInt16) -> Int raise DecodeError { + let x = c.to_int() + if x >= '0'.to_int() && x <= '9'.to_int() { + x - '0'.to_int() + } else if x >= 'a'.to_int() && x <= 'f'.to_int() { + 10 + (x - 'a'.to_int()) + } else if x >= 'A'.to_int() && x <= 'F'.to_int() { + 10 + (x - 'A'.to_int()) + } else { + raise DecodeError("jsonschema_import: invalid hex char") + } +} + +///| +fn jsonschema_import_hex_to_bytes(hex : String) -> Bytes raise DecodeError { + if hex.length() % 2 != 0 { + raise DecodeError("jsonschema_import: invalid hex length") + } + let out_len = hex.length() / 2 + Bytes::makei(out_len, i => { + let hi = jsonschema_import_hex_nibble(hex[2 * i]) + let lo = jsonschema_import_hex_nibble(hex[2 * i + 1]) + ((hi << 4) | lo).to_byte() + }) +} diff --git a/moon/loro_codec/json_schema_import_ids.mbt b/moon/loro_codec/json_schema_import_ids.mbt new file mode 100644 index 000000000..34b99d966 --- /dev/null +++ b/moon/loro_codec/json_schema_import_ids.mbt @@ -0,0 +1,147 @@ +///| +fn jsonschema_import_container_type_from_string( + s : String, +) -> ContainerType raise DecodeError { + match s { + "Map" => ContainerType::Map + "List" => ContainerType::List + "Text" => ContainerType::Text + "Tree" => ContainerType::Tree + "MovableList" => ContainerType::MovableList + "Counter" => ContainerType::Counter + _ => raise DecodeError("jsonschema_import: unknown container type: " + s) + } +} + +///| +fn jsonschema_import_parse_id_raw( + s : String, + what : String, +) -> (Int, UInt64) raise DecodeError { + let parts = s.split("@").collect() + if parts.length() != 2 { + raise DecodeError("jsonschema_import: invalid " + what + " id: " + s) + } + let counter = jsonschema_import_parse_int_decimal( + parts[0].to_string(), + what + ".counter", + ) + let peer = jsonschema_import_parse_uint64_decimal( + parts[1].to_string(), + what + ".peer", + ) + (counter, peer) +} + +///| +fn jsonschema_import_parse_id( + s : String, + peers : Array[UInt64]?, + what : String, +) -> ID raise DecodeError { + let (counter, peer_or_idx) = jsonschema_import_parse_id_raw(s, what) + let peer = match peers { + None => peer_or_idx + Some(ps) => { + if peer_or_idx > 0x7FFF_FFFFUL { + raise DecodeError("jsonschema_import: peer_idx too large for " + what) + } + let i = peer_or_idx.to_int() + if i < 0 || i >= ps.length() { + raise DecodeError( + "jsonschema_import: peer_idx out of range for " + what, + ) + } + ps[i] + } + } + ID::new(peer, counter) +} + +///| +fn jsonschema_import_parse_idlp( + s : String, + peers : Array[UInt64]?, + what : String, +) -> IdLp raise DecodeError { + if !s.has_prefix("L") { + raise DecodeError("jsonschema_import: invalid " + what + " idlp: " + s) + } + let rest = (s[1:] catch { + _ => raise DecodeError("jsonschema_import: invalid " + what + " idlp: " + s) + }).to_string() + let (lamport_i, peer_or_idx) = jsonschema_import_parse_id_raw(rest, what) + if lamport_i < 
0 { + raise DecodeError("jsonschema_import: negative lamport for " + what) + } + let peer = match peers { + None => peer_or_idx + Some(ps) => { + if peer_or_idx > 0x7FFF_FFFFUL { + raise DecodeError("jsonschema_import: peer_idx too large for " + what) + } + let i = peer_or_idx.to_int() + if i < 0 || i >= ps.length() { + raise DecodeError( + "jsonschema_import: peer_idx out of range for " + what, + ) + } + ps[i] + } + } + IdLp::new(peer, lamport_i.reinterpret_as_uint()) +} + +///| +fn jsonschema_import_parse_container_id( + s : String, + peers : Array[UInt64]?, +) -> ContainerID raise DecodeError { + if !s.has_prefix("cid:") { + raise DecodeError("jsonschema_import: invalid container id: " + s) + } + if s.has_prefix("cid:root-") { + // cid:root-${name}:${ContainerType} + let rest = (s["cid:root-".length():] catch { + _ => + raise DecodeError("jsonschema_import: invalid root container id: " + s) + }).to_string() + let parts = rest.split(":").collect() + if parts.length() != 2 { + raise DecodeError("jsonschema_import: invalid root container id: " + s) + } + let name = parts[0].to_string() + let kind = jsonschema_import_container_type_from_string( + parts[1].to_string(), + ) + return ContainerID::Root(name, kind) + } + + // cid:${counter}@${peer}:${ContainerType} + let rest = (s["cid:".length():] catch { + _ => raise DecodeError("jsonschema_import: invalid container id: " + s) + }).to_string() + let parts = rest.split(":").collect() + if parts.length() != 2 { + raise DecodeError("jsonschema_import: invalid container id: " + s) + } + let id_part = parts[0].to_string() + let kind = jsonschema_import_container_type_from_string(parts[1].to_string()) + let (counter, peer_or_idx) = jsonschema_import_parse_id_raw( + id_part, "container_id", + ) + let peer = match peers { + None => peer_or_idx + Some(ps) => { + if peer_or_idx > 0x7FFF_FFFFUL { + raise DecodeError("jsonschema_import: container peer_idx too large") + } + let i = peer_or_idx.to_int() + if i < 0 || i >= ps.length() { + raise DecodeError("jsonschema_import: container peer_idx out of range") + } + ps[i] + } + } + ContainerID::Normal(peer, counter, kind) +} diff --git a/moon/loro_codec/json_schema_import_ops.mbt b/moon/loro_codec/json_schema_import_ops.mbt new file mode 100644 index 000000000..fce44d5bf --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops.mbt @@ -0,0 +1,69 @@ +///| +fn jsonschema_import_op_len(content : OpContent) -> UInt { + match content { + OpContent::Map(_) => 1 + OpContent::Tree(_) => 1 + OpContent::Future(_) => 1 + OpContent::List(ListOp::Insert(_pos, values)) => + values.length().reinterpret_as_uint() + OpContent::List(ListOp::Delete(_pos, len, _start_id)) => + len.abs().reinterpret_as_uint64().to_uint() + OpContent::MovableList(MovableListOp::Insert(_pos, values)) => + values.length().reinterpret_as_uint() + OpContent::MovableList(MovableListOp::Delete(_pos, len, _start_id)) => + len.abs().reinterpret_as_uint64().to_uint() + OpContent::MovableList(MovableListOp::Move(_, _, _)) => 1 + OpContent::MovableList(MovableListOp::Set(_, _)) => 1 + OpContent::Text(TextOp::Insert(_pos, text)) => + count_utf8_codepoints(text).reinterpret_as_uint() + OpContent::Text(TextOp::Delete(_pos, len, _start_id)) => + len.abs().reinterpret_as_uint64().to_uint() + OpContent::Text(TextOp::Mark(_, _, _, _, _)) => 1 + OpContent::Text(TextOp::MarkEnd) => 1 + } +} + +///| +fn jsonschema_import_parse_op_content( + container : ContainerID, + v : Json, + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + 
peers : Array[UInt64]?, +) -> OpContent raise DecodeError { + let obj = jsonschema_import_expect_object(v, "op.content") + let t = match obj.get("type") { + Some(vt) => jsonschema_import_expect_string(vt, "op.content.type") + None => raise DecodeError("jsonschema_import: missing op.content.type") + } + let kind = match container { + ContainerID::Root(_, k) => k + ContainerID::Normal(_, _, k) => k + } + match kind { + ContainerType::Text => + jsonschema_import_parse_text_op_content( + obj, t, op_id, keys, key_to_idx, peers, + ) + ContainerType::List => + jsonschema_import_parse_list_op_content( + obj, t, op_id, keys, key_to_idx, peers, + ) + ContainerType::MovableList => + jsonschema_import_parse_movable_list_op_content( + obj, t, op_id, keys, key_to_idx, peers, + ) + ContainerType::Map => + jsonschema_import_parse_map_op_content( + obj, t, op_id, keys, key_to_idx, peers, + ) + ContainerType::Tree => + jsonschema_import_parse_tree_op_content(obj, t, peers) + ContainerType::Counter => jsonschema_import_parse_counter_op_content(obj, t) + ContainerType::Unknown(_) => + raise DecodeError( + "jsonschema_import: Unknown container type is not supported", + ) + } +} diff --git a/moon/loro_codec/json_schema_import_ops_counter.mbt b/moon/loro_codec/json_schema_import_ops_counter.mbt new file mode 100644 index 000000000..b4a1be5f9 --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_counter.mbt @@ -0,0 +1,53 @@ +///| +fn jsonschema_import_parse_counter_op_content( + obj : Map[String, Json], + t : String, +) -> OpContent raise DecodeError { + // Counter ops are encoded via JsonOpContent::Future (same shape as Unknown), + // and ultimately stored as raw Value (I64/F64) in ChangeBlock values. + if t != "counter" && t != "unknown" { + raise DecodeError( + "jsonschema_import: invalid Counter op content.type: " + t, + ) + } + let prop = jsonschema_import_req_int(obj, "prop", "op.content.prop") + if prop != 0 { + raise DecodeError("jsonschema_import: Counter op prop must be 0") + } + let value_type = jsonschema_import_req_string( + obj, + "value_type", + "op.content.value_type", + ) + let value_json = jsonschema_import_req(obj, "value", "op.content.value") + let raw = match value_type { + "f64" => { + let (n, repr) = jsonschema_import_expect_number( + value_json, + "op.content.value", + ) + // Prefer parsing from the lexical representation when available, to avoid any + // precision loss in the JSON number parser. + let v = match repr { + None => n + Some(s) => + @strconv.parse_double(s[:]) catch { + @strconv.StrConvError(err) => + raise DecodeError( + "jsonschema_import: invalid op.content.value f64: " + err, + ) + } + } + Value::F64(v) + } + "i64" => + Value::I64( + jsonschema_import_number_to_i64(value_json, "op.content.value"), + ) + _ => + raise DecodeError( + "jsonschema_import: Counter op unsupported value_type: " + value_type, + ) + } + OpContent::Future(FutureOp::Unknown(prop, raw)) +} diff --git a/moon/loro_codec/json_schema_import_ops_helpers.mbt b/moon/loro_codec/json_schema_import_ops_helpers.mbt new file mode 100644 index 000000000..4e3a59352 --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_helpers.mbt @@ -0,0 +1,44 @@ +///| +fn jsonschema_import_parse_values_array( + values_json : Array[Json], + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> Array[LoroValue] raise DecodeError { + let values : Array[LoroValue] = [] + for i in 0.. ID? 
raise DecodeError { + match obj.get("parent") { + None => None + Some(vp) => + match vp { + Json::Null => None + _ => + Some( + jsonschema_import_parse_id( + jsonschema_import_expect_string(vp, "tree.parent"), + peers, + "tree_id", + ), + ) + } + } +} diff --git a/moon/loro_codec/json_schema_import_ops_list.mbt b/moon/loro_codec/json_schema_import_ops_list.mbt new file mode 100644 index 000000000..87ddf7699 --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_list.mbt @@ -0,0 +1,33 @@ +///| +fn jsonschema_import_parse_list_op_content( + obj : Map[String, Json], + t : String, + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> OpContent raise DecodeError { + match t { + "insert" => { + let pos = jsonschema_import_req_u32(obj, "pos", "list.insert.pos") + let values_json = jsonschema_import_req_array( + obj, "value", "list.insert.value", + ) + let values = jsonschema_import_parse_values_array( + values_json, op_id, keys, key_to_idx, peers, + ) + OpContent::List(ListOp::Insert(pos, values)) + } + "delete" => { + let pos = jsonschema_import_req_int(obj, "pos", "list.delete.pos") + let len_i64 = jsonschema_import_req_i64(obj, "len", "list.delete.len") + let start_id = jsonschema_import_parse_id( + jsonschema_import_req_string(obj, "start_id", "list.delete.start_id"), + peers, + "start_id", + ) + OpContent::List(ListOp::Delete(pos, len_i64, start_id)) + } + _ => raise DecodeError("jsonschema_import: unknown list op type: " + t) + } +} diff --git a/moon/loro_codec/json_schema_import_ops_map.mbt b/moon/loro_codec/json_schema_import_ops_map.mbt new file mode 100644 index 000000000..b7a9d094b --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_map.mbt @@ -0,0 +1,28 @@ +///| +fn jsonschema_import_parse_map_op_content( + obj : Map[String, Json], + t : String, + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> OpContent raise DecodeError { + match t { + "insert" => { + let key = jsonschema_import_req_string(obj, "key", "map.insert.key") + let value = jsonschema_import_loro_value( + jsonschema_import_req(obj, "value", "map.insert.value"), + op_id, + keys, + key_to_idx, + peers, + ) + OpContent::Map(MapOp::Insert(key, value)) + } + "delete" => { + let key = jsonschema_import_req_string(obj, "key", "map.delete.key") + OpContent::Map(MapOp::Delete(key)) + } + _ => raise DecodeError("jsonschema_import: unknown map op type: " + t) + } +} diff --git a/moon/loro_codec/json_schema_import_ops_movable_list.mbt b/moon/loro_codec/json_schema_import_ops_movable_list.mbt new file mode 100644 index 000000000..2cda8648b --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_movable_list.mbt @@ -0,0 +1,59 @@ +///| +fn jsonschema_import_parse_movable_list_op_content( + obj : Map[String, Json], + t : String, + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> OpContent raise DecodeError { + match t { + "insert" => { + let pos = jsonschema_import_req_u32(obj, "pos", "mlist.insert.pos") + let values_json = jsonschema_import_req_array( + obj, "value", "mlist.insert.value", + ) + let values = jsonschema_import_parse_values_array( + values_json, op_id, keys, key_to_idx, peers, + ) + OpContent::MovableList(MovableListOp::Insert(pos, values)) + } + "delete" => { + let pos = jsonschema_import_req_int(obj, "pos", "mlist.delete.pos") + let len_i64 = jsonschema_import_req_i64(obj, "len", "mlist.delete.len") + let 
start_id = jsonschema_import_parse_id( + jsonschema_import_req_string(obj, "start_id", "mlist.delete.start_id"), + peers, + "start_id", + ) + OpContent::MovableList(MovableListOp::Delete(pos, len_i64, start_id)) + } + "move" => { + let from = jsonschema_import_req_u32(obj, "from", "mlist.move.from") + let to = jsonschema_import_req_u32(obj, "to", "mlist.move.to") + let elem_id = jsonschema_import_parse_idlp( + jsonschema_import_req_string(obj, "elem_id", "mlist.move.elem_id"), + peers, + "elem_id", + ) + OpContent::MovableList(MovableListOp::Move(from, to, elem_id)) + } + "set" => { + let elem_id = jsonschema_import_parse_idlp( + jsonschema_import_req_string(obj, "elem_id", "mlist.set.elem_id"), + peers, + "elem_id", + ) + let value = jsonschema_import_loro_value( + jsonschema_import_req(obj, "value", "mlist.set.value"), + op_id, + keys, + key_to_idx, + peers, + ) + OpContent::MovableList(MovableListOp::Set(elem_id, value)) + } + _ => + raise DecodeError("jsonschema_import: unknown movable_list op type: " + t) + } +} diff --git a/moon/loro_codec/json_schema_import_ops_text.mbt b/moon/loro_codec/json_schema_import_ops_text.mbt new file mode 100644 index 000000000..7a89d97cc --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_text.mbt @@ -0,0 +1,50 @@ +///| +fn jsonschema_import_parse_text_op_content( + obj : Map[String, Json], + t : String, + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> OpContent raise DecodeError { + match t { + "insert" => { + let pos = jsonschema_import_req_u32(obj, "pos", "text.insert.pos") + let text = jsonschema_import_req_string(obj, "text", "text.insert.text") + OpContent::Text(TextOp::Insert(pos, text)) + } + "delete" => { + let pos = jsonschema_import_req_int(obj, "pos", "text.delete.pos") + let len_i64 = jsonschema_import_req_i64(obj, "len", "text.delete.len") + let start_id = jsonschema_import_parse_id( + jsonschema_import_req_string(obj, "start_id", "text.delete.start_id"), + peers, + "start_id", + ) + OpContent::Text(TextOp::Delete(pos, len_i64, start_id)) + } + "mark" => { + let start = jsonschema_import_req_u32(obj, "start", "text.mark.start") + let end = jsonschema_import_req_u32(obj, "end", "text.mark.end") + let style_key = jsonschema_import_req_string( + obj, "style_key", "text.mark.style_key", + ) + let style_val = jsonschema_import_loro_value( + jsonschema_import_req(obj, "style_value", "text.mark.style_value"), + op_id, + keys, + key_to_idx, + peers, + ) + let info_i = jsonschema_import_req_int(obj, "info", "text.mark.info") + if info_i < 0 || info_i > 255 { + raise DecodeError("jsonschema_import: invalid text.mark.info") + } + OpContent::Text( + TextOp::Mark(start, end, style_key, style_val, info_i.to_byte()), + ) + } + "mark_end" => OpContent::Text(TextOp::MarkEnd) + _ => raise DecodeError("jsonschema_import: unknown text op type: " + t) + } +} diff --git a/moon/loro_codec/json_schema_import_ops_tree.mbt b/moon/loro_codec/json_schema_import_ops_tree.mbt new file mode 100644 index 000000000..4631fa48a --- /dev/null +++ b/moon/loro_codec/json_schema_import_ops_tree.mbt @@ -0,0 +1,44 @@ +///| +fn jsonschema_import_parse_tree_op_content( + obj : Map[String, Json], + t : String, + peers : Array[UInt64]?, +) -> OpContent raise DecodeError { + match t { + "create" => { + let target = jsonschema_import_parse_id( + jsonschema_import_req_string(obj, "target", "tree.create.target"), + peers, + "tree_id", + ) + let parent = jsonschema_import_parse_tree_parent(obj, peers) + let fi_hex 
= jsonschema_import_req_string( + obj, "fractional_index", "tree.fractional_index", + ) + let fi = FractionalIndex::new(jsonschema_import_hex_to_bytes(fi_hex)) + OpContent::Tree(TreeOp::Create(target, parent, fi)) + } + "move" => { + let target = jsonschema_import_parse_id( + jsonschema_import_req_string(obj, "target", "tree.move.target"), + peers, + "tree_id", + ) + let parent = jsonschema_import_parse_tree_parent(obj, peers) + let fi_hex = jsonschema_import_req_string( + obj, "fractional_index", "tree.fractional_index", + ) + let fi = FractionalIndex::new(jsonschema_import_hex_to_bytes(fi_hex)) + OpContent::Tree(TreeOp::Move(target, parent, fi)) + } + "delete" => { + let target = jsonschema_import_parse_id( + jsonschema_import_req_string(obj, "target", "tree.delete.target"), + peers, + "tree_id", + ) + OpContent::Tree(TreeOp::Delete(target)) + } + _ => raise DecodeError("jsonschema_import: unknown tree op type: " + t) + } +} diff --git a/moon/loro_codec/json_schema_import_parse.mbt b/moon/loro_codec/json_schema_import_parse.mbt new file mode 100644 index 000000000..6176b7d21 --- /dev/null +++ b/moon/loro_codec/json_schema_import_parse.mbt @@ -0,0 +1,189 @@ +///| +fn jsonschema_import_parse_uint64_decimal( + s : String, + what : String, +) -> UInt64 raise DecodeError { + @strconv.parse_uint64(s[:]) catch { + @strconv.StrConvError(err) => + raise DecodeError("jsonschema_import: invalid " + what + ": " + err) + } +} + +///| +fn jsonschema_import_parse_int64_decimal( + s : String, + what : String, +) -> Int64 raise DecodeError { + @strconv.parse_int64(s[:]) catch { + @strconv.StrConvError(err) => + raise DecodeError("jsonschema_import: invalid " + what + ": " + err) + } +} + +///| +fn jsonschema_import_parse_int_decimal( + s : String, + what : String, +) -> Int raise DecodeError { + @strconv.parse_int(s[:]) catch { + @strconv.StrConvError(err) => + raise DecodeError("jsonschema_import: invalid " + what + ": " + err) + } +} + +///| +fn jsonschema_import_expect_object( + v : Json, + what : String, +) -> Map[String, Json] raise DecodeError { + match v { + Json::Object(obj) => obj + _ => raise DecodeError("jsonschema_import: expected object for " + what) + } +} + +///| +fn jsonschema_import_expect_array( + v : Json, + what : String, +) -> Array[Json] raise DecodeError { + match v { + Json::Array(arr) => arr + _ => raise DecodeError("jsonschema_import: expected array for " + what) + } +} + +///| +fn jsonschema_import_expect_string( + v : Json, + what : String, +) -> String raise DecodeError { + match v { + Json::String(s) => s + _ => raise DecodeError("jsonschema_import: expected string for " + what) + } +} + +///| +fn jsonschema_import_expect_number( + v : Json, + what : String, +) -> (Double, String?) 
raise DecodeError { + match v { + Json::Number(n, repr~) => (n, repr) + _ => raise DecodeError("jsonschema_import: expected number for " + what) + } +} + +///| +fn jsonschema_import_req( + obj : Map[String, Json], + key : String, + what : String, +) -> Json raise DecodeError { + match obj.get(key) { + Some(v) => v + None => raise DecodeError("jsonschema_import: missing " + what) + } +} + +///| +fn jsonschema_import_req_string( + obj : Map[String, Json], + key : String, + what : String, +) -> String raise DecodeError { + jsonschema_import_expect_string(jsonschema_import_req(obj, key, what), what) +} + +///| +fn jsonschema_import_req_array( + obj : Map[String, Json], + key : String, + what : String, +) -> Array[Json] raise DecodeError { + jsonschema_import_expect_array(jsonschema_import_req(obj, key, what), what) +} + +///| +fn jsonschema_import_req_int( + obj : Map[String, Json], + key : String, + what : String, +) -> Int raise DecodeError { + jsonschema_import_number_to_int(jsonschema_import_req(obj, key, what), what) +} + +///| +fn jsonschema_import_req_i64( + obj : Map[String, Json], + key : String, + what : String, +) -> Int64 raise DecodeError { + jsonschema_import_number_to_i64(jsonschema_import_req(obj, key, what), what) +} + +///| +fn jsonschema_import_req_u32( + obj : Map[String, Json], + key : String, + what : String, +) -> UInt raise DecodeError { + jsonschema_import_number_to_u32(jsonschema_import_req(obj, key, what), what) +} + +///| +fn jsonschema_import_number_to_int( + v : Json, + what : String, +) -> Int raise DecodeError { + let (n, repr) = jsonschema_import_expect_number(v, what) + // Prefer repr for integer decoding to avoid float rounding. + match repr { + Some(s) => jsonschema_import_parse_int_decimal(s, what) + None => { + // Best-effort: require it to be an integer. + let i = n.to_int() + if i.to_double() != n { + raise DecodeError( + "jsonschema_import: expected integer number for " + what, + ) + } + i + } + } +} + +///| +fn jsonschema_import_number_to_i64( + v : Json, + what : String, +) -> Int64 raise DecodeError { + let (n, repr) = jsonschema_import_expect_number(v, what) + match repr { + Some(s) => jsonschema_import_parse_int64_decimal(s, what) + None => { + let i = n.to_int64() + if i.to_double() != n { + raise DecodeError( + "jsonschema_import: expected integer number for " + what, + ) + } + i + } + } +} + +///| +fn jsonschema_import_number_to_u32( + v : Json, + what : String, +) -> UInt raise DecodeError { + let i = jsonschema_import_number_to_int(v, what) + if i < 0 { + raise DecodeError( + "jsonschema_import: expected non-negative integer for " + what, + ) + } + i.reinterpret_as_uint() +} diff --git a/moon/loro_codec/json_schema_import_values.mbt b/moon/loro_codec/json_schema_import_values.mbt new file mode 100644 index 000000000..4a1eac157 --- /dev/null +++ b/moon/loro_codec/json_schema_import_values.mbt @@ -0,0 +1,93 @@ +///| +fn jsonschema_import_loro_value( + v : Json, + op_id : ID, + keys : Array[String], + key_to_idx : @hashmap.HashMap[String, UInt64], + peers : Array[UInt64]?, +) -> LoroValue raise DecodeError { + match v { + Json::Null => LoroValue::Null + Json::True => LoroValue::True + Json::False => LoroValue::False + Json::Number(_n, repr~) => + // Decide int vs float by lexical repr when available. 
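+    // E.g. `3` and `3.0` parse to the same Double, but only a repr containing
+    // '.', 'e', or 'E' becomes F64; a plain "3" stays I64(3).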
+ match repr { + Some(s) => + if s.contains(".") || s.contains("e") || s.contains("E") { + LoroValue::F64(_n) + } else { + LoroValue::I64( + jsonschema_import_parse_int64_decimal(s, "loro_value.i64"), + ) + } + None => { + let i = _n.to_int64() + // If the number is an integer (no fractional part), treat it as i64. + // This matches Rust JsonSchema output for i64 values when the JSON parser + // doesn't retain the original lexical representation. + if i.to_double() == _n { + LoroValue::I64(i) + } else { + LoroValue::F64(_n) + } + } + } + Json::String(s) => + if s.has_prefix(LORO_CONTAINER_ID_PREFIX) { + let cid_str = (s[LORO_CONTAINER_ID_PREFIX.length():] catch { + _ => + raise DecodeError( + "jsonschema_import: invalid container value string", + ) + }).to_string() + let cid = jsonschema_import_parse_container_id(cid_str, peers) + match cid { + ContainerID::Root(_, _kind) => + // Root container ids are not representable in binary value encoding; reject for now. + raise DecodeError( + "jsonschema_import: root container value is not supported", + ) + ContainerID::Normal(peer, counter, kind) => { + // Binary value encoding reconstructs container id from op_id + type. + if peer != op_id.peer() || counter != op_id.counter() { + raise DecodeError( + "jsonschema_import: container value id mismatch", + ) + } + LoroValue::ContainerType(container_type_to_u8(kind)) + } + } + } else { + LoroValue::Str(s) + } + Json::Array(arr) => { + let out : Array[LoroValue] = [] + for i in 0.. { + let out : Array[(UInt64, LoroValue)] = [] + for k, vv in obj { + let idx = register_key(keys, key_to_idx, k) + out.push( + ( + idx, + jsonschema_import_loro_value(vv, op_id, keys, key_to_idx, peers), + ), + ) + } + LoroValue::Map(out) + } + } +} diff --git a/moon/loro_codec/leb128.mbt b/moon/loro_codec/leb128.mbt new file mode 100644 index 000000000..f2190cc1d --- /dev/null +++ b/moon/loro_codec/leb128.mbt @@ -0,0 +1,83 @@ +///| +pub fn BytesReader::read_uleb128_u64( + self : BytesReader, +) -> UInt64 raise DecodeError { + let mut result : UInt64 = 0 + let mut shift = 0 + while true { + if shift >= 64 { + raise DecodeError("uleb128 overflow") + } + let byte = self.read_u8().to_uint64() + result = result | ((byte & 0x7F) << shift) + if (byte & 0x80) == 0 { + return result + } + shift = shift + 7 + } + 0 +} + +///| +pub fn BytesWriter::write_uleb128_u64( + self : BytesWriter, + value : UInt64, +) -> Unit { + let mut value = value + while true { + let mut byte = value & 0x7F + value = value >> 7 + if value != 0 { + byte = byte | 0x80 + } + self.write_u8(byte.to_byte()) + if value == 0 { + break + } + } +} + +///| +pub fn BytesReader::read_sleb128_i64( + self : BytesReader, +) -> Int64 raise DecodeError { + let mut result : Int64 = 0 + let mut shift = 0 + let mut byte : UInt64 = 0 + while true { + if shift >= 64 { + raise DecodeError("sleb128 overflow") + } + byte = self.read_u8().to_uint64() + let slice = (byte & 0x7F).reinterpret_as_int64() + result = result | (slice << shift) + shift = shift + 7 + if (byte & 0x80) == 0 { + break + } + } + if shift < 64 && (byte & 0x40) != 0 { + result = result | (-1 << shift) + } + result +} + +///| +pub fn BytesWriter::write_sleb128_i64( + self : BytesWriter, + value : Int64, +) -> Unit { + let mut value = value + let mut more = true + while more { + let mut byte = (value & 0x7F).to_byte() + value = value >> 7 + let sign_bit = (byte & 0x40) != 0 + if (value == 0 && !sign_bit) || (value == -1 && sign_bit) { + more = false + } else { + byte = byte | 0x80 + } + self.write_u8(byte) + } +} 
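The sign-extension step at the end of `read_sleb128_i64` is the subtle part. A minimal worked example, assuming only the `BytesReader` API defined above:

///|
test "sleb128 sign extension, worked by hand" {
  // -2 encodes as the single byte 0x7E:
  //   slice = 0x7E & 0x7F = 126, so result = 126 and shift = 7;
  //   the stop byte has bit 0x40 set, so result |= -1 << 7,
  //   i.e. 126 - 128 = -2.
  let r = BytesReader::new(b"\x7E")
  assert_eq(try! r.read_sleb128_i64(), -2L)
  assert_eq(r.remaining(), 0)
}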
diff --git a/moon/loro_codec/leb128_test.mbt b/moon/loro_codec/leb128_test.mbt new file mode 100644 index 000000000..f215841e3 --- /dev/null +++ b/moon/loro_codec/leb128_test.mbt @@ -0,0 +1,40 @@ +///| +test "uleb128 examples" { + fn roundtrip(v : UInt64, expected : Bytes) raise { + let w = BytesWriter::new() + w.write_uleb128_u64(v) + assert_eq(w.to_bytes(), expected) + let r = BytesReader::new(expected) + assert_eq(try! r.read_uleb128_u64(), v) + assert_eq(r.remaining(), 0) + } + + try! roundtrip(0, b"\x00") + try! roundtrip(1, b"\x01") + try! roundtrip(127, b"\x7F") + try! roundtrip(128, b"\x80\x01") + try! roundtrip(300, b"\xAC\x02") + try! roundtrip(16384, b"\x80\x80\x01") +} + +///| +test "sleb128 examples" { + fn roundtrip(v : Int64, expected : Bytes) raise { + let w = BytesWriter::new() + w.write_sleb128_i64(v) + assert_eq(w.to_bytes(), expected) + let r = BytesReader::new(expected) + assert_eq(try! r.read_sleb128_i64(), v) + assert_eq(r.remaining(), 0) + } + + try! roundtrip(0, b"\x00") + try! roundtrip(1, b"\x01") + try! roundtrip(-1, b"\x7F") + try! roundtrip(63, b"\x3F") + try! roundtrip(-64, b"\x40") + try! roundtrip(64, b"\xC0\x00") + try! roundtrip(-65, b"\xBF\x7F") + try! roundtrip(127, b"\xFF\x00") + try! roundtrip(-128, b"\x80\x7F") +} diff --git a/moon/loro_codec/lz4_frame.mbt b/moon/loro_codec/lz4_frame.mbt new file mode 100644 index 000000000..eb7d96331 --- /dev/null +++ b/moon/loro_codec/lz4_frame.mbt @@ -0,0 +1,351 @@ +///| +const LZ4_MAGIC : UInt = 0x184D2204 + +///| +fn lz4_header_checksum(descriptor : BytesView) -> Byte raise DecodeError { + // LZ4 Frame spec: header checksum is xxHash32(descriptor, seed=0) >> 8 & 0xFF + let h = xxhash32(descriptor, 0) + ((h >> 8) & 0xFF).to_byte() +} + +///| +fn lz4_block_checksum(block_data : BytesView) -> UInt raise DecodeError { + xxhash32(block_data, 0) +} + +///| +fn read_ext_len(r : BytesReader, base : Int) -> Int raise DecodeError { + let mut len = base + if base != 15 { + return len + } + while true { + let b = r.read_u8().to_int() + len = len + b + if b != 255 { + break + } + } + len +} + +///| +fn lz4_decompress_block_into( + out : @buffer.Buffer, + input : BytesView, +) -> Unit raise DecodeError { + let r = BytesReader::from_view(input) + while r.remaining() > 0 { + let token = r.read_u8().to_uint() + let literal_base = ((token >> 4) & 0x0F).reinterpret_as_int() + let literal_len = read_ext_len(r, literal_base) + if literal_len < 0 { + raise DecodeError("lz4: invalid literal length") + } + out.write_bytesview(r.read_exact(literal_len)) + if r.remaining() == 0 { + break + } + let offset = r.read_u16_le().reinterpret_as_int() + if offset <= 0 { + raise DecodeError("lz4: invalid match offset") + } + if offset > out.length() { + raise DecodeError("lz4: match offset out of bounds") + } + let match_base = (token & 0x0F).reinterpret_as_int() + let mut match_len = match_base + 4 + if match_base == 15 { + match_len = match_len + read_ext_len(r, 15) - 15 + } + if match_len < 0 { + raise DecodeError("lz4: invalid match length") + } + let start = out.length() - offset + for i in 0.. 
Bytes raise DecodeError { + let out = @buffer.new() + lz4_decompress_block_into(out, input) + out.to_bytes() +} + +///| +fn block_max_size_from_bd(bd : Byte) -> Int raise DecodeError { + if (bd.to_uint() & 0x8F) != 0 { + raise DecodeError("lz4: invalid BD") + } + let id = ((bd.to_uint() >> 4) & 0x07).reinterpret_as_int() + match id { + 4 => 64 * 1024 + 5 => 256 * 1024 + 6 => 1 * 1024 * 1024 + 7 => 4 * 1024 * 1024 + _ => raise DecodeError("lz4: unsupported block max size") + } +} + +///| +pub fn lz4_decompress_frame( + bytes : BytesView, + check_checksum : Bool, +) -> Bytes raise DecodeError { + let r = BytesReader::from_view(bytes) + let magic = r.read_u32_le() + if magic != LZ4_MAGIC { + raise DecodeError("lz4: invalid magic") + } + let flg = r.read_u8() + let version = ((flg.to_uint() >> 6) & 0x03).reinterpret_as_int() + if version != 1 { + raise DecodeError("lz4: unsupported version") + } + if (flg.to_uint() & 0x02) != 0 { + raise DecodeError("lz4: invalid FLG (reserved bit set)") + } + let block_independence = (flg.to_uint() & 0x20) != 0 + let block_checksum_flag = (flg.to_uint() & 0x10) != 0 + let content_size_flag = (flg.to_uint() & 0x08) != 0 + let content_checksum_flag = (flg.to_uint() & 0x04) != 0 + let dict_id_flag = (flg.to_uint() & 0x01) != 0 + let bd = r.read_u8() + let block_max_size = block_max_size_from_bd(bd) + let desc = BytesWriter::new() + desc.write_u8(flg) + desc.write_u8(bd) + let mut expected_content_size : UInt64? = None + if content_size_flag { + let size = r.read_u64_le() + expected_content_size = Some(size) + desc.write_u64_le(size) + } + if dict_id_flag { + let dict_id = r.read_u32_le() + desc.write_u32_le(dict_id) + } + let header_checksum = r.read_u8() + if check_checksum { + let expected = lz4_header_checksum(desc.to_bytes()[:]) + if expected != header_checksum { + raise DecodeError("lz4: header checksum mismatch") + } + } + let out = @buffer.new() + while true { + let block_size_raw = r.read_u32_le() + if block_size_raw == 0 { + break + } + let is_uncompressed = (block_size_raw & 0x80000000) != 0 + let size = (block_size_raw & 0x7FFFFFFF).reinterpret_as_int() + if size < 0 || size > block_max_size { + raise DecodeError("lz4: invalid block size") + } + let block_data = r.read_exact(size) + if block_checksum_flag { + let stored = r.read_u32_le() + if check_checksum { + let expected = lz4_block_checksum(block_data) + if expected != stored { + raise DecodeError("lz4: block checksum mismatch") + } + } + } + if is_uncompressed { + out.write_bytesview(block_data) + } else if block_independence { + out.write_bytes(lz4_decompress_block(block_data)) + } else { + lz4_decompress_block_into(out, block_data) + } + } + if content_checksum_flag { + let stored = r.read_u32_le() + if check_checksum { + let expected = xxhash32(out.to_bytes()[:], 0) + if expected != stored { + raise DecodeError("lz4: content checksum mismatch") + } + } + } + if check_checksum { + match expected_content_size { + Some(sz) => + if out.length().to_uint64() != sz { + raise DecodeError("lz4: content size mismatch") + } + None => () + } + } + if r.remaining() != 0 { + raise DecodeError("lz4: trailing bytes") + } + out.to_bytes() +} + +///| +fn write_ext_len(w : BytesWriter, extra : Int) -> Unit raise EncodeError { + if extra < 0 { + raise EncodeError("lz4: negative ext len") + } + let mut left = extra + while left >= 255 { + w.write_u8(255) + left = left - 255 + } + w.write_u8(left.to_byte()) +} + +///| +fn read_u32_le_at(input : BytesView, i : Int) -> UInt { + let b0 = input[i].to_uint() + let b1 = 
input[i + 1].to_uint() + let b2 = input[i + 2].to_uint() + let b3 = input[i + 3].to_uint() + b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) +} + +///| +fn bytes_eq4(input : BytesView, a : Int, b : Int) -> Bool { + input[a] == input[b] && + input[a + 1] == input[b + 1] && + input[a + 2] == input[b + 2] && + input[a + 3] == input[b + 3] +} + +///| +fn hash_u32(x : UInt) -> Int { + // LZ4 uses a multiplicative hash for 4-byte sequences. + let mul = x.to_uint64() * 2654435761UL + ((mul >> 16) & 0xFFFFUL).to_int() +} + +///| +fn lz4_compress_block(input : BytesView) -> Bytes raise EncodeError { + let n = input.length() + if n == 0 { + return b"" + } + + // Hash table for 4-byte sequences. Use -1 as empty. + let table : Array[Int] = [] + for _i in 0..<65536 { + table.push(-1) + } + + let w = BytesWriter::new() + let mut anchor = 0 + let mut i = 0 + while i + 4 <= n { + let seq = read_u32_le_at(input, i) + let h = hash_u32(seq) + let ref_pos = table[h] + table[h] = i + + if ref_pos >= 0 && i - ref_pos <= 65535 && + bytes_eq4(input, ref_pos, i) { + // Extend match. + let mut mlen = 4 + while i + mlen < n && input[ref_pos + mlen] == input[i + mlen] { + mlen = mlen + 1 + } + + let lit_len = i - anchor + let match_len_minus4 = mlen - 4 + let lit_n = if lit_len < 15 { lit_len } else { 15 } + let match_n = if match_len_minus4 < 15 { + match_len_minus4 + } else { + 15 + } + w.write_u8(((lit_n << 4) | match_n).to_byte()) + if lit_len >= 15 { + write_ext_len(w, lit_len - 15) + } + w.write_bytesview(input[anchor:i]) + + let offset = i - ref_pos + if offset <= 0 || offset > 65535 { + raise EncodeError("lz4: invalid match offset") + } + w.write_u16_le(offset.reinterpret_as_uint()) + + if match_len_minus4 >= 15 { + write_ext_len(w, match_len_minus4 - 15) + } + + i = i + mlen + anchor = i + + // Prime the table at i-2 for better compression, like the reference impl. + if i - 2 >= 0 && i - 2 + 4 <= n { + let seq2 = read_u32_le_at(input, i - 2) + table[hash_u32(seq2)] = i - 2 + } + continue + } + + i = i + 1 + } + + // Last literals. + let lit_len = n - anchor + let lit_n = if lit_len < 15 { lit_len } else { 15 } + w.write_u8((lit_n << 4).to_byte()) + if lit_len >= 15 { + write_ext_len(w, lit_len - 15) + } + w.write_bytesview(input[anchor:n]) + w.to_bytes() +} + +///| +pub fn lz4_compress_frame(bytes : Bytes) -> Bytes raise EncodeError { + // Emit a minimal LZ4 frame (block independent, no checksums/content size), + // compatible with Rust's `lz4_flex::frame::FrameDecoder`. + let flg = b'\x60' // version=1, block_independence=1 + let bd = b'\x40' // block_max_size=64KB + + let desc = BytesWriter::new() + desc.write_u8(flg) + desc.write_u8(bd) + let header_checksum = lz4_header_checksum(desc.to_bytes()[:]) catch { + DecodeError(e) => raise EncodeError("lz4: header checksum failed: " + e) + } + + let w = BytesWriter::new() + w.write_u32_le(LZ4_MAGIC) + w.write_u8(flg) + w.write_u8(bd) + w.write_u8(header_checksum) + + let block_max = 64 * 1024 + let mut pos = 0 + while pos < bytes.length() { + let remaining = bytes.length() - pos + let len = if remaining > block_max { block_max } else { remaining } + let chunk = bytes[pos:pos + len] + let compressed = lz4_compress_block(chunk) + if compressed.length() >= len { + // Use an uncompressed block inside the frame. + let raw = 0x8000_0000U | len.reinterpret_as_uint() + w.write_u32_le(raw) + w.write_bytesview(chunk) + } else { + w.write_u32_le(compressed.length().reinterpret_as_uint()) + w.write_bytes(compressed) + } + pos = pos + len + } + + // End mark. 
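+  // (A zero block size terminates the frame; no content checksum follows,
+  //  because the FLG above leaves bit 0x04 unset.)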
+ w.write_u32_le(0) + w.to_bytes() +} diff --git a/moon/loro_codec/lz4_frame_test.mbt b/moon/loro_codec/lz4_frame_test.mbt new file mode 100644 index 000000000..024d418e3 --- /dev/null +++ b/moon/loro_codec/lz4_frame_test.mbt @@ -0,0 +1,81 @@ +///| +test "lz4 frame: uncompressed block (Hello, World!)" { + let input = b"Hello, World!" + let frame = b"\x04\x22\x4D\x18\x60\x40\x82\x0D\x00\x00\x80Hello, World!\x00\x00\x00\x00" + let out = lz4_decompress_frame(frame[:], true) catch { DecodeError(_) => b"" } + assert_eq(out, input) +} + +///| +test "lz4 frame: header checksum mismatch" { + let frame = b"\x04\x22\x4D\x18\x60\x40\x83\x00\x00\x00\x00" + let out = lz4_decompress_frame(frame[:], true) catch { + DecodeError(_) => b"ERR" + } + assert_eq(out, b"ERR") +} + +///| +test "lz4 frame: compressed block (simple match copy)" { + // Decompresses to "abcabcabc". + let payload = b"\x32abc\x03\x00" + let w = BytesWriter::new() + w.write_u32_le(0x184D2204) + w.write_u8(0x60) + w.write_u8(0x40) + w.write_u8(0x82) + w.write_u32_le(payload.length().reinterpret_as_uint()) + w.write_bytes(payload) + w.write_u32_le(0) + let frame = w.to_bytes() + let out = lz4_decompress_frame(frame[:], true) catch { DecodeError(_) => b"" } + assert_eq(out, b"abcabcabc") +} + +///| +test "lz4 frame: compressed block overlap copy" { + // Decompresses to 10x 'a'. + let payload = b"\x15a\x01\x00" + let w = BytesWriter::new() + w.write_u32_le(0x184D2204) + w.write_u8(0x60) + w.write_u8(0x40) + w.write_u8(0x82) + w.write_u32_le(payload.length().reinterpret_as_uint()) + w.write_bytes(payload) + w.write_u32_le(0) + let frame = w.to_bytes() + let out = lz4_decompress_frame(frame[:], true) catch { DecodeError(_) => b"" } + assert_eq(out, b"aaaaaaaaaa") +} + +///| +fn repeat_byte(byte : Byte, n : Int) -> Bytes { + let w = BytesWriter::new() + for _i in 0.. b"" } + let out = lz4_decompress_frame(encoded[:], true) catch { DecodeError(_) => b"" } + assert_eq(out, input) + // Should compress enough to beat frame overhead. + assert_true(encoded.length() < input.length()) +} + +///| +test "lz4 frame: encode uses uncompressed block when not beneficial" { + let input = b"Hello, World!" + let encoded = lz4_compress_frame(input) catch { EncodeError(_) => b"" } + // Frame header is 7 bytes; next u32_le is the first block size. 
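+  // (7 = 4-byte magic + FLG + BD + header checksum; `lz4_compress_frame`
+  //  emits neither content size nor dictionary id.)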
+ let block_size_raw = BytesReader::from_view(encoded[7:11]) + .read_u32_le() + assert_true((block_size_raw & 0x8000_0000) != 0) + let out = lz4_decompress_frame(encoded[:], true) catch { DecodeError(_) => b"" } + assert_eq(out, input) +} diff --git a/moon/loro_codec/moon.pkg.json b/moon/loro_codec/moon.pkg.json new file mode 100644 index 000000000..311847daa --- /dev/null +++ b/moon/loro_codec/moon.pkg.json @@ -0,0 +1,2 @@ +{} + diff --git a/moon/loro_codec/op.mbt b/moon/loro_codec/op.mbt new file mode 100644 index 000000000..e5d6291c2 --- /dev/null +++ b/moon/loro_codec/op.mbt @@ -0,0 +1,123 @@ +///| +pub struct IdLp { + peer : UInt64 + lamport : UInt +} derive(Eq, Show) + +///| +pub fn IdLp::new(peer : UInt64, lamport : UInt) -> IdLp { + { peer, lamport } +} + +///| +pub fn IdLp::peer(self : IdLp) -> UInt64 { + self.peer +} + +///| +pub fn IdLp::lamport(self : IdLp) -> UInt { + self.lamport +} + +///| +pub struct FractionalIndex { + bytes : Bytes +} derive(Eq, Show) + +///| +pub fn FractionalIndex::new(bytes : Bytes) -> FractionalIndex { + { bytes, } +} + +///| +pub fn FractionalIndex::bytes(self : FractionalIndex) -> Bytes { + self.bytes +} + +///| +pub enum ListOp { + Insert(UInt, Array[LoroValue]) + Delete(Int, Int64, ID) // (pos, len, start_id) +} derive(Eq, Show) + +///| +pub enum MovableListOp { + Insert(UInt, Array[LoroValue]) + Delete(Int, Int64, ID) // (pos, len, start_id) + Move(UInt, UInt, IdLp) // (from, to, elem_id) + Set(IdLp, LoroValue) +} derive(Eq, Show) + +///| +pub enum MapOp { + Insert(String, LoroValue) + Delete(String) +} derive(Eq, Show) + +///| +pub enum TextOp { + Insert(UInt, String) + Delete(Int, Int64, ID) // (pos, len, start_id) + Mark(UInt, UInt, String, LoroValue, Byte) // (start,end,key,value,info) + MarkEnd +} derive(Eq, Show) + +///| +pub enum TreeOp { + Create(ID, ID?, FractionalIndex) + Move(ID, ID?, FractionalIndex) + Delete(ID) +} derive(Eq, Show) + +///| +pub enum FutureOp { + Unknown(Int, Value) // (prop, raw encoded value) +} derive(Eq, Show) + +///| +pub enum OpContent { + List(ListOp) + MovableList(MovableListOp) + Map(MapOp) + Text(TextOp) + Tree(TreeOp) + Future(FutureOp) +} derive(Eq, Show) + +///| +pub struct Op { + container : ContainerID + counter : Int + len : UInt + content : OpContent +} derive(Eq, Show) + +///| +pub fn Op::new( + container : ContainerID, + counter : Int, + len : UInt, + content : OpContent, +) -> Op { + { container, counter, len, content } +} + +///| +pub fn Op::container(self : Op) -> ContainerID { + self.container +} + +///| +pub fn Op::counter(self : Op) -> Int { + self.counter +} + +///| +pub fn Op::len(self : Op) -> UInt { + self.len +} + +///| +pub fn Op::content(self : Op) -> OpContent { + self.content +} diff --git a/moon/loro_codec/position_arena.mbt b/moon/loro_codec/position_arena.mbt new file mode 100644 index 000000000..a99e1754e --- /dev/null +++ b/moon/loro_codec/position_arena.mbt @@ -0,0 +1,117 @@ +///| +fn longest_common_prefix_length(a : Bytes, b : Bytes) -> UInt64 { + let n = if a.length() < b.length() { a.length() } else { b.length() } + let mut i = 0 + while i < n { + if a[i] != b[i] { + break + } + i = i + 1 + } + i.to_uint64() +} + +///| +pub fn decode_position_arena_v2( + bytes : BytesView, +) -> Array[Bytes] raise DecodeError { + if bytes.length() == 0 { + return [] + } + let cols = decode_columnar_vec_maybe_wrapped(bytes) + if cols.length() != 2 { + raise DecodeError("position_arena: invalid column count") + } + let common_prefix_u64 = decode_any_rle_usize(cols[0]) + let n = 
common_prefix_u64.length() + let rest_col = cols[1] + let r = BytesReader::from_view(rest_col) + let rest_n_u64 = r.read_varint_u64() + if rest_n_u64 > 0x7FFF_FFFFUL { + raise DecodeError("position_arena: too many rests") + } + let rest_n = rest_n_u64.to_int() + if rest_n != n { + raise DecodeError("position_arena: rest count mismatch") + } + let rests : Array[Bytes] = [] + for _i in 0.. 0x7FFF_FFFFUL { + raise DecodeError("position_arena: rest too large") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("position_arena: invalid rest length") + } + rests.push(r.read_exact(len).to_bytes()) + } + if r.remaining() != 0 { + raise DecodeError("position_arena: trailing bytes") + } + if rests.length() != n { + raise DecodeError("position_arena: invalid rest count") + } + let out : Array[Bytes] = [] + let mut last : Bytes = b"" + for i in 0.. 0x7FFF_FFFFUL { + raise DecodeError("position_arena: common_prefix too large") + } + let common = common_u64.to_int() + if common < 0 || common > last.length() { + raise DecodeError("position_arena: invalid common_prefix") + } + let w = BytesWriter::new() + w.write_bytesview(last[0:common]) + w.write_bytes(rests[i]) + last = w.to_bytes() + out.push(last) + } + out +} + +///| +pub fn encode_position_arena_v2(positions : Array[Bytes]) -> Bytes { + if positions.length() == 0 { + return b"" + } + let common_prefix : Array[UInt64] = [] + let rest_col_w = BytesWriter::new() + rest_col_w.write_varint_u64(positions.length().to_uint64()) + let mut last : Bytes = b"" + for pos in positions { + let common = longest_common_prefix_length(last, pos) + common_prefix.push(common) + let rest = pos[common.to_int():pos.length()] + rest_col_w.write_varint_u64(rest.length().to_uint64()) + rest_col_w.write_bytesview(rest) + last = pos + } + let col0 = encode_any_rle_usize(common_prefix) + let col1 = rest_col_w.to_bytes() + encode_columnar_vec_wrapped([col0, col1]) +} + +// Rust `PositionArena::encode()` always emits a serde_columnar payload even when empty. +// (TreeState uses this form and `PositionArena::decode()` does not accept empty bytes.) + +///| +pub fn encode_position_arena(positions : Array[Bytes]) -> Bytes { + let common_prefix : Array[UInt64] = [] + let rest_col_w = BytesWriter::new() + rest_col_w.write_varint_u64(positions.length().to_uint64()) + let mut last : Bytes = b"" + for pos in positions { + let common = longest_common_prefix_length(last, pos) + common_prefix.push(common) + let rest = pos[common.to_int():pos.length()] + rest_col_w.write_varint_u64(rest.length().to_uint64()) + rest_col_w.write_bytesview(rest) + last = pos + } + let col0 = encode_any_rle_usize(common_prefix) + let col1 = rest_col_w.to_bytes() + encode_columnar_vec_wrapped([col0, col1]) +} diff --git a/moon/loro_codec/position_arena_test.mbt b/moon/loro_codec/position_arena_test.mbt new file mode 100644 index 000000000..a1b5f1127 --- /dev/null +++ b/moon/loro_codec/position_arena_test.mbt @@ -0,0 +1,18 @@ +///| +test "position_arena: v2 roundtrip" { + let positions : Array[Bytes] = [ + b"\x01\x02\x03", b"\x01\x02\x04", b"\x01\x02\x04\xFF", b"\x01", b"", + ] + let encoded = encode_position_arena_v2(positions) + let decoded = try! decode_position_arena_v2(encoded[:]) + assert_eq(decoded.length(), positions.length()) + for i in 0.. 
+  let r = BytesReader::from_view(bytes)
+  let b = read_postcard_bytes_val(r)
+  (b, r.remaining_view())
+}
+
+///|
+pub fn postcard_encode_bytes(b : Bytes) -> Bytes {
+  let w = BytesWriter::new()
+  write_postcard_bytes_val(w, b)
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/postcard_common_value.mbt b/moon/loro_codec/postcard_common_value.mbt
new file mode 100644
index 000000000..725df4ce0
--- /dev/null
+++ b/moon/loro_codec/postcard_common_value.mbt
@@ -0,0 +1,7 @@
+///|
+/// Postcard `CommonValue` codec.
+///
+/// This file is only an entrypoint. See:
+/// - `postcard_common_value_types.mbt`
+/// - `postcard_common_value_decode.mbt`
+/// - `postcard_common_value_encode.mbt`
diff --git a/moon/loro_codec/postcard_common_value_decode.mbt b/moon/loro_codec/postcard_common_value_decode.mbt
new file mode 100644
index 000000000..a87f3f8b9
--- /dev/null
+++ b/moon/loro_codec/postcard_common_value_decode.mbt
@@ -0,0 +1,90 @@
+///|
+fn read_common_value(
+  r : BytesReader,
+  depth : Int,
+) -> CommonValue raise DecodeError {
+  if depth > 1024 {
+    raise DecodeError("postcard: value too deep")
+  }
+  let discr = r.read_varint_u64()
+  match discr {
+    0UL => CommonValue::Null
+    1UL =>
+      match r.read_u8() {
+        b'\x00' => CommonValue::Bool(false)
+        b'\x01' => CommonValue::Bool(true)
+        _ => raise DecodeError("postcard: invalid bool")
+      }
+    2UL => {
+      // f64 little-endian
+      let bits = r.read_u64_le()
+      CommonValue::Double(bits.reinterpret_as_double())
+    }
+    3UL =>
+      // i64 zigzag varint
+      CommonValue::I64(r.read_varint_i64())
+    4UL => CommonValue::String(read_postcard_utf8(r))
+    5UL => {
+      let len = read_postcard_len_u64(r).to_int()
+      let items : Array[CommonValue] = []
+      for _i in 0..<len {
+        items.push(read_common_value(r, depth + 1))
+      }
+      CommonValue::List(items)
+    }
+    6UL => {
+      let len = read_postcard_len_u64(r).to_int()
+      let items : Array[(String, CommonValue)] = []
+      for _i in 0..<len {
+        let k = read_postcard_utf8(r)
+        items.push((k, read_common_value(r, depth + 1)))
+      }
+      CommonValue::Map(items)
+    }
+    7UL => {
+      let (cid, rest) = postcard_take_container_id(r.remaining_view())
+      r.skip(r.remaining() - rest.length())
+      CommonValue::Container(cid)
+    }
+    8UL => CommonValue::Binary(read_postcard_bytes_val(r))
+    _ => raise DecodeError("postcard: invalid LoroValue discriminant")
+  }
+}
+
+///|
+pub fn postcard_take_common_value(
+  bytes : BytesView,
+) -> (CommonValue, BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let v = read_common_value(r, 0)
+  (v, r.remaining_view())
+}
+
+///|
+pub fn postcard_take_vec_common_value(
+  bytes : BytesView,
+) -> (Array[CommonValue], BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let len = read_postcard_len_u64(r).to_int()
+  let out : Array[CommonValue] = []
+  for _i in 0..<len {
+    out.push(read_common_value(r, 0))
+  }
+  (out, r.remaining_view())
+}
+
+///|
+pub fn postcard_take_map_string_common_value(
+  bytes : BytesView,
+) -> (Array[(String, CommonValue)], BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let len = read_postcard_len_u64(r).to_int()
+  let out : Array[(String, CommonValue)] = []
+  for _i in 0..<len {
+    let k = read_postcard_utf8(r)
+    let v = read_common_value(r, 0)
+    out.push((k, v))
+  }
+  (out, r.remaining_view())
+}
diff --git a/moon/loro_codec/postcard_common_value_encode.mbt b/moon/loro_codec/postcard_common_value_encode.mbt
new file mode 100644
--- /dev/null
+++ b/moon/loro_codec/postcard_common_value_encode.mbt
+///|
+fn write_common_value(w : BytesWriter, v : CommonValue) ->
Unit { + match v { + CommonValue::Null => w.write_varint_u64(0) + CommonValue::Bool(b) => { + w.write_varint_u64(1) + w.write_u8(if b { b'\x01' } else { b'\x00' }) + } + CommonValue::Double(d) => { + w.write_varint_u64(2) + w.write_u64_le(d.reinterpret_as_uint64()) + } + CommonValue::I64(x) => { + w.write_varint_u64(3) + w.write_varint_i64(x) + } + CommonValue::String(s) => { + w.write_varint_u64(4) + write_postcard_utf8(w, s) + } + CommonValue::List(items) => { + w.write_varint_u64(5) + w.write_varint_u64(items.length().to_uint64()) + for it in items { + write_common_value(w, it) + } + } + CommonValue::Map(items) => { + w.write_varint_u64(6) + w.write_varint_u64(items.length().to_uint64()) + for pair in items { + let (k, it) = pair + write_postcard_utf8(w, k) + write_common_value(w, it) + } + } + CommonValue::Container(cid) => { + w.write_varint_u64(7) + w.write_bytes(postcard_encode_container_id(cid)) + } + CommonValue::Binary(b) => { + w.write_varint_u64(8) + write_postcard_bytes_val(w, b) + } + } +} + +///| +pub fn postcard_encode_common_value(v : CommonValue) -> Bytes { + let w = BytesWriter::new() + write_common_value(w, v) + w.to_bytes() +} + +///| +pub fn postcard_encode_vec_common_value(values : Array[CommonValue]) -> Bytes { + let w = BytesWriter::new() + w.write_varint_u64(values.length().to_uint64()) + for v in values { + write_common_value(w, v) + } + w.to_bytes() +} + +///| +pub fn postcard_encode_map_string_common_value( + values : Array[(String, CommonValue)], +) -> Bytes { + let w = BytesWriter::new() + w.write_varint_u64(values.length().to_uint64()) + for pair in values { + let (k, v) = pair + write_postcard_utf8(w, k) + write_common_value(w, v) + } + w.to_bytes() +} diff --git a/moon/loro_codec/postcard_common_value_types.mbt b/moon/loro_codec/postcard_common_value_types.mbt new file mode 100644 index 000000000..6b26e1c55 --- /dev/null +++ b/moon/loro_codec/postcard_common_value_types.mbt @@ -0,0 +1,60 @@ +///| +pub enum CommonValue { + Null + Bool(Bool) + Double(Double) + I64(Int64) + String(String) + List(Array[CommonValue]) + Map(Array[(String, CommonValue)]) + Container(ContainerID) + Binary(Bytes) +} derive(Eq, Show) + +// NOTE: MoonBit makes enum constructors read-only across packages. Expose explicit +// constructor functions for blackbox tests and future callers. 
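+//
+// For instance, a blackbox test in another package would build a nested value
+// through these helpers rather than the constructors (a sketch; the helper
+// names are exactly the ones defined below):
+//
+//   let v = CommonValue::map([
+//     ("items", CommonValue::list([CommonValue::i64(1L), CommonValue::null()])),
+//   ])
+//   let bytes = postcard_encode_common_value(v)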
+ +///| +pub fn CommonValue::null() -> CommonValue { + CommonValue::Null +} + +///| +pub fn CommonValue::bool(v : Bool) -> CommonValue { + CommonValue::Bool(v) +} + +///| +pub fn CommonValue::double(v : Double) -> CommonValue { + CommonValue::Double(v) +} + +///| +pub fn CommonValue::i64(v : Int64) -> CommonValue { + CommonValue::I64(v) +} + +///| +pub fn CommonValue::string(v : String) -> CommonValue { + CommonValue::String(v) +} + +///| +pub fn CommonValue::binary(v : Bytes) -> CommonValue { + CommonValue::Binary(v) +} + +///| +pub fn CommonValue::list(v : Array[CommonValue]) -> CommonValue { + CommonValue::List(v) +} + +///| +pub fn CommonValue::map(v : Array[(String, CommonValue)]) -> CommonValue { + CommonValue::Map(v) +} + +///| +pub fn CommonValue::container(v : ContainerID) -> CommonValue { + CommonValue::Container(v) +} diff --git a/moon/loro_codec/postcard_container_id.mbt b/moon/loro_codec/postcard_container_id.mbt new file mode 100644 index 000000000..58f411d9d --- /dev/null +++ b/moon/loro_codec/postcard_container_id.mbt @@ -0,0 +1,46 @@ +///| +pub fn postcard_take_container_id( + bytes : BytesView, +) -> (ContainerID, BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let cid_tag = r.read_varint_u64() + match cid_tag { + 0UL => { + let name = read_postcard_utf8(r) + let kind = container_type_from_historical_u8(r.read_u8()) + (ContainerID::root(name, kind), r.remaining_view()) + } + 1UL => { + let peer = r.read_varint_u64() + let counter_i64 = r.read_varint_i64() + if counter_i64 < -2147483648L || counter_i64 > 2147483647L { + raise DecodeError("postcard: container counter overflow") + } + let kind = container_type_from_historical_u8(r.read_u8()) + ( + ContainerID::normal(peer, counter_i64.to_int(), kind), + r.remaining_view(), + ) + } + _ => raise DecodeError("postcard: invalid ContainerID tag") + } +} + +///| +pub fn postcard_encode_container_id(cid : ContainerID) -> Bytes { + let w = BytesWriter::new() + match cid { + ContainerID::Root(name, kind) => { + w.write_varint_u64(0) + write_postcard_utf8(w, name) + w.write_u8(container_type_to_historical_u8(kind)) + } + ContainerID::Normal(peer, counter, kind) => { + w.write_varint_u64(1) + w.write_varint_u64(peer) + w.write_varint_i64(counter.to_int64()) + w.write_u8(container_type_to_historical_u8(kind)) + } + } + w.to_bytes() +} diff --git a/moon/loro_codec/postcard_id.mbt b/moon/loro_codec/postcard_id.mbt new file mode 100644 index 000000000..cd069aadb --- /dev/null +++ b/moon/loro_codec/postcard_id.mbt @@ -0,0 +1,19 @@ +///| +pub fn postcard_take_id(bytes : BytesView) -> (ID, BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let peer = r.read_varint_u64() + let counter_i64 = r.read_varint_i64() + if counter_i64 < -2147483648L || counter_i64 > 2147483647L { + raise DecodeError("postcard: id counter overflow") + } + let id = ID::new(peer, counter_i64.to_int()) + (id, r.remaining_view()) +} + +///| +pub fn postcard_encode_id(id : ID) -> Bytes { + let w = BytesWriter::new() + w.write_varint_u64(id.peer()) + w.write_varint_i64(id.counter().to_int64()) + w.to_bytes() +} diff --git a/moon/loro_codec/postcard_primitives.mbt b/moon/loro_codec/postcard_primitives.mbt new file mode 100644 index 000000000..697f7cdd7 --- /dev/null +++ b/moon/loro_codec/postcard_primitives.mbt @@ -0,0 +1,53 @@ +///| +const POSTCARD_MAX_COLLECTION_SIZE : UInt64 = 268435456UL + +///| +fn read_postcard_len_u64(r : BytesReader) -> UInt64 raise DecodeError { + let n = r.read_varint_u64() + if n > 
POSTCARD_MAX_COLLECTION_SIZE { + raise DecodeError("postcard: collection too large") + } + n +} + +///| +fn read_postcard_utf8(r : BytesReader) -> String raise DecodeError { + let len_u64 = r.read_varint_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("postcard: string too long") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("postcard: invalid string length") + } + let bytes = r.read_exact(len) + @encoding/utf8.decode(bytes) catch { + @encoding/utf8.Malformed(_) => raise DecodeError("postcard: invalid utf8") + } +} + +///| +fn write_postcard_utf8(w : BytesWriter, s : String) -> Unit { + let bytes = @encoding/utf8.encode(s[:]) + w.write_varint_u64(bytes.length().to_uint64()) + w.write_bytes(bytes) +} + +///| +fn read_postcard_bytes_val(r : BytesReader) -> Bytes raise DecodeError { + let len_u64 = r.read_varint_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("postcard: bytes too long") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("postcard: invalid bytes length") + } + r.read_exact(len).to_bytes() +} + +///| +fn write_postcard_bytes_val(w : BytesWriter, b : Bytes) -> Unit { + w.write_varint_u64(b.length().to_uint64()) + w.write_bytes(b) +} diff --git a/moon/loro_codec/postcard_string.mbt b/moon/loro_codec/postcard_string.mbt new file mode 100644 index 000000000..0aaf25565 --- /dev/null +++ b/moon/loro_codec/postcard_string.mbt @@ -0,0 +1,15 @@ +///| +pub fn postcard_take_string( + bytes : BytesView, +) -> (String, BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let s = read_postcard_utf8(r) + (s, r.remaining_view()) +} + +///| +pub fn postcard_encode_string(s : String) -> Bytes { + let w = BytesWriter::new() + write_postcard_utf8(w, s) + w.to_bytes() +} diff --git a/moon/loro_codec/postcard_test.mbt b/moon/loro_codec/postcard_test.mbt new file mode 100644 index 000000000..21d652be7 --- /dev/null +++ b/moon/loro_codec/postcard_test.mbt @@ -0,0 +1,68 @@ +///| +test "postcard: id encode/decode" { + let id = ID::new(7UL, 42) + let bytes = postcard_encode_id(id) + let (decoded, rest) = try! postcard_take_id(bytes[:]) + assert_eq(rest.length(), 0) + assert_eq(decoded, id) +} + +///| +test "postcard: container id encode/decode" { + let cid0 = ContainerID::root("r", ContainerType::map()) + let bytes0 = postcard_encode_container_id(cid0) + let (decoded0, rest0) = try! postcard_take_container_id(bytes0[:]) + assert_eq(rest0.length(), 0) + assert_eq(decoded0, cid0) + let cid1 = ContainerID::normal(7UL, 42, ContainerType::list()) + let bytes1 = postcard_encode_container_id(cid1) + let (decoded1, rest1) = try! postcard_take_container_id(bytes1[:]) + assert_eq(rest1.length(), 0) + assert_eq(decoded1, cid1) +} + +///| +test "postcard: common value encode/decode basics" { + let values : Array[CommonValue] = [ + CommonValue::null(), + CommonValue::bool(false), + CommonValue::bool(true), + CommonValue::i64(-1L), + CommonValue::string("hi"), + CommonValue::binary(b"\x01\x02"), + CommonValue::list([CommonValue::null(), CommonValue::i64(1L)]), + CommonValue::map([("a", CommonValue::null()), ("b", CommonValue::i64(2L))]), + CommonValue::container(ContainerID::root("x", ContainerType::text())), + ] + for v in values { + let bytes = postcard_encode_common_value(v) + let (decoded, rest) = try! 
postcard_take_common_value(bytes[:])
+    assert_eq(rest.length(), 0)
+    assert_eq(decoded, v)
+  }
+}
+
+///|
+test "postcard: frontiers / version vector encode/decode" {
+  let vv : VersionVector = [(1UL, 2), (7UL, 42)]
+  let vv_bytes = postcard_encode_version_vector(vv)
+  let (vv2, rest0) = try! postcard_take_version_vector(vv_bytes[:])
+  assert_eq(rest0.length(), 0)
+  assert_eq(vv2, vv)
+  let fr : Array[ID] = [
+    ID::new(7UL, 42),
+    ID::new(1UL, 0),
+    ID::new(7UL, 41),
+    ID::new(1UL, -1),
+  ]
+  let fr_bytes = postcard_encode_frontiers(fr)
+  let (fr2, rest1) = try! postcard_take_frontiers(fr_bytes[:])
+  assert_eq(rest1.length(), 0)
+  let fr_expected : Array[ID] = [
+    ID::new(1UL, -1),
+    ID::new(1UL, 0),
+    ID::new(7UL, 41),
+    ID::new(7UL, 42),
+  ]
+  assert_eq(fr2, fr_expected)
+}
diff --git a/moon/loro_codec/postcard_varint.mbt b/moon/loro_codec/postcard_varint.mbt
new file mode 100644
index 000000000..530bdc11c
--- /dev/null
+++ b/moon/loro_codec/postcard_varint.mbt
@@ -0,0 +1,61 @@
+// Postcard's integer encoding is varint + zigzag (not SLEB128).
+
+///|
+pub fn BytesReader::read_varint_u64(
+  self : BytesReader,
+) -> UInt64 raise DecodeError {
+  let mut result : UInt64 = 0
+  let mut shift = 0
+  // A u64 varint is at most 10 bytes.
+  for _i in 0..<10 {
+    let byte = self.read_u8().to_uint64()
+    result = result | ((byte & 0x7F) << shift)
+    if (byte & 0x80) == 0 {
+      return result
+    }
+    shift = shift + 7
+  }
+  raise DecodeError("varint too long")
+}
+
+///|
+pub fn BytesWriter::write_varint_u64(
+  self : BytesWriter,
+  value : UInt64,
+) -> Unit {
+  let mut value = value
+  while true {
+    let mut byte = value & 0x7F
+    value = value >> 7
+    if value != 0 {
+      byte = byte | 0x80
+    }
+    self.write_u8(byte.to_byte())
+    if value == 0 {
+      break
+    }
+  }
+}
+
+///|
+pub fn zigzag_encode_i64(value : Int64) -> UInt64 {
+  ((value << 1) ^ (value >> 63)).reinterpret_as_uint64()
+}
+
+///|
+pub fn zigzag_decode_i64(value : UInt64) -> Int64 {
+  let lo = (value & 1).reinterpret_as_int64()
+  (value >> 1).reinterpret_as_int64() ^ -lo
+}
+
+///|
+pub fn BytesReader::read_varint_i64(
+  self : BytesReader,
+) -> Int64 raise DecodeError {
+  zigzag_decode_i64(self.read_varint_u64())
+}
+
+///|
+pub fn BytesWriter::write_varint_i64(self : BytesWriter, value : Int64) -> Unit {
+  self.write_varint_u64(zigzag_encode_i64(value))
+}
diff --git a/moon/loro_codec/postcard_varint_test.mbt b/moon/loro_codec/postcard_varint_test.mbt
new file mode 100644
index 000000000..34af6ecdb
--- /dev/null
+++ b/moon/loro_codec/postcard_varint_test.mbt
@@ -0,0 +1,37 @@
+///|
+test "postcard zigzag i64 examples" {
+  fn enc(v : Int64) -> Bytes {
+    let w = BytesWriter::new()
+    w.write_varint_i64(v)
+    w.to_bytes()
+  }
+
+  // Important: zigzag differs from SLEB128.
+  assert_eq(enc(0), b"\x00")
+  assert_eq(enc(-1), b"\x01")
+  assert_eq(enc(1), b"\x02")
+  assert_eq(enc(-2), b"\x03")
+  assert_eq(enc(2), b"\x04")
+  assert_eq(enc(-64), b"\x7F")
+}
+
+///|
+test "postcard varint roundtrip" {
+  fn rt(v : UInt64) raise {
+    let w = BytesWriter::new()
+    w.write_varint_u64(v)
+    let bytes = w.to_bytes()
+    let r = BytesReader::new(bytes)
+    assert_eq(try! r.read_varint_u64(), v)
+    assert_eq(r.remaining(), 0)
+  }
+
+  try! rt(0)
+  try! rt(1)
+  try! rt(127)
+  try! rt(128)
+  try! rt(300)
+  try! rt(16384)
+  try! rt(0xFFFF_FFFF)
+  try! rt(0xFFFF_FFFF_FFFF_FFFF)
+}
diff --git a/moon/loro_codec/postcard_vec_string.mbt b/moon/loro_codec/postcard_vec_string.mbt
new file mode 100644
index 000000000..788eeeccc
--- /dev/null
+++ b/moon/loro_codec/postcard_vec_string.mbt
@@ -0,0 +1,22 @@
+///|
+pub fn postcard_take_vec_string(
+  bytes : BytesView,
+) -> (Array[String], BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let len = read_postcard_len_u64(r).to_int()
+  let out : Array[String] = []
+  for _i in 0..<len {
+    out.push(read_postcard_utf8(r))
+  }
+  (out, r.remaining_view())
+}
+
+///|
+pub fn postcard_encode_vec_string(values : Array[String]) -> Bytes {
+  let w = BytesWriter::new()
+  w.write_varint_u64(values.length().to_uint64())
+  for s in values {
+    write_postcard_utf8(w, s)
+  }
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/postcard_vv_frontiers.mbt b/moon/loro_codec/postcard_vv_frontiers.mbt
new file mode 100644
index 000000000..3d68ac6b2
--- /dev/null
+++ b/moon/loro_codec/postcard_vv_frontiers.mbt
@@ -0,0 +1,82 @@
+// --- VersionVector / Frontiers ---
+
+///|
+pub type VersionVector = Array[(UInt64, Int)] // (peer, counter)
+
+///|
+pub fn postcard_take_version_vector(
+  bytes : BytesView,
+) -> (VersionVector, BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let len_u64 = r.read_varint_u64()
+  if len_u64 > 0x7FFF_FFFFUL {
+    raise DecodeError("postcard: vv too large")
+  }
+  let len = len_u64.to_int()
+  let out : VersionVector = []
+  for _i in 0..<len {
+    let peer = r.read_varint_u64()
+    let counter_i64 = r.read_varint_i64()
+    if counter_i64 < -2147483648L || counter_i64 > 2147483647L {
+      raise DecodeError("postcard: vv counter overflow")
+    }
+    out.push((peer, counter_i64.to_int()))
+  }
+  (out, r.remaining_view())
+}
+
+///|
+pub fn postcard_encode_version_vector(vv : VersionVector) -> Bytes {
+  let w = BytesWriter::new()
+  w.write_varint_u64(vv.length().to_uint64())
+  for pair in vv {
+    let (peer, counter) = pair
+    w.write_varint_u64(peer)
+    w.write_varint_i64(counter.to_int64())
+  }
+  w.to_bytes()
+}
+
+///|
+pub fn postcard_take_frontiers(
+  bytes : BytesView,
+) -> (Array[ID], BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let len_u64 = r.read_varint_u64()
+  if len_u64 > 0x7FFF_FFFFUL {
+    raise DecodeError("postcard: frontiers too large")
+  }
+  let len = len_u64.to_int()
+  let out : Array[ID] = []
+  for _i in 0..<len {
+    let peer = r.read_varint_u64()
+    let counter_i64 = r.read_varint_i64()
+    if counter_i64 < -2147483648L || counter_i64 > 2147483647L {
+      raise DecodeError("postcard: frontier counter overflow")
+    }
+    out.push(ID::new(peer, counter_i64.to_int()))
+  }
+  (out, r.remaining_view())
+}
+
+///|
+pub fn postcard_encode_frontiers(ids : Array[ID]) -> Bytes {
+  let sorted : Array[ID] = []
+  for id in ids {
+    sorted.push(id)
+  }
+  // Canonical encoding: sort by (peer, counter).
+  let counter_bias = BigInt::from_int64(-2147483648L)
+  sorted.sort_by_key(id => {
+    let peer_key = BigInt::from_uint64(id.peer()) << 32
+    let counter_key = BigInt::from_int(id.counter()) - counter_bias
+    peer_key | counter_key
+  })
+  let w = BytesWriter::new()
+  w.write_varint_u64(sorted.length().to_uint64())
+  for id in sorted {
+    w.write_varint_u64(id.peer())
+    w.write_varint_i64(id.counter().to_int64())
+  }
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/serde_columnar.mbt b/moon/loro_codec/serde_columnar.mbt
new file mode 100644
index 000000000..9f7ac2cd8
--- /dev/null
+++ b/moon/loro_codec/serde_columnar.mbt
@@ -0,0 +1,82 @@
+// Serde columnar outer format helpers.
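+//
+// Wire shape, as implemented by take_columnar_vec/encode_columnar_vec below:
+// varint(column_count), then per column varint(byte_len) + raw column bytes.
+// e.g. the two columns b"abc" and b"\x01\x02" encode to b"\x02\x03abc\x02\x01\x02"
+// (the "outer format" test in serde_columnar_test.mbt pins these exact bytes).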
+// +// Strategies are split into separate files: +// - serde_columnar_bool_rle.mbt +// - serde_columnar_any_rle.mbt +// - serde_columnar_delta_rle.mbt +// - serde_columnar_delta_of_delta.mbt + +///| +pub fn take_columnar_vec( + bytes : BytesView, +) -> (Array[BytesView], BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let n_cols_u64 = r.read_varint_u64() + if n_cols_u64 > 0x7FFF_FFFFUL { + raise DecodeError("serde_columnar: too many columns") + } + let n_cols = n_cols_u64.to_int() + let cols : Array[BytesView] = [] + for _i in 0.. 0x7FFF_FFFFUL { + raise DecodeError("serde_columnar: column too large") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("serde_columnar: invalid column length") + } + cols.push(r.read_exact(len)) + } + (cols, r.remaining_view()) +} + +///| +pub fn decode_columnar_vec( + bytes : BytesView, +) -> Array[BytesView] raise DecodeError { + let (cols, rest) = take_columnar_vec(bytes) + if rest.length() != 0 { + raise DecodeError("serde_columnar: trailing bytes") + } + cols +} + +// Common pattern: serde_columnar::to_vec(&Struct { field: Vec }) where +// the struct has exactly one field, and that field is annotated with `#[columnar(class="vec")]`. +// In this case the serialized bytes are: varint(1) + (the columnar vec bytes). + +///| +pub fn decode_columnar_vec_maybe_wrapped( + bytes : BytesView, +) -> Array[BytesView] raise DecodeError { + if bytes.length() == 0 { + return [] + } + let r = BytesReader::from_view(bytes) + let first = r.read_varint_u64() + if first == 1UL { + return decode_columnar_vec(r.remaining_view()) + } + decode_columnar_vec(bytes) +} + +///| +pub fn encode_columnar_vec_wrapped(cols : Array[Bytes]) -> Bytes { + let inner = encode_columnar_vec(cols) + let w = BytesWriter::new() + w.write_varint_u64(1UL) + w.write_bytes(inner) + w.to_bytes() +} + +///| +pub fn encode_columnar_vec(cols : Array[Bytes]) -> Bytes { + let w = BytesWriter::new() + w.write_varint_u64(cols.length().to_uint64()) + for col in cols { + w.write_varint_u64(col.length().to_uint64()) + w.write_bytes(col) + } + w.to_bytes() +} diff --git a/moon/loro_codec/serde_columnar_any_rle.mbt b/moon/loro_codec/serde_columnar_any_rle.mbt new file mode 100644 index 000000000..916f38c4c --- /dev/null +++ b/moon/loro_codec/serde_columnar_any_rle.mbt @@ -0,0 +1,162 @@ +///| +pub fn decode_rle_u8(bytes : BytesView) -> Array[UInt] raise DecodeError { + decode_any_rle(bytes, read_postcard_u8) +} + +///| +pub fn encode_rle_u8(values : Array[UInt]) -> Bytes { + encode_any_rle_literal(values, write_postcard_u8) +} + +///| +pub fn decode_rle_u32(bytes : BytesView) -> Array[UInt] raise DecodeError { + decode_any_rle(bytes, read_postcard_u32) +} + +///| +pub fn encode_rle_u32(values : Array[UInt]) -> Bytes { + encode_any_rle_literal(values, write_postcard_u32) +} + +///| +pub fn decode_any_rle_usize( + bytes : BytesView, +) -> Array[UInt64] raise DecodeError { + decode_any_rle(bytes, read_postcard_usize) +} + +///| +pub fn any_rle_take_n_finalize_usize( + bytes : BytesView, + n : Int, +) -> (Array[UInt64], BytesView) raise DecodeError { + any_rle_take_n_finalize(bytes, n, read_postcard_usize) +} + +///| +pub fn encode_any_rle_usize(values : Array[UInt64]) -> Bytes { + encode_any_rle_literal(values, write_postcard_usize) +} + +///| +pub fn decode_any_rle_u32(bytes : BytesView) -> Array[UInt] raise DecodeError { + decode_any_rle(bytes, read_postcard_u32) +} + +///| +pub fn any_rle_take_n_finalize_u32( + bytes : BytesView, + n : Int, +) -> 
(Array[UInt], BytesView) raise DecodeError { + any_rle_take_n_finalize(bytes, n, read_postcard_u32) +} + +///| +pub fn encode_any_rle_u32(values : Array[UInt]) -> Bytes { + encode_any_rle_literal(values, write_postcard_u32) +} + +///| +pub fn decode_any_rle_u64(bytes : BytesView) -> Array[UInt64] raise DecodeError { + decode_any_rle(bytes, read_postcard_u64) +} + +///| +pub fn encode_any_rle_u64(values : Array[UInt64]) -> Bytes { + encode_any_rle_literal(values, write_postcard_u64) +} + +///| +pub fn decode_any_rle_u8(bytes : BytesView) -> Array[UInt] raise DecodeError { + decode_any_rle(bytes, read_postcard_u8) +} + +///| +pub fn any_rle_take_n_finalize_u8( + bytes : BytesView, + n : Int, +) -> (Array[UInt], BytesView) raise DecodeError { + any_rle_take_n_finalize(bytes, n, read_postcard_u8) +} + +///| +pub fn encode_any_rle_u8(values : Array[UInt]) -> Bytes { + encode_any_rle_literal(values, write_postcard_u8) +} + +///| +pub fn decode_any_rle_i32(bytes : BytesView) -> Array[Int] raise DecodeError { + decode_any_rle(bytes, read_postcard_i32) +} + +///| +pub fn any_rle_take_n_finalize_i32( + bytes : BytesView, + n : Int, +) -> (Array[Int], BytesView) raise DecodeError { + any_rle_take_n_finalize(bytes, n, read_postcard_i32) +} + +///| +pub fn encode_any_rle_i32(values : Array[Int]) -> Bytes { + encode_any_rle_literal(values, write_postcard_i32) +} + +///| +fn read_varint_u128_bigint(r : BytesReader) -> BigInt raise DecodeError { + let mut result : BigInt = 0N + let mut shift = 0 + for _i in 0..<19 { + let byte = r.read_u8().to_uint() + let chunk = BigInt::from_uint(byte & 0x7F) + result = result | (chunk << shift) + if (byte & 0x80) == 0 { + return result + } + shift = shift + 7 + } + raise DecodeError("varint too long") +} + +///| +fn write_varint_u128_bigint(w : BytesWriter, value : BigInt) -> Unit { + let mut v = value + while true { + let byte = (v & 0x7FN).to_uint() + v = v >> 7 + if v != 0N { + w.write_u8((byte | 0x80).to_byte()) + } else { + w.write_u8(byte.to_byte()) + break + } + } +} + +///| +fn zigzag_decode_bigint(encoded : BigInt) -> BigInt { + if (encoded & 1N) == 0N { + encoded >> 1 + } else { + -((encoded >> 1) + 1N) + } +} + +///| +fn zigzag_encode_bigint(value : BigInt) -> BigInt { + if value >= 0N { + value << 1 + } else { + (-value << 1) - 1N + } +} + +///| +fn read_postcard_i128_bigint(r : BytesReader) -> BigInt raise DecodeError { + zigzag_decode_bigint(read_varint_u128_bigint(r)) +} + +///| +fn write_postcard_i128_bigint(w : BytesWriter, v : BigInt) -> Unit { + write_varint_u128_bigint(w, zigzag_encode_bigint(v)) +} diff --git a/moon/loro_codec/serde_columnar_bool_rle.mbt b/moon/loro_codec/serde_columnar_bool_rle.mbt new file mode 100644 index 000000000..541f73dec --- /dev/null +++ b/moon/loro_codec/serde_columnar_bool_rle.mbt @@ -0,0 +1,82 @@ +///| +pub fn decode_bool_rle(bytes : BytesView) -> Array[Bool] raise DecodeError { + if bytes.length() == 0 { + return [] + } + let r = BytesReader::from_view(bytes) + let out : Array[Bool] = [] + let mut state = false + while r.remaining() > 0 { + let len_u64 = r.read_varint_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("boolrle: run too long") + } + let len = len_u64.to_int() + if len < 0 { + raise DecodeError("boolrle: invalid run length") + } + for _i in 0.. 
<len {
+      out.push(state)
+    }
+    state = !state
+  }
+  out
+}
+
+///|
+pub fn encode_bool_rle(values : Array[Bool]) ->
Bytes { + let w = BytesWriter::new() + if values.length() == 0 { + return w.to_bytes() + } + let mut state = false + let mut run_len : UInt64 = 0 + for v in values { + if v == state { + run_len = run_len + 1 + } else { + w.write_varint_u64(run_len) + state = !state + run_len = 1 + } + } + w.write_varint_u64(run_len) + w.to_bytes() +} + +///| +pub fn bool_rle_take_n_finalize( + bytes : BytesView, + n : Int, +) -> (Array[Bool], BytesView) raise DecodeError { + if n < 0 { + raise DecodeError("boolrle: invalid n") + } + if n == 0 { + return ([], bytes) + } + let r = BytesReader::from_view(bytes) + let out : Array[Bool] = [] + let mut state = false + while out.length() < n { + if r.remaining() == 0 { + raise DecodeError("boolrle: not enough elements") + } + let len_u64 = r.read_varint_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("boolrle: run too long") + } + let len = len_u64.to_int() + if len < 0 { + raise DecodeError("boolrle: invalid run length") + } + if out.length() + len > n { + raise DecodeError("boolrle: too many elements") + } + for _i in 0.. BitReader raise DecodeError { + if last_used_bits > 8 { + raise DecodeError("delta_of_delta: invalid last_used_bits") + } + if last_used_bits == 0 { + if buf.length() != 0 { + raise DecodeError("delta_of_delta: unexpected bitstream bytes") + } + return { buf, bit_pos: 0, total_bits: 0 } + } + if buf.length() == 0 { + raise DecodeError("delta_of_delta: missing bitstream bytes") + } + let last_bits = last_used_bits.reinterpret_as_int() + let total_bits = (buf.length() - 1) * 8 + last_bits + { buf, bit_pos: 0, total_bits } +} + +///| +fn BitReader::remaining(self : BitReader) -> Int { + self.total_bits - self.bit_pos +} + +///| +fn BitReader::read_bit(self : BitReader) -> Bool raise DecodeError { + if self.bit_pos >= self.total_bits { + raise DecodeError("delta_of_delta: unexpected end of bitstream") + } + let byte_index = self.bit_pos / 8 + let bit_in_byte = self.bit_pos % 8 + self.bit_pos = self.bit_pos + 1 + let mask = (1 << (7 - bit_in_byte)).to_byte() + (self.buf[byte_index] & mask) != b'\x00' +} + +///| +fn BitReader::read_bits_u64( + self : BitReader, + n : Int, +) -> UInt64 raise DecodeError { + if n < 0 || n > 64 { + raise DecodeError("delta_of_delta: invalid bit width") + } + let mut v : UInt64 = 0 + for _i in 0.. Int64 raise DecodeError { + let b0 = br.read_bit() + if !b0 { + return 0 + } + let b1 = br.read_bit() + if !b1 { + let v = br.read_bits_u64(7).reinterpret_as_int64() + return v - 63L + } + let b2 = br.read_bit() + if !b2 { + let v = br.read_bits_u64(9).reinterpret_as_int64() + return v - 255L + } + let b3 = br.read_bit() + if !b3 { + let v = br.read_bits_u64(12).reinterpret_as_int64() + return v - 2047L + } + let b4 = br.read_bit() + if !b4 { + let v = br.read_bits_u64(21).reinterpret_as_int64() + return v - 1048575L + } + br.read_bits_u64(64).reinterpret_as_int64() +} diff --git a/moon/loro_codec/serde_columnar_delta_of_delta_common.mbt b/moon/loro_codec/serde_columnar_delta_of_delta_common.mbt new file mode 100644 index 000000000..8765c964d --- /dev/null +++ b/moon/loro_codec/serde_columnar_delta_of_delta_common.mbt @@ -0,0 +1,20 @@ +///| +fn read_postcard_option_i64(r : BytesReader) -> Int64? raise DecodeError { + let tag = r.read_varint_u64() + match tag { + 0UL => None + 1UL => Some(r.read_varint_i64()) + _ => raise DecodeError("postcard: invalid Option tag") + } +} + +///| +fn write_postcard_option_i64(w : BytesWriter, v : Int64?) 
-> Unit { + match v { + None => w.write_varint_u64(0) + Some(x) => { + w.write_varint_u64(1) + w.write_varint_i64(x) + } + } +} diff --git a/moon/loro_codec/serde_columnar_delta_of_delta_decode.mbt b/moon/loro_codec/serde_columnar_delta_of_delta_decode.mbt new file mode 100644 index 000000000..ead1631a9 --- /dev/null +++ b/moon/loro_codec/serde_columnar_delta_of_delta_decode.mbt @@ -0,0 +1,241 @@ +///| +pub fn decode_delta_of_delta_i64( + bytes : BytesView, +) -> Array[Int64] raise DecodeError { + let r = BytesReader::from_view(bytes) + let first_opt = read_postcard_option_i64(r) + if r.remaining() < 1 { + raise DecodeError("delta_of_delta: invalid bytes") + } + let last_used_bits = r.read_u8().to_uint() + let bitstream = r.read_exact(r.remaining()) + match first_opt { + None => { + if last_used_bits != 0 || bitstream.length() != 0 { + raise DecodeError("delta_of_delta: invalid empty encoding") + } + [] + } + Some(first) => { + let br = BitReader::new(bitstream, last_used_bits) + let out : Array[Int64] = [first] + let mut prev = first + let mut delta : Int64 = 0 + while br.remaining() > 0 { + let dod = decode_delta_of_delta_value(br) + delta = delta + dod + prev = prev + delta + out.push(prev) + } + out + } + } +} + +///| +priv struct DeltaOfDeltaDecoder { + bits : BytesView + mut head_num : Int64? + mut prev_value : Int64 + mut prev_delta : Int64 + mut index : Int + mut current_bits_index : Int + last_used_bits : Int +} + +///| +fn DeltaOfDeltaDecoder::new( + bits : BytesView, + head_num : Int64?, + last_used_bits : UInt, +) -> DeltaOfDeltaDecoder raise DecodeError { + if last_used_bits > 8 { + raise DecodeError("delta_of_delta: invalid last_used_bits") + } + { + bits, + head_num, + prev_value: 0L, + prev_delta: 0L, + index: 0, + current_bits_index: 0, + last_used_bits: last_used_bits.reinterpret_as_int(), + } +} + +///| +fn DeltaOfDeltaDecoder::read_bits( + self : DeltaOfDeltaDecoder, + count : Int, +) -> UInt64? { + if self.index >= self.bits.length() { + return None + } + let total_bits = if self.bits.length() == 0 { + 0 + } else { + (self.bits.length() - 1) * 8 + self.last_used_bits + } + let read_bits = self.index * 8 + self.current_bits_index + let remaining_bits = total_bits - read_bits + if remaining_bits < count { + return None + } + let current_byte_remaining = 8 - self.current_bits_index + if count <= current_byte_remaining { + let current_index = self.index + self.current_bits_index = self.current_bits_index + count + if self.current_bits_index == 8 { + self.index = self.index + 1 + self.current_bits_index = 0 + } + let mask = 0xFF >> (8 - count) + let current_byte = self.bits[current_index].to_uint() + let after_shift = current_byte >> (current_byte_remaining - count) + Some((after_shift & mask.reinterpret_as_uint()).to_uint64()) + } else { + let mut ans : UInt64 = (self.bits[self.index].to_uint() & + (0xFF >> (8 - current_byte_remaining))).to_uint64() + self.index = self.index + 1 + self.current_bits_index = 0 + let mut rest = count - current_byte_remaining + while rest > 8 { + ans = (ans << 8) | self.bits[self.index].to_uint64() + self.index = self.index + 1 + rest = rest - 8 + } + ans = (ans << rest) | + (self.bits[self.index].to_uint() >> (8 - rest)).to_uint64() + self.current_bits_index = self.current_bits_index + rest + if self.current_bits_index == 8 { + self.index = self.index + 1 + self.current_bits_index = 0 + } + Some(ans) + } +} + +///| +fn DeltaOfDeltaDecoder::try_next( + self : DeltaOfDeltaDecoder, +) -> Int64? 
raise DecodeError { + match self.head_num { + Some(head) => { + self.prev_value = head + self.head_num = None + } + None => + match self.read_bits(1) { + None => return None + Some(0UL) => self.prev_value = self.prev_value + self.prev_delta + Some(1UL) => { + let mut num_bits = 0 + let mut bias : Int64 = 0 + match self.read_bits(1) { + Some(0UL) => { + num_bits = 7 + bias = 63L + } + Some(1UL) => + match self.read_bits(1) { + Some(0UL) => { + num_bits = 9 + bias = 255L + } + Some(1UL) => + match self.read_bits(1) { + Some(0UL) => { + num_bits = 12 + bias = 2047L + } + Some(1UL) => + match self.read_bits(1) { + Some(0UL) => { + num_bits = 21 + bias = 1048575L + } + Some(1UL) => { + num_bits = 64 + bias = 0L + } + _ => raise DecodeError("delta_of_delta: invalid flag") + } + _ => raise DecodeError("delta_of_delta: invalid flag") + } + _ => raise DecodeError("delta_of_delta: invalid flag") + } + _ => raise DecodeError("delta_of_delta: invalid flag") + } + match self.read_bits(num_bits) { + Some(bits) => { + let delta_of_delta = bits.reinterpret_as_int64() - bias + self.prev_delta = self.prev_delta + delta_of_delta + self.prev_value = self.prev_value + self.prev_delta + } + None => return None + } + } + _ => raise DecodeError("delta_of_delta: invalid flag") + } + } + Some(self.prev_value) +} + +///| +fn DeltaOfDeltaDecoder::finalize(self : DeltaOfDeltaDecoder) -> BytesView { + if self.bits.length() == 0 { + return self.bits + } + if self.current_bits_index > 0 { + self.index = self.index + 1 + } + if self.index < 0 { + return self.bits + } + if self.index > self.bits.length() { + return self.bits[self.bits.length():self.bits.length()] + } + self.bits[self.index:self.bits.length()] +} + +///| +pub fn delta_of_delta_take_n_finalize_i64( + bytes : BytesView, + n : Int, +) -> (Array[Int64], BytesView) raise DecodeError { + if n < 0 { + raise DecodeError("delta_of_delta: invalid n") + } + let r = BytesReader::from_view(bytes) + let first_opt = read_postcard_option_i64(r) + if r.remaining() < 1 { + raise DecodeError("delta_of_delta: invalid bytes") + } + let last_used_bits = r.read_u8().to_uint() + let bits = r.remaining_view() + match first_opt { + None => { + if n != 0 { + raise DecodeError("delta_of_delta: not enough elements") + } + if last_used_bits != 0 { + raise DecodeError("delta_of_delta: invalid empty encoding") + } + ([], bits) + } + Some(_) => { + if n == 0 { + raise DecodeError("delta_of_delta: too many elements") + } + let dec = DeltaOfDeltaDecoder::new(bits, first_opt, last_used_bits) + let out : Array[Int64] = [] + for _i in 0.. 
<n {
+        match dec.try_next() {
+          None =>
raise DecodeError("delta_of_delta: not enough elements") + Some(v) => out.push(v) + } + } + (out, dec.finalize()) + } + } +} diff --git a/moon/loro_codec/serde_columnar_delta_of_delta_encode.mbt b/moon/loro_codec/serde_columnar_delta_of_delta_encode.mbt new file mode 100644 index 000000000..94ed1f05c --- /dev/null +++ b/moon/loro_codec/serde_columnar_delta_of_delta_encode.mbt @@ -0,0 +1,95 @@ +///| +priv struct BitWriter { + bytes : Array[Byte] + mut cur : UInt + mut bits : Int +} + +///| +fn BitWriter::new() -> BitWriter { + { bytes: [], cur: 0, bits: 0 } +} + +///| +fn BitWriter::write_bit(self : BitWriter, bit : Bool) -> Unit { + self.cur = (self.cur << 1) | (if bit { 1 } else { 0 }) + self.bits = self.bits + 1 + if self.bits == 8 { + self.bytes.push((self.cur & 0xFF).to_byte()) + self.cur = 0 + self.bits = 0 + } +} + +///| +fn BitWriter::write_bits_u64(self : BitWriter, v : UInt64, n : Int) -> Unit { + for i in 0..> shift) & 1UL) != 0UL + self.write_bit(bit) + } +} + +///| +fn encode_delta_of_delta_value(bw : BitWriter, v : Int64) -> Unit { + if v == 0 { + bw.write_bit(false) + return + } + if v >= -63L && v <= 64L { + bw.write_bits_u64(0b10UL, 2) + bw.write_bits_u64((v + 63L).reinterpret_as_uint64(), 7) + return + } + if v >= -255L && v <= 256L { + bw.write_bits_u64(0b110UL, 3) + bw.write_bits_u64((v + 255L).reinterpret_as_uint64(), 9) + return + } + if v >= -2047L && v <= 2048L { + bw.write_bits_u64(0b1110UL, 4) + bw.write_bits_u64((v + 2047L).reinterpret_as_uint64(), 12) + return + } + if v >= -1048575L && v <= 1048576L { + bw.write_bits_u64(0b11110UL, 5) + bw.write_bits_u64((v + 1048575L).reinterpret_as_uint64(), 21) + return + } + bw.write_bits_u64(0b11111UL, 5) + bw.write_bits_u64(v.reinterpret_as_uint64(), 64) +} + +///| +pub fn encode_delta_of_delta_i64(values : Array[Int64]) -> Bytes { + let w = BytesWriter::new() + if values.length() == 0 { + write_postcard_option_i64(w, None) + w.write_u8(b'\x00') + return w.to_bytes() + } + write_postcard_option_i64(w, Some(values[0])) + if values.length() == 1 { + w.write_u8(b'\x00') + return w.to_bytes() + } + let bw = BitWriter::new() + let mut prev_delta : Int64 = 0 + for i in 1.. 
Array[UInt] raise DecodeError { + let deltas = decode_any_rle(bytes, read_postcard_i128_bigint) + let out : Array[UInt] = [] + let mut abs : BigInt = 0N + let max = BigInt::from_uint64(0xFFFF_FFFFUL) + for d in deltas { + abs = abs + d + if abs < 0N || abs > max { + raise DecodeError("deltarle: u32 overflow") + } + out.push(abs.to_uint()) + } + out +} + +///| +pub fn encode_delta_rle_u32(values : Array[UInt]) -> Bytes { + if values.length() == 0 { + return b"" + } + let deltas : Array[BigInt] = [] + let mut prev : BigInt = 0N + for v in values { + let cur = BigInt::from_uint(v) + deltas.push(cur - prev) + prev = cur + } + encode_any_rle_literal(deltas, write_postcard_i128_bigint) +} + +///| +pub fn decode_delta_rle_i32(bytes : BytesView) -> Array[Int] raise DecodeError { + let deltas = decode_any_rle(bytes, read_postcard_i128_bigint) + let out : Array[Int] = [] + let mut abs : BigInt = 0N + let min = BigInt::from_int64(-2147483648L) + let max = BigInt::from_int64(2147483647L) + for d in deltas { + abs = abs + d + if abs < min || abs > max { + raise DecodeError("deltarle: i32 overflow") + } + out.push(abs.to_int()) + } + out +} + +///| +pub fn encode_delta_rle_i32(values : Array[Int]) -> Bytes { + if values.length() == 0 { + return b"" + } + let deltas : Array[BigInt] = [] + let mut prev : BigInt = 0N + for v in values { + let cur = BigInt::from_int(v) + deltas.push(cur - prev) + prev = cur + } + encode_any_rle_literal(deltas, write_postcard_i128_bigint) +} + +///| +pub fn decode_delta_rle_usize( + bytes : BytesView, +) -> Array[UInt64] raise DecodeError { + let deltas = decode_any_rle(bytes, read_postcard_i128_bigint) + let out : Array[UInt64] = [] + let mut abs : BigInt = 0N + let max = BigInt::from_uint64(0xFFFF_FFFF_FFFF_FFFFUL) + for d in deltas { + abs = abs + d + if abs < 0N || abs > max { + raise DecodeError("deltarle: usize overflow") + } + out.push(abs.to_uint64()) + } + out +} + +///| +pub fn encode_delta_rle_usize(values : Array[UInt64]) -> Bytes { + if values.length() == 0 { + return b"" + } + let deltas : Array[BigInt] = [] + let mut prev : BigInt = 0N + for v in values { + let cur = BigInt::from_uint64(v) + deltas.push(cur - prev) + prev = cur + } + encode_any_rle_literal(deltas, write_postcard_i128_bigint) +} + +///| +pub fn decode_delta_rle_isize( + bytes : BytesView, +) -> Array[Int64] raise DecodeError { + let deltas = decode_any_rle(bytes, read_postcard_i128_bigint) + let out : Array[Int64] = [] + let mut abs : BigInt = 0N + let min = BigInt::from_int64(-9223372036854775808L) + let max = BigInt::from_int64(9223372036854775807L) + for d in deltas { + abs = abs + d + if abs < min || abs > max { + raise DecodeError("deltarle: isize overflow") + } + out.push(abs.to_int64()) + } + out +} + +///| +pub fn encode_delta_rle_isize(values : Array[Int64]) -> Bytes { + if values.length() == 0 { + return b"" + } + let deltas : Array[BigInt] = [] + let mut prev : BigInt = 0N + for v in values { + let cur = BigInt::from_int64(v) + deltas.push(cur - prev) + prev = cur + } + encode_any_rle_literal(deltas, write_postcard_i128_bigint) +} diff --git a/moon/loro_codec/serde_columnar_rle_core.mbt b/moon/loro_codec/serde_columnar_rle_core.mbt new file mode 100644 index 000000000..5f567420b --- /dev/null +++ b/moon/loro_codec/serde_columnar_rle_core.mbt @@ -0,0 +1,163 @@ +///| +fn[T] decode_any_rle( + bytes : BytesView, + read_value : (BytesReader) -> T raise DecodeError, +) -> Array[T] raise DecodeError { + let r = BytesReader::from_view(bytes) + let out : Array[T] = [] + while r.remaining() > 
0 {
+    let signed_len = r.read_varint_i64()
+    if signed_len == 0 {
+      raise DecodeError("anyrle: zero length segment")
+    }
+    if signed_len > 0 {
+      let len_u64 = signed_len.reinterpret_as_uint64()
+      if len_u64 > 0x7FFF_FFFFUL {
+        raise DecodeError("anyrle: run too long")
+      }
+      let len = len_u64.to_int()
+      let v = read_value(r)
+      for _i in 0..<len {
+        out.push(v)
+      }
+    } else {
+      let len_u64 = (-signed_len).reinterpret_as_uint64()
+      if len_u64 > 0x7FFF_FFFFUL {
+        raise DecodeError("anyrle: literal too long")
+      }
+      let len = len_u64.to_int()
+      for _i in 0..<len {
+        out.push(read_value(r))
+      }
+    }
+  }
+  out
+}
+
+///|
+fn[T] any_rle_take_n_finalize(
+  bytes : BytesView,
+  n : Int,
+  read_value : (BytesReader) -> T raise DecodeError,
+) -> (Array[T], BytesView) raise DecodeError {
+  if n < 0 {
+    raise DecodeError("anyrle: invalid n")
+  }
+  if n == 0 {
+    return ([], bytes)
+  }
+  let r = BytesReader::from_view(bytes)
+  let out : Array[T] = []
+  while out.length() < n {
+    if r.remaining() == 0 {
+      raise DecodeError("anyrle: not enough elements")
+    }
+    let signed_len = r.read_varint_i64()
+    if signed_len == 0 {
+      raise DecodeError("anyrle: zero length segment")
+    }
+    if signed_len > 0 {
+      let len_u64 = signed_len.reinterpret_as_uint64()
+      if len_u64 > 0x7FFF_FFFFUL {
+        raise DecodeError("anyrle: run too long")
+      }
+      let len = len_u64.to_int()
+      if out.length() + len > n {
+        raise DecodeError("anyrle: too many elements")
+      }
+      let v = read_value(r)
+      for _i in 0..<len {
+        out.push(v)
+      }
+    } else {
+      let len_u64 = (-signed_len).reinterpret_as_uint64()
+      if len_u64 > 0x7FFF_FFFFUL {
+        raise DecodeError("anyrle: literal too long")
+      }
+      let len = len_u64.to_int()
+      if out.length() + len > n {
+        raise DecodeError("anyrle: too many elements")
+      }
+      for _i in 0..<len {
+        out.push(read_value(r))
+      }
+    }
+  }
+  (out, r.remaining_view())
+}
+
+///|
+fn[T] encode_any_rle_literal(
+  values : Array[T],
+  write_value : (BytesWriter, T) -> Unit,
+) -> Bytes {
+  let w = BytesWriter::new()
+  if values.length() == 0 {
+    return w.to_bytes()
+  }
+  let n = values.length()
+  let signed_len = (-n).to_int64()
+  w.write_varint_i64(signed_len)
+  for v in values {
+    write_value(w, v)
+  }
+  w.to_bytes()
+}
+
+///|
+fn read_postcard_u8(r : BytesReader) -> UInt raise DecodeError {
+  r.read_u8().to_uint()
+}
+
+///|
+fn write_postcard_u8(w : BytesWriter, v : UInt) -> Unit {
+  w.write_u8((v & 0xFF).to_byte())
+}
+
+///|
+fn read_postcard_u32(r : BytesReader) -> UInt raise DecodeError {
+  let v = r.read_varint_u64()
+  if v > 0xFFFF_FFFFUL {
+    raise DecodeError("postcard: u32 overflow")
+  }
+  v.to_uint()
+}
+
+///|
+fn write_postcard_u32(w : BytesWriter, v : UInt) -> Unit {
+  w.write_varint_u64(v.to_uint64())
+}
+
+///|
+fn read_postcard_u64(r : BytesReader) -> UInt64 raise DecodeError {
+  r.read_varint_u64()
+}
+
+///|
+fn write_postcard_u64(w : BytesWriter, v : UInt64) -> Unit {
+  w.write_varint_u64(v)
+}
+
+///|
+fn read_postcard_usize(r : BytesReader) -> UInt64 raise DecodeError {
+  r.read_varint_u64()
+}
+
+///|
+fn write_postcard_usize(w : BytesWriter, v : UInt64) -> Unit {
+  w.write_varint_u64(v)
+}
+
+///|
+fn read_postcard_i32(r : BytesReader) -> Int raise DecodeError {
+  let v = r.read_varint_i64()
+  if v < -2147483648L || v > 2147483647L {
+    raise DecodeError("postcard: i32 overflow")
+  }
+  v.to_int()
+}
+
+///|
+fn write_postcard_i32(w : BytesWriter, v : Int) -> Unit {
+  w.write_varint_i64(v.to_int64())
+}
diff --git a/moon/loro_codec/serde_columnar_test.mbt b/moon/loro_codec/serde_columnar_test.mbt
new file mode 100644
index 000000000..27a321729
--- /dev/null
+++ b/moon/loro_codec/serde_columnar_test.mbt
@@ -0,0 +1,111 @@
+///|
+test "serde_columnar: outer format" {
+  let cols : Array[Bytes] = [b"abc", b"\x01\x02"]
+  let encoded = encode_columnar_vec(cols)
+  assert_eq(encoded, b"\x02\x03abc\x02\x01\x02")
+  let decoded = try!
decode_columnar_vec(encoded[:]) + assert_eq(decoded.length(), 2) + assert_eq(decoded[0].to_bytes(), b"abc") + assert_eq(decoded[1].to_bytes(), b"\x01\x02") +} + +///| +test "serde_columnar: boolrle decode example" { + // [T, T, F, F, F, T] -> [0, 2, 3, 1] + let decoded = try! decode_bool_rle(b"\x00\x02\x03\x01"[:]) + assert_eq(decoded, [true, true, false, false, false, true]) +} + +///| +test "serde_columnar: boolrle encode/decode" { + let cases : Array[Array[Bool]] = [ + [], + [false, false, true], + [true, true, false, false, false, true], + [true, true, true, true, true], + [false, false, false], + ] + for v in cases { + let enc = encode_bool_rle(v) + let dec = try! decode_bool_rle(enc[:]) + assert_eq(dec, v) + } +} + +///| +test "serde_columnar: anyrle u8 decode example" { + // [5,5,5,3,3] -> [0x06,0x05,0x04,0x03] + let decoded = try! decode_rle_u8(b"\x06\x05\x04\x03"[:]) + assert_eq(decoded, [5, 5, 5, 3, 3]) +} + +///| +test "serde_columnar: anyrle u32 literal example" { + // [1,2,3] literal => length=-3 zigzag(-3)=5 => [0x05, 0x01, 0x02, 0x03] + let decoded = try! decode_any_rle_u32(b"\x05\x01\x02\x03"[:]) + assert_eq(decoded, [1, 2, 3]) +} + +///| +test "serde_columnar: deltarle u32 decode example" { + // Values [10, 11, 12, 13, 15, 17] + // Deltas [10,1,1,1,2,2] encoded as AnyRle: + // run(1,10), run(3,1), run(2,2) => + // [zigzag(1)=2, zigzag(10)=20, zigzag(3)=6, zigzag(1)=2, zigzag(2)=4, zigzag(2)=4] + let decoded = try! decode_delta_rle_u32(b"\x02\x14\x06\x02\x04\x04"[:]) + assert_eq(decoded, [10, 11, 12, 13, 15, 17]) +} + +///| +test "serde_columnar: deltarle encode/decode" { + let values : Array[UInt] = [10, 11, 12, 13, 15, 17] + let enc = encode_delta_rle_u32(values) + let dec = try! decode_delta_rle_u32(enc[:]) + assert_eq(dec, values) +} + +///| +test "serde_columnar: deltarle usize encode/decode" { + let values : Array[UInt64] = [0UL, 1UL, 2UL, 10UL, 11UL] + let enc = encode_delta_rle_usize(values) + let dec = try! decode_delta_rle_usize(enc[:]) + assert_eq(dec, values) +} + +///| +test "serde_columnar: deltarle isize encode/decode" { + let values : Array[Int64] = [-2L, -1L, 0L, 5L, 3L] + let enc = encode_delta_rle_isize(values) + let dec = try! decode_delta_rle_isize(enc[:]) + assert_eq(dec, values) +} + +///| +test "serde_columnar: delta_of_delta encode/decode empty" { + let enc = encode_delta_of_delta_i64([]) + assert_eq(enc, b"\x00\x00") + let dec = try! decode_delta_of_delta_i64(enc[:]) + assert_eq(dec, []) +} + +///| +test "serde_columnar: delta_of_delta encode/decode single" { + let enc = encode_delta_of_delta_i64([5L]) + assert_eq(enc, b"\x01\x0A\x00") + let dec = try! decode_delta_of_delta_i64(enc[:]) + assert_eq(dec, [5L]) +} + +///| +test "serde_columnar: delta_of_delta known bytes" { + // Values [1,2,3]: + // first=1 => Option::Some(1) => [0x01, zigzag(1)=0x02] + // delta1=1 => prefix 10 + (1+63)=64 => bits "10 1000000" + // delta2=1 => dod=0 => bit "0" + // bitstream bits: 10100000 00...... => bytes [0xA0, 0x00], last_used_bits=2 + let encoded = b"\x01\x02\x02\xA0\x00" + let decoded = try! 
decode_delta_of_delta_i64(encoded[:]) + assert_eq(decoded, [1L, 2L, 3L]) + let re = encode_delta_of_delta_i64([1L, 2L, 3L]) + assert_eq(re, encoded) +} diff --git a/moon/loro_codec/snapshot_kv.mbt b/moon/loro_codec/snapshot_kv.mbt new file mode 100644 index 000000000..cdbeb5d45 --- /dev/null +++ b/moon/loro_codec/snapshot_kv.mbt @@ -0,0 +1,159 @@ +///| +const DEFAULT_SSTABLE_BLOCK_SIZE : Int = 4096 + +///| +fn bytes_eq(a : Bytes, b : Bytes) -> Bool { + if a.length() != b.length() { + return false + } + for i in 0.. Bytes raise DecodeError { + let kvs = sstable_import_all(oplog_bytes, validate) + if validate { + let mut seen_vv = false + let mut seen_fr = false + let mut seen_sv = false + let mut seen_sf = false + for kv in kvs { + let (k, v) = kv + if bytes_eq(k, b"vv") { + if seen_vv { + raise DecodeError("oplog: duplicate vv") + } + let (_vv, rest) = postcard_take_version_vector(v[:]) + if rest.length() != 0 { + raise DecodeError("oplog: vv trailing bytes") + } + seen_vv = true + } else if bytes_eq(k, b"fr") { + if seen_fr { + raise DecodeError("oplog: duplicate fr") + } + let (_fr, rest) = postcard_take_frontiers(v[:]) + if rest.length() != 0 { + raise DecodeError("oplog: fr trailing bytes") + } + seen_fr = true + } else if bytes_eq(k, b"sv") { + if seen_sv { + raise DecodeError("oplog: duplicate sv") + } + let (_sv, rest) = postcard_take_version_vector(v[:]) + if rest.length() != 0 { + raise DecodeError("oplog: sv trailing bytes") + } + seen_sv = true + } else if bytes_eq(k, b"sf") { + if seen_sf { + raise DecodeError("oplog: duplicate sf") + } + let (_sf, rest) = postcard_take_frontiers(v[:]) + if rest.length() != 0 { + raise DecodeError("oplog: sf trailing bytes") + } + seen_sf = true + } else { + if k.length() != 12 { + raise DecodeError("oplog: invalid key length") + } + let block_id = id_from_change_block_key(k[:]) + let changes = decode_change_block(v[:]) + if changes.length() == 0 { + raise DecodeError("oplog: empty change block") + } + if changes[0].id() != block_id { + raise DecodeError("oplog: block key mismatch") + } + } + } + + // Not all snapshot kinds include shallow keys, but vv/fr should always exist. + if !seen_vv || !seen_fr { + raise DecodeError("oplog: missing vv/fr") + } + } + + // Re-encode SSTable (compression=None) for canonical Moon output. + sstable_export_all(kvs, DEFAULT_SSTABLE_BLOCK_SIZE) catch { + EncodeError(msg) => + raise DecodeError("oplog: encode sstable failed: " + msg) + } +} + +///| +pub fn transcode_state_kv_store( + state_bytes : Bytes, + validate : Bool, +) -> Bytes raise DecodeError { + // For shallow_root_state_bytes in non-shallow snapshots. + if state_bytes.length() == 0 { + return state_bytes + } + // Special sentinel for empty state. + if state_bytes.length() == 1 && state_bytes[0] == b'E' { + return state_bytes + } + let kvs = sstable_import_all(state_bytes, validate) + if kvs.length() == 0 { + return b"E" + } + if validate { + for kv in kvs { + let (k, v) = kv + if bytes_eq(k, b"fr") { + let (_fr, rest) = postcard_take_frontiers(v[:]) + if rest.length() != 0 { + raise DecodeError("state: fr trailing bytes") + } + continue + } + let cid = container_id_from_bytes(k[:]) + let wrapper = parse_container_wrapper(v[:]) + // Basic consistency check: wrapper kind should match key's container kind. 
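+      // (Both Root and Normal keys carry the container kind, so a mismatch
+      // means the key bytes and the serialized wrapper disagree about the
+      // container's type.)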
+      match cid {
+        ContainerID::Root(_, kind) | ContainerID::Normal(_, _, kind) =>
+          if wrapper.kind() != kind {
+            raise DecodeError("state: container kind mismatch")
+          }
+      }
+    }
+  }
+
+  // For now, keep payload opaque and only normalize the wrapper prefix.
+  let out_kvs : Array[(Bytes, Bytes)] = []
+  for kv in kvs {
+    let (k, v) = kv
+    if bytes_eq(k, b"fr") {
+      out_kvs.push((k, v))
+      continue
+    }
+    let wrapper = parse_container_wrapper(v[:])
+    let payload = transcode_container_state_snapshot(
+      wrapper.kind(),
+      wrapper.payload_view(),
+      validate,
+    )
+    let out_v = encode_container_wrapper(
+      wrapper.kind(),
+      wrapper.depth(),
+      wrapper.parent(),
+      payload,
+    )
+    out_kvs.push((k, out_v))
+  }
+  sstable_export_all(out_kvs, DEFAULT_SSTABLE_BLOCK_SIZE) catch {
+    EncodeError(msg) =>
+      raise DecodeError("state: encode sstable failed: " + msg)
+  }
+}
diff --git a/moon/loro_codec/sstable.mbt b/moon/loro_codec/sstable.mbt
new file mode 100644
index 000000000..3c2a933d3
--- /dev/null
+++ b/moon/loro_codec/sstable.mbt
@@ -0,0 +1,6 @@
+// SSTable codec used by FastSnapshot.
+//
+// Implementation is split into cohesive files:
+// - sstable_types.mbt
+// - sstable_decode.mbt
+// - sstable_encode.mbt
diff --git a/moon/loro_codec/sstable_decode.mbt b/moon/loro_codec/sstable_decode.mbt
new file mode 100644
index 000000000..52bd708e1
--- /dev/null
+++ b/moon/loro_codec/sstable_decode.mbt
@@ -0,0 +1,188 @@
+///|
+fn decode_block_meta(
+  bytes : BytesView,
+  check_checksum : Bool,
+) -> Array[BlockMeta] raise DecodeError {
+  let len = bytes.length()
+  if len < 8 {
+    raise DecodeError("sstable: invalid meta bytes")
+  }
+  if check_checksum {
+    let stored = BytesReader::from_view(bytes[len - 4:len]).read_u32_le()
+    let expected = xxhash32(bytes[4:len - 4], LORO_XXH32_SEED)
+    if expected != stored {
+      raise DecodeError("sstable: meta checksum mismatch")
+    }
+  }
+  let r = BytesReader::from_view(bytes)
+  let num = r.read_u32_le().reinterpret_as_int()
+  if num < 0 || num > MAX_BLOCK_NUM {
+    raise DecodeError("sstable: invalid block count")
+  }
+  let metas : Array[BlockMeta] = []
+  for _i in 0..<num {
+    let offset = r.read_u32_le().reinterpret_as_int()
+    let first_key_len = r.read_u16_le().reinterpret_as_int()
+    let first_key = r.read_exact(first_key_len).to_bytes()
+    let flags = r.read_u8().to_uint()
+    let is_large = (flags & 0x80) != 0
+    let compression_type = compression_type_from_u8(flags & 0x7F)
+    let last_key = if is_large {
+      None
+    } else {
+      let last_key_len = r.read_u16_le().reinterpret_as_int()
+      Some(r.read_exact(last_key_len).to_bytes())
+    }
+    metas.push({ offset, is_large, compression_type, first_key, last_key })
+  }
+  metas
+}
+
+///|
+fn decode_normal_block(
+  data : BytesView,
+  first_key : Bytes,
+) -> Array[(Bytes, Bytes)] raise DecodeError {
+  if data.length() < 2 {
+    raise DecodeError("sstable: invalid normal block")
+  }
+  let total_len = data.length()
+  let kv_len = BytesReader::from_view(data[total_len - 2:total_len])
+    .read_u16_le()
+    .reinterpret_as_int()
+  if kv_len < 0 {
+    raise DecodeError("sstable: invalid kv_len")
+  }
+  let offsets_bytes_len = kv_len * 2
+  let data_end = total_len - 2 - offsets_bytes_len
+  if data_end < 0 {
+    raise DecodeError("sstable: invalid offsets section")
+  }
+  let offsets_view = data[data_end:data_end + offsets_bytes_len]
+  let offsets : Array[Int] = []
+  for i in 0..<kv_len {
+    let off = BytesReader::from_view(offsets_view[i * 2:i * 2 + 2])
+      .read_u16_le()
+      .reinterpret_as_int()
+    offsets.push(off)
+  }
+  let kvs : Array[(Bytes, Bytes)] = []
+  for i in 0..<kv_len {
+    let start = offsets[i]
+    let end = if i + 1 < kv_len { offsets[i + 1] } else { data_end }
+    if start < 0 || end < start || end >
data_end { + raise DecodeError("sstable: invalid offset range") + } + if i == 0 { + kvs.push((first_key, data[start:end].to_bytes())) + continue + } + let rest = data[start:end] + if rest.length() < 3 { + raise DecodeError("sstable: invalid kv chunk") + } + let common = rest[0].to_int() + if common < 0 || common > first_key.length() { + raise DecodeError("sstable: invalid common prefix len") + } + let key_suffix_len = BytesReader::from_view(rest[1:3]) + .read_u16_le() + .reinterpret_as_int() + if key_suffix_len < 0 || 3 + key_suffix_len > rest.length() { + raise DecodeError("sstable: invalid key suffix len") + } + let key_suffix = rest[3:3 + key_suffix_len] + let value = rest[3 + key_suffix_len:] + let w = BytesWriter::new() + w.write_bytesview(first_key[0:common]) + w.write_bytesview(key_suffix) + kvs.push((w.to_bytes(), value.to_bytes())) + } + kvs +} + +///| +pub fn sstable_import_all( + bytes : Bytes, + check_checksum : Bool, +) -> Array[(Bytes, Bytes)] raise DecodeError { + if bytes.length() < 9 { + raise DecodeError("sstable: invalid bytes") + } + if bytes[0:4] != SSTABLE_MAGIC[:] { + raise DecodeError("sstable: invalid magic") + } + if bytes[4] != CURRENT_SCHEMA_VERSION { + raise DecodeError("sstable: invalid schema version") + } + let meta_offset = BytesReader::from_view(bytes[bytes.length() - 4:]) + .read_u32_le() + .reinterpret_as_int() + if meta_offset < 5 || meta_offset >= bytes.length() - 4 { + raise DecodeError("sstable: invalid meta offset") + } + let raw_meta = bytes[meta_offset:bytes.length() - 4] + let meta = decode_block_meta(raw_meta, check_checksum) + let kvs : Array[(Bytes, Bytes)] = [] + for i in 0.. meta_offset { + raise DecodeError("sstable: invalid block offset") + } + let raw_block_and_check = bytes[offset:offset_end] + if raw_block_and_check.length() < 4 { + raise DecodeError("sstable: invalid block bytes") + } + let stored = BytesReader::from_view( + raw_block_and_check[raw_block_and_check.length() - 4:], + ).read_u32_le() + let body = raw_block_and_check[0:raw_block_and_check.length() - 4] + if check_checksum { + let expected = xxhash32(body, LORO_XXH32_SEED) + if expected != stored { + raise DecodeError("sstable: block checksum mismatch") + } + } + let uncompressed = match m.compression_type { + CompressionType::None => body.to_bytes() + CompressionType::LZ4 => lz4_decompress_frame(body, check_checksum) + } + if m.is_large { + kvs.push((m.first_key, uncompressed)) + } else { + let pairs = decode_normal_block(uncompressed[:], m.first_key) + for p in pairs { + kvs.push(p) + } + } + } + kvs +} diff --git a/moon/loro_codec/sstable_encode.mbt b/moon/loro_codec/sstable_encode.mbt new file mode 100644 index 000000000..12dd6b6ab --- /dev/null +++ b/moon/loro_codec/sstable_encode.mbt @@ -0,0 +1,162 @@ +///| +pub fn sstable_export_all( + kvs : Array[(Bytes, Bytes)], + block_size : Int, +) -> Bytes raise EncodeError { + if block_size <= 0 || block_size > 0xFFFF { + raise EncodeError("sstable: invalid block_size") + } + for i in 0.. 
<kvs.length() {
+    let (k, _) = kvs[i]
+    if i >
0 { + let (prev, _) = kvs[i - 1] + if bytes_compare(prev[:], k[:]) > 0 { + raise EncodeError("sstable: keys must be sorted") + } + } + } + let blocks : Array[Bytes] = [] + let meta : Array[BlockMeta] = [] + let mut next_offset = 5 + let mut idx = 0 + while idx < kvs.length() { + let (key, value) = kvs[idx] + if value.length() > block_size { + let compressed = lz4_compress_frame(value) catch { + EncodeError(e) => raise EncodeError("sstable: lz4 encode failed: " + e) + } + let (body, compression_type) = if compressed.length() > value.length() { + (value, CompressionType::None) + } else { + (compressed, CompressionType::LZ4) + } + let checksum = xxhash32(body[:], LORO_XXH32_SEED) catch { + DecodeError(e) => raise EncodeError("sstable: checksum failed: " + e) + } + let w = BytesWriter::new() + w.write_bytes(body) + w.write_u32_le(checksum) + let block_bytes = w.to_bytes() + blocks.push(block_bytes) + meta.push({ + offset: next_offset, + is_large: true, + compression_type, + first_key: key, + last_key: None, + }) + next_offset = next_offset + block_bytes.length() + idx = idx + 1 + continue + } + let first_key = key + let mut last_key = key + let data_w = BytesWriter::new() + let offsets : Array[Int] = [] + let mut data_len = 0 + offsets.push(data_len) + data_w.write_bytes(value) + data_len = data_len + value.length() + idx = idx + 1 + while idx < kvs.length() { + let (k2, v2) = kvs[idx] + if v2.length() > block_size { + break + } + let estimated = 2 + offsets.length() * 2 + data_len + 4 + let add_est = k2.length() + v2.length() + 1 + 2 + if estimated + add_est > block_size { + break + } + let common = common_prefix_len(first_key, k2) + let suffix = k2[common:] + let suffix_len = suffix.length() + if suffix_len > 0xFFFF { + raise EncodeError("sstable: key too long") + } + if data_len > 0xFFFF { + raise EncodeError("sstable: block too large") + } + offsets.push(data_len) + data_w.write_u8(common.to_byte()) + data_w.write_u16_le(suffix_len.reinterpret_as_uint()) + data_w.write_bytesview(suffix) + data_w.write_bytes(v2) + data_len = data_len + 1 + 2 + suffix_len + v2.length() + last_key = k2 + idx = idx + 1 + } + let body_w = BytesWriter::new() + body_w.write_bytes(data_w.to_bytes()) + for off in offsets { + if off < 0 || off > 0xFFFF { + raise EncodeError("sstable: invalid offset") + } + body_w.write_u16_le(off.reinterpret_as_uint()) + } + body_w.write_u16_le(offsets.length().reinterpret_as_uint()) + let raw_body = body_w.to_bytes() + let compressed = lz4_compress_frame(raw_body) catch { + EncodeError(e) => raise EncodeError("sstable: lz4 encode failed: " + e) + } + let (body, compression_type) = if compressed.length() > raw_body.length() { + (raw_body, CompressionType::None) + } else { + (compressed, CompressionType::LZ4) + } + let checksum = xxhash32(body[:], LORO_XXH32_SEED) catch { + DecodeError(e) => raise EncodeError("sstable: checksum failed: " + e) + } + let w = BytesWriter::new() + w.write_bytes(body) + w.write_u32_le(checksum) + let block_bytes = w.to_bytes() + blocks.push(block_bytes) + meta.push({ + offset: next_offset, + is_large: false, + compression_type, + first_key, + last_key: Some(last_key), + }) + next_offset = next_offset + block_bytes.length() + } + let meta_offset = next_offset + let meta_w = BytesWriter::new() + meta_w.write_u32_le(meta.length().reinterpret_as_uint()) + for m in meta { + meta_w.write_u32_le(m.offset.reinterpret_as_uint()) + meta_w.write_u16_le(m.first_key.length().reinterpret_as_uint()) + meta_w.write_bytes(m.first_key) + let flags = (if m.is_large { 0x80 } 
else { 0 }).reinterpret_as_uint() |
+      compression_type_to_u8(m.compression_type).to_uint()
+    meta_w.write_u8((flags & 0xFF).to_byte())
+    if !m.is_large {
+      match m.last_key {
+        Some(last_key) => {
+          meta_w.write_u16_le(last_key.length().reinterpret_as_uint())
+          meta_w.write_bytes(last_key)
+        }
+        None => raise EncodeError("sstable: missing last_key")
+      }
+    }
+  }
+  let meta_bytes_without_checksum = meta_w.to_bytes()
+  let meta_checksum = xxhash32(meta_bytes_without_checksum[4:], LORO_XXH32_SEED) catch {
+    DecodeError(e) => raise EncodeError("sstable: checksum failed: " + e)
+  }
+  meta_w.write_u32_le(meta_checksum)
+  let meta_bytes = meta_w.to_bytes()
+  let out = BytesWriter::new()
+  out.write_bytes(SSTABLE_MAGIC)
+  out.write_u8(CURRENT_SCHEMA_VERSION)
+  for b in blocks {
+    out.write_bytes(b)
+  }
+  out.write_bytes(meta_bytes)
+  out.write_u32_le(meta_offset.reinterpret_as_uint())
+  out.to_bytes()
+}
diff --git a/moon/loro_codec/sstable_test.mbt b/moon/loro_codec/sstable_test.mbt
new file mode 100644
index 000000000..cb4045414
--- /dev/null
+++ b/moon/loro_codec/sstable_test.mbt
@@ -0,0 +1,58 @@
+///|
+test "sstable: roundtrip simple" {
+  let kvs : Array[(Bytes, Bytes)] = [
+    (b"a", b"1"),
+    (b"ab", b"2"),
+    (b"b", b""),
+    (b"c", b"ccc"),
+  ]
+  let encoded = sstable_export_all(kvs, 128) catch { EncodeError(_) => b"" }
+  let decoded = sstable_import_all(encoded, true) catch { DecodeError(_) => [] }
+  assert_eq(decoded, kvs)
+}
+
+///|
+test "sstable: large value block" {
+  let kvs : Array[(Bytes, Bytes)] = [(b"k", b"0123456789abcdef")]
+  let encoded = sstable_export_all(kvs, 8) catch { EncodeError(_) => b"" }
+  let decoded = sstable_import_all(encoded, true) catch { DecodeError(_) => [] }
+  assert_eq(decoded, kvs)
+}
+
+///|
+test "sstable: multiple blocks split" {
+  let kvs : Array[(Bytes, Bytes)] = [
+    (b"a", b"11111111"),
+    (b"aa", b"22222222"),
+    (b"aaa", b"33333333"),
+  ]
+  let encoded = sstable_export_all(kvs, 16) catch { EncodeError(_) => b"" }
+  let decoded = sstable_import_all(encoded, true) catch { DecodeError(_) => [] }
+  assert_eq(decoded, kvs)
+}
+
+///|
+fn repeat_byte(byte : Byte, n : Int) -> Bytes {
+  let w = BytesWriter::new()
+  for _i in 0..<n {
+    w.write_u8(byte)
+  }
+  w.to_bytes()
+}
+
+///|
+test "sstable: decode fixture" {
+  // kvs: a/ab/ac => 10x 'a', z => 200x 'z'
+  let fixture = b"\x4C\x4F\x52\x4F\x00\x04\x22\x4D\x18\x60\x40\x82\x1D\x00\x00\x00\x15\x61\x01\x00\x55\x00\x02\x00\x61\x62\x0E\x00\x67\x61\x00\x02\x00\x61\x63\x0F\x00\x70\x00\x0A\x00\x19\x00\x03\x00\x00\x00\x00\x00\x1C\x44\xC3\x37\x04\x22\x4D\x18\x60\x40\x82\x0C\x00\x00\x00\x1F\x7A\x01\x00\xAE\x60\x7A\x7A\x7A\x7A\x7A\x7A\x00\x00\x00\x00\xC5\xE2\xB4\x79\x02\x00\x00\x00\x05\x00\x00\x00\x01\x00\x61\x01\x02\x00\x61\x63\x35\x00\x00\x00\x01\x00\x7A\x81\xFF\xA8\xF1\x0F\x54\x00\x00\x00"
+  let decoded = try! sstable_import_all(fixture, true)
+  let expected : Array[(Bytes, Bytes)] = [
+    (b"a", repeat_byte(b'a', 10)),
+    (b"ab", repeat_byte(b'a', 10)),
+    (b"ac", repeat_byte(b'a', 10)),
+    (b"z", repeat_byte(b'z', 200)),
+  ]
+  assert_eq(decoded, expected)
+}
diff --git a/moon/loro_codec/sstable_types.mbt b/moon/loro_codec/sstable_types.mbt
new file mode 100644
index 000000000..911fc785c
--- /dev/null
+++ b/moon/loro_codec/sstable_types.mbt
@@ -0,0 +1,66 @@
+///|
+const SSTABLE_MAGIC : Bytes = b"LORO"
+
+///|
+const CURRENT_SCHEMA_VERSION : Byte = b'\x00'
+
+///|
+const MAX_BLOCK_NUM : Int = 10_000_000
+
+///|
+pub enum CompressionType {
+  None
+  LZ4
+}
+
+///|
+fn compression_type_from_u8(v : UInt) -> CompressionType raise DecodeError {
+  match v {
+    0 => CompressionType::None
+    1 => CompressionType::LZ4
+    _ => raise DecodeError("sstable: invalid compression type")
+  }
+}
+
+///|
+fn compression_type_to_u8(v : CompressionType) -> Byte {
+  match v {
+    CompressionType::None => b'\x00'
+    CompressionType::LZ4 => b'\x01'
+  }
+}
+
+///|
+pub struct BlockMeta {
+  offset : Int
+  is_large : Bool
+  compression_type : CompressionType
+  first_key : Bytes
+  last_key : Bytes?
+}
+
+///|
+fn bytes_compare(a : BytesView, b : BytesView) -> Int {
+  let n = if a.length() < b.length() { a.length() } else { b.length() }
+  for i in 0..<n {
+    if a[i] != b[i] {
+      return a[i].to_int() - b[i].to_int()
+    }
+  }
+  a.length() - b.length()
+}
+
+///|
+fn common_prefix_len(a : Bytes, b : Bytes) -> Int {
+  let n = if a.length() < b.length() { a.length() } else { b.length() }
+  let mut i = 0
+  while i < n && i < 255 {
+    if a[i] != b[i] {
+      break
+    }
+    i = i + 1
+  }
+  i
+}
diff --git a/moon/loro_codec/state_snapshot.mbt b/moon/loro_codec/state_snapshot.mbt
new file mode 100644
index 000000000..7090f83c0
--- /dev/null
+++ b/moon/loro_codec/state_snapshot.mbt
@@ -0,0 +1,11 @@
+// State snapshot transcoders (FastSnapshot container states).
+//
+// Implementation is split into cohesive files:
+// - state_snapshot_peer_table.mbt
+// - state_snapshot_map.mbt
+// - state_snapshot_list.mbt
+// - state_snapshot_richtext.mbt
+// - state_snapshot_tree.mbt
+// - state_snapshot_movable_list.mbt
+// - state_snapshot_counter.mbt
+// - state_snapshot_dispatch.mbt
diff --git a/moon/loro_codec/state_snapshot_counter.mbt b/moon/loro_codec/state_snapshot_counter.mbt
new file mode 100644
index 000000000..450a8d45b
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_counter.mbt
@@ -0,0 +1,14 @@
+///|
+pub fn transcode_counter_state_snapshot(
+  bytes : BytesView,
+  _validate : Bool,
+) -> Bytes raise DecodeError {
+  if bytes.length() != 8 {
+    raise DecodeError("counter_state: invalid length")
+  }
+  // f64 little-endian, keep exact bits.
+  let bits = BytesReader::from_view(bytes).read_u64_le()
+  let w = BytesWriter::new()
+  w.write_u64_le(bits)
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/state_snapshot_dispatch.mbt b/moon/loro_codec/state_snapshot_dispatch.mbt
new file mode 100644
index 000000000..d21ecd1fc
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_dispatch.mbt
@@ -0,0 +1,18 @@
+///|
+pub fn transcode_container_state_snapshot(
+  kind : ContainerType,
+  payload : BytesView,
+  validate : Bool,
+) -> Bytes raise DecodeError {
+  match kind {
+    ContainerType::Map => transcode_map_state_snapshot(payload, validate)
+    ContainerType::List => transcode_list_state_snapshot(payload, validate)
+    ContainerType::Text => transcode_richtext_state_snapshot(payload, validate)
+    ContainerType::Tree => transcode_tree_state_snapshot(payload, validate)
+    ContainerType::MovableList =>
+      transcode_movable_list_state_snapshot(payload, validate)
+    ContainerType::Counter =>
+      transcode_counter_state_snapshot(payload, validate)
+    ContainerType::Unknown(_) => payload.to_bytes()
+  }
+}
diff --git a/moon/loro_codec/state_snapshot_list.mbt b/moon/loro_codec/state_snapshot_list.mbt
new file mode 100644
index 000000000..2df32813f
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_list.mbt
@@ -0,0 +1,42 @@
+///|
+pub fn transcode_list_state_snapshot(
+  bytes : BytesView,
+  validate : Bool,
+) -> Bytes raise DecodeError {
+  let (values, rest1) = postcard_take_vec_common_value(bytes)
+  let (peers, rest2) = take_peer_table(rest1)
+  let cols = decode_columnar_vec_maybe_wrapped(rest2)
+  if cols.length() != 3 {
+    raise DecodeError("list_state: invalid id column count")
+  }
+  let peer_idx = decode_delta_rle_usize(cols[0])
+  let counter = decode_delta_rle_i32(cols[1])
+  let lamport_sub = decode_delta_rle_i32(cols[2])
+  if validate {
+    if peer_idx.length() != values.length() {
+      raise DecodeError("list_state: id length mismatch")
+    }
+    if counter.length() != values.length() ||
+      lamport_sub.length() != values.length() {
+      raise DecodeError("list_state: id length mismatch")
+    }
+    for p in peer_idx {
+      if p > 0x7FFF_FFFFUL {
+        raise DecodeError("list_state: peer_idx too large")
+      }
+      if p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("list_state: peer_idx out of range")
+      }
+    }
+  }
+  let ids_bytes = encode_columnar_vec_wrapped([
+    encode_delta_rle_usize(peer_idx),
+    encode_delta_rle_i32(counter),
+    encode_delta_rle_i32(lamport_sub),
+  ])
+  let w = BytesWriter::new()
+  w.write_bytes(postcard_encode_vec_common_value(values))
+  w.write_bytes(encode_peer_table(peers))
+  w.write_bytes(ids_bytes)
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/state_snapshot_map.mbt b/moon/loro_codec/state_snapshot_map.mbt
new file mode 100644
index 000000000..5a54045df
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_map.mbt
@@ -0,0 +1,79 @@
+///|
+fn collect_sorted_unique_keys(
+  values : Array[(String, CommonValue)],
+  deleted_keys : Array[String],
+  validate : Bool,
+) -> Array[String] raise DecodeError {
+  let keys : Array[String] = []
+  for pair in values {
+    let (k, _v) = pair
+    keys.push(k)
+  }
+  for k in deleted_keys {
+    keys.push(k)
+  }
+  keys.sort()
+  if !validate {
+    return keys
+  }
+  // Validate uniqueness.
+  if keys.length() <= 1 {
+    return keys
+  }
+  let out : Array[String] = []
+  out.push(keys[0])
+  for i in 1..<keys.length() {
+    if keys[i] == keys[i - 1] {
+      raise DecodeError("map_state: duplicate key")
+    }
+    out.push(keys[i])
+  }
+  out
+}
+
+///|
+pub fn transcode_map_state_snapshot(
+  bytes : BytesView,
+  validate : Bool,
+) -> Bytes raise DecodeError {
+  let (values, rest1) = postcard_take_map_string_common_value(bytes)
+  let (deleted_keys, rest2) = postcard_take_vec_string(rest1)
+  let (peers, meta_bytes) = take_peer_table(rest2)
+  let keys = collect_sorted_unique_keys(values, deleted_keys, validate)
+  let meta_r = BytesReader::from_view(meta_bytes)
+  let peer_idxs : Array[UInt64] = []
+  let lamports : Array[UInt64] = []
+  for _k in keys {
+    let peer_idx = meta_r.read_uleb128_u64()
+    let lamport = meta_r.read_uleb128_u64()
+    if validate {
+      if peer_idx > 0x7FFF_FFFFUL {
+        raise DecodeError("map_state: peer_idx too large")
+      }
+      if peer_idx.to_int() < 0 || peer_idx.to_int() >= peers.length() {
+        raise DecodeError("map_state: peer_idx out of range")
+      }
+    }
+    peer_idxs.push(peer_idx)
+    lamports.push(lamport)
+  }
+  if meta_r.remaining() != 0 {
+    raise DecodeError("map_state: trailing meta bytes")
+  }
+
+  // Normalize encoding (stable ordering).
+  values.sort_by_key(pair => {
+    let (k, _v) = pair
+    k
+  })
+  deleted_keys.sort()
+  let w = BytesWriter::new()
+  w.write_bytes(postcard_encode_map_string_common_value(values))
+  w.write_bytes(postcard_encode_vec_string(deleted_keys))
+  w.write_bytes(encode_peer_table(peers))
+  for i in 0..<keys.length() {
+    w.write_uleb128_u64(peer_idxs[i])
+    w.write_uleb128_u64(lamports[i])
+  }
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/state_snapshot_movable_list.mbt b/moon/loro_codec/state_snapshot_movable_list.mbt
new file mode 100644
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_movable_list.mbt
+///|
+pub fn transcode_movable_list_state_snapshot(
+  bytes : BytesView,
+  validate : Bool,
+) -> Bytes raise DecodeError {
+  let (values, rest1) = postcard_take_vec_common_value(bytes)
+  let (peers, rest2) = take_peer_table(rest1)
+  let r = BytesReader::from_view(rest2)
+  let n_fields = r.read_varint_u64()
+  if n_fields != 4UL {
+    raise DecodeError("movable_list: invalid EncodedFastSnapshot field count")
+  }
+  let items_view = r.remaining_view()
+  let (items_cols, rest_after_items) = take_columnar_vec(items_view)
+  r.skip(items_view.length() - rest_after_items.length())
+  if items_cols.length() != 3 {
+    raise DecodeError("movable_list: invalid items column count")
+  }
+  let invisible_list_item = decode_delta_rle_usize(items_cols[0])
+  let pos_id_eq_elem_id = decode_bool_rle(items_cols[1])
+  let elem_id_eq_last_set_id = decode_bool_rle(items_cols[2])
+  let n_items = invisible_list_item.length()
+  if pos_id_eq_elem_id.length() != n_items ||
+    elem_id_eq_last_set_id.length() != n_items {
+    raise DecodeError("movable_list: items column length mismatch")
+  }
+  let list_ids_view = r.remaining_view()
+  let (list_id_cols, rest_after_list_ids) = take_columnar_vec(list_ids_view)
+  r.skip(list_ids_view.length() - rest_after_list_ids.length())
+  if list_id_cols.length() != 3 {
+    raise DecodeError("movable_list: invalid list_item_ids column count")
+  }
+  let list_peer_idx = decode_delta_rle_usize(list_id_cols[0])
+  let list_counter = decode_delta_rle_i32(list_id_cols[1])
+  let list_lamport_sub = decode_delta_rle_i32(list_id_cols[2])
+  if list_peer_idx.length() != list_counter.length() ||
+    list_peer_idx.length() != list_lamport_sub.length() {
+    raise DecodeError("movable_list: list_item_ids length mismatch")
+  }
+  let elem_ids_view = r.remaining_view()
+  let (elem_id_cols, rest_after_elem_ids) = take_columnar_vec(elem_ids_view)
+  r.skip(elem_ids_view.length() - rest_after_elem_ids.length())
+  if elem_id_cols.length() != 2 {
+    raise DecodeError("movable_list: invalid elem_ids column count")
+  }
+  let elem_peer_idx = decode_delta_rle_usize(elem_id_cols[0])
+  let elem_lamport = decode_delta_rle_u32(elem_id_cols[1])
+  if elem_peer_idx.length() != elem_lamport.length() {
+    raise DecodeError("movable_list: elem_ids length mismatch")
+  }
+  let last_ids_view = r.remaining_view()
+  let (last_id_cols, rest_after_last_ids) = take_columnar_vec(last_ids_view)
+  r.skip(last_ids_view.length() - rest_after_last_ids.length())
+  if last_id_cols.length() != 2 {
+    raise DecodeError("movable_list: invalid last_set_ids column count")
+  }
+  let last_peer_idx = decode_delta_rle_usize(last_id_cols[0])
+  let last_lamport = decode_delta_rle_u32(last_id_cols[1])
+  if last_peer_idx.length() != last_lamport.length() {
+    raise DecodeError("movable_list: last_set_ids length mismatch")
+  }
+  if r.remaining() != 0 {
+    raise DecodeError("movable_list: trailing bytes")
+  }
+  if validate {
+    // visible values should match visible entries = items.len - 1 (sentinel excluded)
+    let visible_n = if n_items == 0 { 0 } else { n_items - 1 }
+    if values.length() != visible_n {
+      raise DecodeError("movable_list: visible value count mismatch")
+    }
+
+    // list_item_ids count matches (visible + invisible) list items
+    let mut expected_list_ids = 0UL
+    if n_items > 0 {
+      expected_list_ids = visible_n.to_uint64()
+      for c in invisible_list_item {
+        expected_list_ids = expected_list_ids + c
+      }
+    }
+    if list_peer_idx.length().to_uint64() != expected_list_ids {
+      raise DecodeError("movable_list: list_item_ids count mismatch")
+    }
+    let mut need_elem_ids = 0
+    let mut need_last_ids = 0
+    for i in 1..<n_items {
+      if !pos_id_eq_elem_id[i] {
+        need_elem_ids = need_elem_ids + 1
+      }
+      if !elem_id_eq_last_set_id[i] {
+        need_last_ids = need_last_ids + 1
+      }
+    }
+    if elem_peer_idx.length() != need_elem_ids {
+      raise DecodeError("movable_list: elem_ids count mismatch")
+    }
+    if last_peer_idx.length() != need_last_ids {
+      raise DecodeError("movable_list: last_set_ids count mismatch")
+    }
+    for p in list_peer_idx {
+      if p > 0x7FFF_FFFFUL || p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("movable_list: peer_idx out of range")
+      }
+    }
+    for p in elem_peer_idx {
+      if p > 0x7FFF_FFFFUL || p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("movable_list: elem peer_idx out of range")
+      }
+    }
+    for p in last_peer_idx {
+      if p > 0x7FFF_FFFFUL || p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("movable_list: last peer_idx out of range")
+      }
+    }
+  }
+  let items_bytes = encode_columnar_vec([
+    encode_delta_rle_usize(invisible_list_item),
+    encode_bool_rle(pos_id_eq_elem_id),
+    encode_bool_rle(elem_id_eq_last_set_id),
+  ])
+  let list_ids_bytes = encode_columnar_vec([
+    encode_delta_rle_usize(list_peer_idx),
+    encode_delta_rle_i32(list_counter),
+    encode_delta_rle_i32(list_lamport_sub),
+  ])
+  let elem_ids_bytes = encode_columnar_vec([
+    encode_delta_rle_usize(elem_peer_idx),
+    encode_delta_rle_u32(elem_lamport),
+  ])
+  let last_ids_bytes = encode_columnar_vec([
+    encode_delta_rle_usize(last_peer_idx),
+    encode_delta_rle_u32(last_lamport),
+  ])
+  let encoded = BytesWriter::new()
+  encoded.write_varint_u64(4UL)
+  encoded.write_bytes(items_bytes)
+  encoded.write_bytes(list_ids_bytes)
+  encoded.write_bytes(elem_ids_bytes)
+  encoded.write_bytes(last_ids_bytes)
+  let w = BytesWriter::new()
+  w.write_bytes(postcard_encode_vec_common_value(values))
+  w.write_bytes(encode_peer_table(peers))
+  w.write_bytes(encoded.to_bytes())
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/state_snapshot_peer_table.mbt b/moon/loro_codec/state_snapshot_peer_table.mbt
new file mode 100644
index 000000000..ee619ddcc
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_peer_table.mbt
@@ -0,0 +1,26 @@
+///|
+fn take_peer_table(
+  bytes : BytesView,
+) -> (Array[UInt64], BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let n_u64 = r.read_uleb128_u64()
+  if n_u64 > 0x7FFF_FFFFUL {
+    raise DecodeError("state: too many peers")
+  }
+  let n = n_u64.to_int()
+  let peers : Array[UInt64] = []
+  for _i in 0..<n {
+    peers.push(r.read_u64_le())
+  }
+  (peers, r.remaining_view())
+}
+
+///|
+fn encode_peer_table(peers : Array[UInt64]) -> Bytes {
+  let w = BytesWriter::new()
+  w.write_uleb128_u64(peers.length().to_uint64())
+  for p in peers {
+    w.write_u64_le(p)
+  }
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/state_snapshot_richtext.mbt b/moon/loro_codec/state_snapshot_richtext.mbt
new file mode 100644
index 000000000..549ac473c
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_richtext.mbt
@@ -0,0 +1,152 @@
+///|
+fn count_utf8_codepoints(s : String) -> Int {
+  let bytes = @encoding/utf8.encode(s[:])
+  let mut count = 0
+  for b in bytes {
+    // Count non-continuation bytes: 0b10xxxxxx
+    let u = b.to_uint()
+    if (u & 0xC0) != 0x80 {
+      count = count + 1
+    }
+  }
+  count
+}
+
+///|
+type EncodedMark = (UInt64, CommonValue, Byte) // (key_idx, value, info)
+
+///|
+fn take_postcard_vec_marks(
+  bytes : BytesView,
+) -> (Array[EncodedMark], BytesView) raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let n_u64 = r.read_varint_u64()
+  if n_u64 > 0x7FFF_FFFFUL {
+    raise DecodeError("richtext: too many marks")
+  }
+  let n = n_u64.to_int()
+  let out : Array[EncodedMark] = []
+  for _i in 0..<n {
+    let key_idx = r.read_varint_u64()
+    // value is a postcard-encoded CommonValue
+    let value_view = r.remaining_view()
+    let (value, rest) = postcard_take_common_value(value_view)
+    r.skip(value_view.length() - rest.length())
+    let info = r.read_u8()
+    out.push((key_idx, value, info))
+  }
+  (out, r.remaining_view())
+}
+
+///|
+fn encode_postcard_vec_marks(marks : Array[EncodedMark]) -> Bytes {
+  let w = BytesWriter::new()
+  w.write_varint_u64(marks.length().to_uint64())
+  for m in marks {
+    let (key_idx, value, info) = m
+    w.write_varint_u64(key_idx)
+    w.write_bytes(postcard_encode_common_value(value))
+    w.write_u8(info)
+  }
+  w.to_bytes()
+}
+
+///|
+fn encode_encoded_text(
+  span_peer_idx : Array[UInt64],
+  span_counter : Array[Int],
+  span_lamport_sub : Array[Int],
+  span_len : Array[Int],
+  keys : Array[String],
+  marks : Array[EncodedMark],
+) -> Bytes {
+  let span_cols = encode_columnar_vec([
+    encode_delta_rle_usize(span_peer_idx),
+    encode_delta_rle_i32(span_counter),
+    encode_delta_rle_i32(span_lamport_sub),
+    encode_delta_rle_i32(span_len),
+  ])
+  let w = BytesWriter::new()
+  w.write_varint_u64(3UL)
+  w.write_bytes(span_cols)
+  w.write_bytes(postcard_encode_vec_string(keys))
+  w.write_bytes(encode_postcard_vec_marks(marks))
+  w.to_bytes()
+}
+
+///|
+pub fn transcode_richtext_state_snapshot(
+  bytes : BytesView,
+  validate : Bool,
+) -> Bytes raise DecodeError {
+  let (text, rest1) = postcard_take_string(bytes)
+  let (peers, rest2) = take_peer_table(rest1)
+  let r = BytesReader::from_view(rest2)
+  let n_fields = r.read_varint_u64()
+  if n_fields != 3UL {
+    raise DecodeError("richtext: invalid EncodedText field count")
+  }
+  let spans_view = r.remaining_view()
+  let (span_cols, rest_after_span) = take_columnar_vec(spans_view)
+  r.skip(spans_view.length() - rest_after_span.length())
+  if span_cols.length() != 4 {
+    raise DecodeError("richtext: invalid span column count")
+  }
+  let span_peer_idx = decode_delta_rle_usize(span_cols[0])
+  let span_counter = decode_delta_rle_i32(span_cols[1])
+  let span_lamport_sub = decode_delta_rle_i32(span_cols[2])
+  let span_len = decode_delta_rle_i32(span_cols[3])
+  let span_n = span_len.length()
+  if span_peer_idx.length() != span_n ||
+    span_counter.length() != span_n ||
+    span_lamport_sub.length() != span_n {
+    raise DecodeError("richtext: span column length mismatch")
+  }
+  let keys_view = r.remaining_view()
+  let (keys, rest_after_keys) = postcard_take_vec_string(keys_view)
+  r.skip(keys_view.length() - rest_after_keys.length())
+  let marks_view = r.remaining_view()
+  let (marks, rest_after_marks) = take_postcard_vec_marks(marks_view)
+  r.skip(marks_view.length() - rest_after_marks.length())
+  if r.remaining() != 0 {
+    raise DecodeError("richtext: trailing bytes")
+  }
+  if validate {
+    // marks count must match number of span.len == 0
+    let mut mark_needed = 0
+    let mut text_len_sum = 0
+    for i in 0..<span_n {
+      let len = span_len[i]
+      if len == 0 {
+        mark_needed = mark_needed + 1
+      }
+      if len > 0 {
+        text_len_sum = text_len_sum + len
+      }
+      let p = span_peer_idx[i]
+      if p > 0x7FFF_FFFFUL || p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("richtext: peer_idx out of range")
+      }
+    }
+    if marks.length() != mark_needed {
+      raise DecodeError("richtext: mark count mismatch")
+    }
+    for m in marks {
+      let (key_idx, _v, _info) = m
+      if key_idx > 0x7FFF_FFFFUL ||
+        key_idx.to_int() < 0 ||
+        key_idx.to_int() >= keys.length() {
+        raise DecodeError("richtext: key_idx out of range")
+      }
+    }
+    if count_utf8_codepoints(text) != text_len_sum {
+      raise DecodeError("richtext: text unicode length mismatch")
+    }
+  }
+  let w = BytesWriter::new()
+  w.write_bytes(postcard_encode_string(text))
+  w.write_bytes(encode_peer_table(peers))
+  w.write_bytes(
+    encode_encoded_text(
+      span_peer_idx, span_counter, span_lamport_sub, span_len, keys, marks,
+    ),
+  )
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/state_snapshot_test.mbt b/moon/loro_codec/state_snapshot_test.mbt
new file mode 100644
index 000000000..756a82d80
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_test.mbt
@@ -0,0 +1,180 @@
+///|
+test "state_snapshot: map transcode sorts keys" {
+  // Build a MapState payload where the visible map and deleted keys are intentionally unsorted.
+  let values : Array[(String, CommonValue)] = [
+    ("b", CommonValue::bool(true)),
+    ("a", CommonValue::i64(1L)),
+  ]
+  let deleted : Array[String] = ["d", "c"]
+  let peers : Array[UInt64] = [123UL]
+
+  // Meta is for all keys (a, b, c, d) in sorted order.
+  let meta_w = BytesWriter::new()
+  for _k in ["a", "b", "c", "d"] {
+    meta_w.write_uleb128_u64(0UL) // peer_idx
+    meta_w.write_uleb128_u64(7UL) // lamport
+  }
+  let peer_w = BytesWriter::new()
+  peer_w.write_uleb128_u64(peers.length().to_uint64())
+  for p in peers {
+    peer_w.write_u64_le(p)
+  }
+  let payload_w = BytesWriter::new()
+  payload_w.write_bytes(postcard_encode_map_string_common_value(values))
+  payload_w.write_bytes(postcard_encode_vec_string(deleted))
+  payload_w.write_bytes(peer_w.to_bytes())
+  payload_w.write_bytes(meta_w.to_bytes())
+  let payload = payload_w.to_bytes()
+  let out = try! transcode_map_state_snapshot(payload[:], true)
+
+  // Expect sorted visible entries and deleted keys in the output.
+  let values_sorted : Array[(String, CommonValue)] = [
+    ("a", CommonValue::i64(1L)),
+    ("b", CommonValue::bool(true)),
+  ]
+  let deleted_sorted : Array[String] = ["c", "d"]
+  let expected_w = BytesWriter::new()
+  expected_w.write_bytes(postcard_encode_map_string_common_value(values_sorted))
+  expected_w.write_bytes(postcard_encode_vec_string(deleted_sorted))
+  expected_w.write_bytes(peer_w.to_bytes())
+  expected_w.write_bytes(meta_w.to_bytes())
+  let expected = expected_w.to_bytes()
+  assert_eq(out, expected)
+}
+
+///|
+test "state_snapshot: list transcode roundtrip" {
+  let values : Array[CommonValue] = [
+    CommonValue::i64(1L),
+    CommonValue::bool(false),
+  ]
+  let peers : Array[UInt64] = [1UL]
+  let peer_w = BytesWriter::new()
+  peer_w.write_uleb128_u64(peers.length().to_uint64())
+  for p in peers {
+    peer_w.write_u64_le(p)
+  }
+  let ids_bytes = encode_columnar_vec_wrapped([
+    encode_delta_rle_usize([0UL, 0UL]),
+    encode_delta_rle_i32([0, 1]),
+    encode_delta_rle_i32([0, 0]),
+  ])
+  let payload_w = BytesWriter::new()
+  payload_w.write_bytes(postcard_encode_vec_common_value(values))
+  payload_w.write_bytes(peer_w.to_bytes())
+  payload_w.write_bytes(ids_bytes)
+  let payload = payload_w.to_bytes()
+  let out = try!
transcode_list_state_snapshot(payload[:], true) + assert_eq(out, payload) +} + +///| +test "state_snapshot: richtext transcode validates unicode scalar length" { + // "a😀b" has 3 Unicode scalars. + let text = "a😀b" + let peers : Array[UInt64] = [7UL] + let peer_w = BytesWriter::new() + peer_w.write_uleb128_u64(1UL) + peer_w.write_u64_le(peers[0]) + + // spans: one text span len=3 + let spans_bytes = encode_columnar_vec([ + encode_delta_rle_usize([0UL]), + encode_delta_rle_i32([0]), + encode_delta_rle_i32([0]), + encode_delta_rle_i32([3]), + ]) + let encoded_text_w = BytesWriter::new() + encoded_text_w.write_varint_u64(3UL) + encoded_text_w.write_bytes(spans_bytes) + encoded_text_w.write_bytes(postcard_encode_vec_string([])) + encoded_text_w.write_bytes(b"\x00") + let payload_w = BytesWriter::new() + payload_w.write_bytes(postcard_encode_string(text)) + payload_w.write_bytes(peer_w.to_bytes()) + payload_w.write_bytes(encoded_text_w.to_bytes()) + let payload = payload_w.to_bytes() + let out = try! transcode_richtext_state_snapshot(payload[:], true) + assert_eq(out, payload) +} + +///| +test "state_snapshot: tree transcode empty" { + // Empty tree: no peers, no nodes, empty position arena (must still be encoded as non-empty bytes). + let peer_w = BytesWriter::new() + peer_w.write_uleb128_u64(0UL) + let node_ids_bytes = encode_columnar_vec([ + encode_delta_rle_usize([]), + encode_delta_rle_i32([]), + ]) + let nodes_bytes = encode_columnar_vec([ + encode_delta_rle_usize([]), + encode_delta_rle_usize([]), + encode_delta_rle_i32([]), + encode_delta_rle_i32([]), + b"\x00", + ]) + let encoded_tree_w = BytesWriter::new() + encoded_tree_w.write_varint_u64(4UL) + encoded_tree_w.write_bytes(node_ids_bytes) + encoded_tree_w.write_bytes(nodes_bytes) + encoded_tree_w.write_bytes(postcard_encode_bytes(encode_position_arena([]))) + encoded_tree_w.write_bytes(postcard_encode_bytes(b"")) + let payload_w = BytesWriter::new() + payload_w.write_bytes(peer_w.to_bytes()) + payload_w.write_bytes(encoded_tree_w.to_bytes()) + let payload = payload_w.to_bytes() + let out = try! transcode_tree_state_snapshot(payload[:], true) + assert_eq(out, payload) +} + +///| +test "state_snapshot: movable_list transcode minimal" { + let values : Array[CommonValue] = [CommonValue::i64(1L)] + let peers : Array[UInt64] = [10UL] + let peer_w = BytesWriter::new() + peer_w.write_uleb128_u64(1UL) + peer_w.write_u64_le(peers[0]) + + // items: sentinel + 1 visible + let items_bytes = encode_columnar_vec([ + encode_delta_rle_usize([0UL, 0UL]), + encode_bool_rle([true, true]), + encode_bool_rle([true, true]), + ]) + // list_item_ids: 1 id for the visible item + let list_ids_bytes = encode_columnar_vec([ + encode_delta_rle_usize([0UL]), + encode_delta_rle_i32([0]), + encode_delta_rle_i32([0]), + ]) + // elem_ids / last_set_ids: empty + let elem_ids_bytes = encode_columnar_vec([ + encode_delta_rle_usize([]), + encode_delta_rle_u32([]), + ]) + let last_ids_bytes = encode_columnar_vec([ + encode_delta_rle_usize([]), + encode_delta_rle_u32([]), + ]) + let encoded_w = BytesWriter::new() + encoded_w.write_varint_u64(4UL) + encoded_w.write_bytes(items_bytes) + encoded_w.write_bytes(list_ids_bytes) + encoded_w.write_bytes(elem_ids_bytes) + encoded_w.write_bytes(last_ids_bytes) + let payload_w = BytesWriter::new() + payload_w.write_bytes(postcard_encode_vec_common_value(values)) + payload_w.write_bytes(peer_w.to_bytes()) + payload_w.write_bytes(encoded_w.to_bytes()) + let payload = payload_w.to_bytes() + let out = try! 
transcode_movable_list_state_snapshot(payload[:], true)
+  assert_eq(out, payload)
+}
+
+///|
+test "state_snapshot: counter transcode" {
+  let payload = b"\x00\x00\x00\x00\x00\x00\xF0\x3F" // 1.0 (f64 LE)
+  let out = try! transcode_counter_state_snapshot(payload[:], true)
+  assert_eq(out, payload)
+}
diff --git a/moon/loro_codec/state_snapshot_tree.mbt b/moon/loro_codec/state_snapshot_tree.mbt
new file mode 100644
index 000000000..a758d0307
--- /dev/null
+++ b/moon/loro_codec/state_snapshot_tree.mbt
@@ -0,0 +1,144 @@
+///|
+fn encode_postcard_vec_usize(values : Array[UInt64]) -> Bytes {
+  let w = BytesWriter::new()
+  w.write_varint_u64(values.length().to_uint64())
+  for v in values {
+    w.write_varint_u64(v)
+  }
+  w.to_bytes()
+}
+
+///|
+fn decode_postcard_vec_usize(
+  bytes : BytesView,
+) -> Array[UInt64] raise DecodeError {
+  let r = BytesReader::from_view(bytes)
+  let n_u64 = r.read_varint_u64()
+  if n_u64 > 0x7FFF_FFFFUL {
+    raise DecodeError("postcard: usize vec too large")
+  }
+  let n = n_u64.to_int()
+  let out : Array[UInt64] = []
+  for _i in 0..<n {
+    out.push(r.read_varint_u64())
+  }
+  out
+}
+
+///|
+pub fn transcode_tree_state_snapshot(
+  bytes : BytesView,
+  validate : Bool,
+) -> Bytes raise DecodeError {
+  let (peers, rest1) = take_peer_table(bytes)
+  let r = BytesReader::from_view(rest1)
+  let n_fields = r.read_varint_u64()
+  if n_fields != 4UL {
+    raise DecodeError("tree: invalid EncodedTree field count")
+  }
+
+  // node_ids
+  let node_ids_view = r.remaining_view()
+  let (node_id_cols, rest_after_node_ids) = take_columnar_vec(node_ids_view)
+  r.skip(node_ids_view.length() - rest_after_node_ids.length())
+  if node_id_cols.length() != 2 {
+    raise DecodeError("tree: invalid node_id column count")
+  }
+  let node_peer_idx = decode_delta_rle_usize(node_id_cols[0])
+  let node_counter = decode_delta_rle_i32(node_id_cols[1])
+  if node_counter.length() != node_peer_idx.length() {
+    raise DecodeError("tree: node_id column length mismatch")
+  }
+
+  // nodes
+  let nodes_view = r.remaining_view()
+  let (node_cols, rest_after_nodes) = take_columnar_vec(nodes_view)
+  r.skip(nodes_view.length() - rest_after_nodes.length())
+  if node_cols.length() != 5 {
+    raise DecodeError("tree: invalid node column count")
+  }
+  let parent_idx_plus_two = decode_delta_rle_usize(node_cols[0])
+  let last_set_peer_idx = decode_delta_rle_usize(node_cols[1])
+  let last_set_counter = decode_delta_rle_i32(node_cols[2])
+  let last_set_lamport_sub = decode_delta_rle_i32(node_cols[3])
+  let fractional_idx_idx = decode_postcard_vec_usize(node_cols[4])
+  let n_nodes = node_peer_idx.length()
+  if parent_idx_plus_two.length() != n_nodes {
+    raise DecodeError("tree: node column length mismatch")
+  }
+  if last_set_peer_idx.length() != n_nodes {
+    raise DecodeError("tree: node column length mismatch")
+  }
+  if last_set_counter.length() != n_nodes {
+    raise DecodeError("tree: node column length mismatch")
+  }
+  if last_set_lamport_sub.length() != n_nodes {
+    raise DecodeError("tree: node column length mismatch")
+  }
+  if fractional_idx_idx.length() != n_nodes {
+    raise DecodeError("tree: node column length mismatch")
+  }
+  let frac_view = r.remaining_view()
+  let (fractional_indexes_bytes, rest_after_frac) = postcard_take_bytes(
+    frac_view,
+  )
+  r.skip(frac_view.length() - rest_after_frac.length())
+  let reserved_view = r.remaining_view()
+  let (reserved_bytes, rest_after_reserved) = postcard_take_bytes(reserved_view)
+  r.skip(reserved_view.length() - rest_after_reserved.length())
+  if r.remaining() != 0 {
+    raise DecodeError("tree: trailing bytes")
+  }
+  let positions = decode_position_arena_v2(fractional_indexes_bytes[:])
+  if validate {
+    for p in node_peer_idx {
+      if p > 0x7FFF_FFFFUL || p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("tree: node peer_idx out of range")
+      }
+    }
+    for p in last_set_peer_idx {
+      if p > 0x7FFF_FFFFUL || p.to_int() < 0 || p.to_int() >= peers.length() {
+        raise DecodeError("tree: last_set_peer_idx out of range")
+      }
+    }
+    for i in 0..<n_nodes {
+      let pi = parent_idx_plus_two[i]
+      if pi > n_nodes.to_uint64() + 1UL {
+        raise DecodeError("tree: invalid parent_idx_plus_two")
+      }
+      let fi = fractional_idx_idx[i]
+      if fi > 0x7FFF_FFFFUL ||
+        fi.to_int() < 0 ||
+        fi.to_int() >= positions.length() {
+        raise DecodeError("tree: invalid fractional_index_idx")
+      }
+    }
+  }
+  let node_ids_bytes = encode_columnar_vec([
+    encode_delta_rle_usize(node_peer_idx),
+    encode_delta_rle_i32(node_counter),
+  ])
+  let nodes_bytes = encode_columnar_vec([
+    encode_delta_rle_usize(parent_idx_plus_two),
+    encode_delta_rle_usize(last_set_peer_idx),
+    encode_delta_rle_i32(last_set_counter),
+    encode_delta_rle_i32(last_set_lamport_sub),
+    encode_postcard_vec_usize(fractional_idx_idx),
+  ])
+  let encoded_tree = BytesWriter::new()
+  encoded_tree.write_varint_u64(4UL)
+  encoded_tree.write_bytes(node_ids_bytes)
+  encoded_tree.write_bytes(nodes_bytes)
+  // IMPORTANT: TreeState uses Rust `PositionArena::encode()`, which is non-empty even when empty.
+  encoded_tree.write_bytes(
+    postcard_encode_bytes(encode_position_arena(positions)),
+  )
+  encoded_tree.write_bytes(postcard_encode_bytes(reserved_bytes))
+  let w = BytesWriter::new()
+  w.write_bytes(encode_peer_table(peers))
+  w.write_bytes(encoded_tree.to_bytes())
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/top.mbt b/moon/loro_codec/top.mbt
new file mode 100644
index 000000000..9f5ebc921
--- /dev/null
+++ b/moon/loro_codec/top.mbt
@@ -0,0 +1,4 @@
+///|
+pub fn version() -> String {
+  "0.1.0"
+}
diff --git a/moon/loro_codec/value_custom.mbt b/moon/loro_codec/value_custom.mbt
new file mode 100644
index 000000000..62d233d99
--- /dev/null
+++ b/moon/loro_codec/value_custom.mbt
@@ -0,0 +1,10 @@
+// -----------------------------------------------------------------------------
+// Custom Value encoding used in ChangeBlock.
+// ----------------------------------------------------------------------------- +// +// Implementation is split into cohesive files: +// - value_custom_types.mbt +// - value_custom_bytes.mbt +// - value_custom_loro_value.mbt +// - value_custom_decode.mbt +// - value_custom_encode.mbt diff --git a/moon/loro_codec/value_custom_bytes.mbt b/moon/loro_codec/value_custom_bytes.mbt new file mode 100644 index 000000000..beb05a525 --- /dev/null +++ b/moon/loro_codec/value_custom_bytes.mbt @@ -0,0 +1,50 @@ +///| +fn read_len_u64(r : BytesReader) -> UInt64 raise DecodeError { + let n = r.read_uleb128_u64() + if n > MAX_COLLECTION_SIZE { + raise DecodeError("value: collection too large") + } + n +} + +///| +fn read_utf8(r : BytesReader) -> String raise DecodeError { + let len_u64 = r.read_uleb128_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("value: string too long") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("value: invalid string length") + } + let bytes = r.read_exact(len) + @encoding/utf8.decode(bytes) catch { + @encoding/utf8.Malformed(_) => raise DecodeError("value: invalid utf8") + } +} + +///| +fn read_binary(r : BytesReader) -> Bytes raise DecodeError { + let len_u64 = r.read_uleb128_u64() + if len_u64 > 0x7FFF_FFFFUL { + raise DecodeError("value: binary too long") + } + let len = len_u64.to_int() + if len < 0 || len > r.remaining() { + raise DecodeError("value: invalid binary length") + } + r.read_exact(len).to_bytes() +} + +///| +fn write_utf8(w : BytesWriter, s : String) -> Unit { + let bytes = @encoding/utf8.encode(s[:]) + w.write_uleb128_u64(bytes.length().to_uint64()) + w.write_bytes(bytes) +} + +///| +fn write_binary(w : BytesWriter, b : Bytes) -> Unit { + w.write_uleb128_u64(b.length().to_uint64()) + w.write_bytes(b) +} diff --git a/moon/loro_codec/value_custom_decode.mbt b/moon/loro_codec/value_custom_decode.mbt new file mode 100644 index 000000000..990725709 --- /dev/null +++ b/moon/loro_codec/value_custom_decode.mbt @@ -0,0 +1,100 @@ +///| +pub fn decode_value(bytes : BytesView) -> (Value, BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let tag = r.read_u8() + decode_value_content(tag, bytes[1:]) +} + +///| +pub fn decode_value_content( + tag : Byte, + bytes : BytesView, +) -> (Value, BytesView) raise DecodeError { + let r = BytesReader::from_view(bytes) + let kind = tag.to_uint() & 0x7F + let v : Value = match kind { + 0 => Value::Null + 1 => Value::True + 2 => Value::False + 3 => Value::I64(r.read_sleb128_i64()) + 4 => { + let bits = r.read_u64_be() + Value::F64(bits.reinterpret_as_double()) + } + 5 => Value::Str(read_utf8(r)) + 6 => Value::Binary(read_binary(r)) + 7 => Value::ContainerIdx(r.read_uleb128_u64()) + 8 => Value::DeleteOnce + 9 => Value::DeleteSeq + 10 => { + let x = r.read_sleb128_i64() + if x < -2147483648L || x > 2147483647L { + raise DecodeError("value: deltaint overflow") + } + Value::DeltaInt(x.to_int()) + } + 11 => Value::LoroValue(read_loro_value(r, 0)) + 12 => { + let info = r.read_u8() + let len = r.read_uleb128_u64() + let key_idx = r.read_uleb128_u64() + let value = read_loro_value(r, 0) + Value::MarkStart({ info, len, key_idx, value }) + } + 13 => { + let target_idx = r.read_uleb128_u64() + let is_parent_null = r.read_u8() != b'\x00' + let position = r.read_uleb128_u64() + let parent_idx = if is_parent_null { + Option::None + } else { + Option::Some(r.read_uleb128_u64()) + } + Value::TreeMove({ target_idx, is_parent_null, position, parent_idx }) + } + 14 => { + let from 
= r.read_uleb128_u64() + let from_idx = r.read_uleb128_u64() + let lamport = r.read_uleb128_u64() + Value::ListMove({ from, from_idx, lamport }) + } + 15 => { + let peer_idx = r.read_uleb128_u64() + let lamport_u64 = r.read_uleb128_u64() + if lamport_u64 > 0xFFFF_FFFFUL { + raise DecodeError("value: listset lamport overflow") + } + let value = read_loro_value(r, 0) + Value::ListSet({ peer_idx, lamport: lamport_u64.to_uint(), value }) + } + 16 => { + let subject_peer_idx = r.read_uleb128_u64() + let subject_cnt_u64 = r.read_uleb128_u64() + let position_idx = r.read_uleb128_u64() + let is_parent_null = r.read_u8() != b'\x00' + let mut parent_peer_idx : UInt64 = 0 + let mut parent_cnt_u64 : UInt64 = 0 + if !is_parent_null { + parent_peer_idx = r.read_uleb128_u64() + parent_cnt_u64 = r.read_uleb128_u64() + } + if subject_cnt_u64 > 0x7FFF_FFFFUL || parent_cnt_u64 > 0x7FFF_FFFFUL { + raise DecodeError("value: raw_tree_move counter overflow") + } + Value::RawTreeMove({ + subject_peer_idx, + subject_cnt: subject_cnt_u64.to_int(), + position_idx, + is_parent_null, + parent_peer_idx, + parent_cnt: parent_cnt_u64.to_int(), + }) + } + _ => { + let data = read_binary(r) + Value::Future(tag, data) + } + } + let rest = bytes[bytes.length() - r.remaining():] + (v, rest) +} diff --git a/moon/loro_codec/value_custom_encode.mbt b/moon/loro_codec/value_custom_encode.mbt new file mode 100644 index 000000000..ed69693ec --- /dev/null +++ b/moon/loro_codec/value_custom_encode.mbt @@ -0,0 +1,92 @@ +///| +pub fn encode_value_content(v : Value) -> (Byte, Bytes) { + let w = BytesWriter::new() + match v { + Value::Null => (b'\x00', b"") + Value::True => (b'\x01', b"") + Value::False => (b'\x02', b"") + Value::I64(x) => { + w.write_sleb128_i64(x) + (b'\x03', w.to_bytes()) + } + Value::F64(x) => { + w.write_u64_be(x.reinterpret_as_uint64()) + (b'\x04', w.to_bytes()) + } + Value::Str(s) => { + write_utf8(w, s) + (b'\x05', w.to_bytes()) + } + Value::Binary(b) => { + write_binary(w, b) + (b'\x06', w.to_bytes()) + } + Value::ContainerIdx(idx) => { + w.write_uleb128_u64(idx) + (b'\x07', w.to_bytes()) + } + Value::DeleteOnce => (b'\x08', b"") + Value::DeleteSeq => (b'\x09', b"") + Value::DeltaInt(x) => { + w.write_sleb128_i64(x.to_int64()) + (b'\x0A', w.to_bytes()) + } + Value::LoroValue(v) => { + write_loro_value(w, v) + (b'\x0B', w.to_bytes()) + } + Value::MarkStart(m) => { + w.write_u8(m.info) + w.write_uleb128_u64(m.len) + w.write_uleb128_u64(m.key_idx) + write_loro_value(w, m.value) + (b'\x0C', w.to_bytes()) + } + Value::TreeMove(t) => { + w.write_uleb128_u64(t.target_idx) + w.write_u8(if t.is_parent_null { b'\x01' } else { b'\x00' }) + w.write_uleb128_u64(t.position) + match t.parent_idx { + Option::None => () + Option::Some(p) => w.write_uleb128_u64(p) + } + (b'\x0D', w.to_bytes()) + } + Value::ListMove(m) => { + w.write_uleb128_u64(m.from) + w.write_uleb128_u64(m.from_idx) + w.write_uleb128_u64(m.lamport) + (b'\x0E', w.to_bytes()) + } + Value::ListSet(s) => { + w.write_uleb128_u64(s.peer_idx) + w.write_uleb128_u64(s.lamport.to_uint64()) + write_loro_value(w, s.value) + (b'\x0F', w.to_bytes()) + } + Value::RawTreeMove(t) => { + w.write_uleb128_u64(t.subject_peer_idx) + w.write_uleb128_u64(t.subject_cnt.to_uint64()) + w.write_uleb128_u64(t.position_idx) + w.write_u8(if t.is_parent_null { b'\x01' } else { b'\x00' }) + if !t.is_parent_null { + w.write_uleb128_u64(t.parent_peer_idx) + w.write_uleb128_u64(t.parent_cnt.to_uint64()) + } + (b'\x10', w.to_bytes()) + } + Value::Future(tag, data) => { + write_binary(w, data) + 
(tag, w.to_bytes())
+    }
+  }
+}
+
+///|
+pub fn encode_value(v : Value) -> Bytes {
+  let w = BytesWriter::new()
+  let (tag, content) = encode_value_content(v)
+  w.write_u8(tag)
+  w.write_bytes(content)
+  w.to_bytes()
+}
diff --git a/moon/loro_codec/value_custom_loro_value.mbt b/moon/loro_codec/value_custom_loro_value.mbt
new file mode 100644
index 000000000..945322553
--- /dev/null
+++ b/moon/loro_codec/value_custom_loro_value.mbt
@@ -0,0 +1,87 @@
+///|
+fn read_loro_value(r : BytesReader, depth : Int) -> LoroValue raise DecodeError {
+  if depth > 1024 {
+    raise DecodeError("value: too deep")
+  }
+  let kind = r.read_u8().to_uint()
+  match kind {
+    0 => LoroValue::Null
+    1 => LoroValue::True
+    2 => LoroValue::False
+    3 => LoroValue::I64(r.read_sleb128_i64())
+    4 => {
+      let bits = r.read_u64_be()
+      LoroValue::F64(bits.reinterpret_as_double())
+    }
+    5 => LoroValue::Str(read_utf8(r))
+    6 => LoroValue::Binary(read_binary(r))
+    7 => {
+      let len = read_len_u64(r).to_int()
+      let items : Array[LoroValue] = []
+      for _i in 0..<len {
+        items.push(read_loro_value(r, depth + 1))
+      }
+      LoroValue::List(items)
+    }
+    8 => {
+      let len = read_len_u64(r).to_int()
+      let items : Array[(UInt64, LoroValue)] = []
+      for _i in 0..<len {
+        let k = r.read_uleb128_u64()
+        items.push((k, read_loro_value(r, depth + 1)))
+      }
+      LoroValue::Map(items)
+    }
+    9 => {
+      let ct = r.read_u8()
+      LoroValue::ContainerType(ct)
+    }
+    _ => raise DecodeError("value: invalid loro_value kind")
+  }
+}
+
+///|
+fn write_loro_value(w : BytesWriter, v : LoroValue) -> Unit {
+  match v {
+    LoroValue::Null => w.write_u8(b'\x00')
+    LoroValue::True => w.write_u8(b'\x01')
+    LoroValue::False => w.write_u8(b'\x02')
+    LoroValue::I64(x) => {
+      w.write_u8(b'\x03')
+      w.write_sleb128_i64(x)
+    }
+    LoroValue::F64(x) => {
+      w.write_u8(b'\x04')
+      w.write_u64_be(x.reinterpret_as_uint64())
+    }
+    LoroValue::Str(s) => {
+      w.write_u8(b'\x05')
+      write_utf8(w, s)
+    }
+    LoroValue::Binary(b) => {
+      w.write_u8(b'\x06')
+      write_binary(w, b)
+    }
+    LoroValue::List(items) => {
+      w.write_u8(b'\x07')
+      w.write_uleb128_u64(items.length().to_uint64())
+      for it in items {
+        write_loro_value(w, it)
+      }
+    }
+    LoroValue::Map(items) => {
+      w.write_u8(b'\x08')
+      w.write_uleb128_u64(items.length().to_uint64())
+      for pair in items {
+        let (k, it) = pair
+        w.write_uleb128_u64(k)
+        write_loro_value(w, it)
+      }
+    }
+    LoroValue::ContainerType(ct) => {
+      w.write_u8(b'\x09')
+      w.write_u8(ct)
+    }
+  }
+}
diff --git a/moon/loro_codec/value_custom_test.mbt b/moon/loro_codec/value_custom_test.mbt
new file mode 100644
index 000000000..25d58f2ad
--- /dev/null
+++ b/moon/loro_codec/value_custom_test.mbt
@@ -0,0 +1,63 @@
+///|
+fn assert_roundtrip(bytes : Bytes) -> Unit raise {
+  let (v, rest) = decode_value(bytes[:]) catch { DecodeError(msg) => fail(msg) }
+  assert_eq(rest.length(), 0)
+  let encoded = encode_value(v)
+  assert_eq(encoded, bytes)
+}
+
+///|
+test "value_custom: primitives roundtrip" {
+  assert_roundtrip(b"\x00") // Null
+  assert_roundtrip(b"\x01") // True
+  assert_roundtrip(b"\x02") // False
+  assert_roundtrip(b"\x03\x7F") // I64(-1)
+  assert_roundtrip(b"\x0A\x7F") // DeltaInt(-1)
+  assert_roundtrip(b"\x05\x02hi") // Str("hi")
+  assert_roundtrip(b"\x06\x03\x01\x02\x03") // Binary([1,2,3])
+  assert_roundtrip(b"\x07\xAC\x02") // ContainerIdx(300)
+  assert_roundtrip(b"\x08") // DeleteOnce
+  assert_roundtrip(b"\x09") // DeleteSeq
+}
+
+///|
+test "value_custom: f64 big-endian roundtrip" {
+  // 1.0 in IEEE 754 double (big-endian)
+  assert_roundtrip(b"\x04\x3F\xF0\x00\x00\x00\x00\x00\x00")
+}
+
+///|
+test "value_custom: nested loro_value list/map" {
+  // Value::LoroValue(List([I64(1), Str("x")]))
+  assert_roundtrip(b"\x0B\x07\x02\x03\x01\x05\x01x")
+  // Value::LoroValue(Map({0: Null}))
+  assert_roundtrip(b"\x0B\x08\x01\x00\x00")
+}
+
+///|
+test "value_custom: mark/tree/list ops" {
+  // MarkStart(info=0x84, len=1, key_idx=0, value=True)
+  assert_roundtrip(b"\x0C\x84\x01\x00\x01")
+  // TreeMove(target=1, parent=null, position=2)
+  assert_roundtrip(b"\x0D\x01\x01\x02")
+  // TreeMove(target=1, parent=3, position=2)
+  assert_roundtrip(b"\x0D\x01\x00\x02\x03")
+  // ListMove(from=1, from_idx=2, lamport=3)
+  assert_roundtrip(b"\x0E\x01\x02\x03")
+  // ListSet(peer_idx=7, lamport=42, value=I64(1))
+  assert_roundtrip(b"\x0F\x07\x2A\x03\x01")
+}
+
+///|
+test "value_custom: raw_tree_move" {
+  // RawTreeMove(subject_peer=1, subject_cnt=2, position=3, parent=null)
+  assert_roundtrip(b"\x10\x01\x02\x03\x01")
+  // RawTreeMove(subject_peer=1, subject_cnt=2, position=3, parent=(4,5))
+  assert_roundtrip(b"\x10\x01\x02\x03\x00\x04\x05")
+}
+
+///|
+test "value_custom: future unknown kind payload" {
+  // Future tag byte + binary payload (uleb128 len + bytes)
+  assert_roundtrip(b"\x91\x01X")
+}
diff --git a/moon/loro_codec/value_custom_types.mbt b/moon/loro_codec/value_custom_types.mbt
new file mode 100644
index 000000000..162453a0c
--- /dev/null
+++ b/moon/loro_codec/value_custom_types.mbt
@@ -0,0 +1,78 @@
+///|
+const MAX_COLLECTION_SIZE : UInt64 = 268435456UL
+
+///|
+pub enum LoroValue {
+  Null
+  True
+  False
+  I64(Int64)
+  F64(Double)
+  Str(String)
+  Binary(Bytes)
+  List(Array[LoroValue])
+  Map(Array[(UInt64, LoroValue)]) // (key_idx, value)
+  ContainerType(Byte)
+} derive(Eq, Show)
+
+///|
+pub enum Value {
+  Null
+  True
+  False
+  I64(Int64)
+  F64(Double)
+  Str(String)
+  Binary(Bytes)
+  ContainerIdx(UInt64)
+  DeleteOnce
+  DeleteSeq
+  DeltaInt(Int)
+  LoroValue(LoroValue)
+  MarkStart(MarkStart)
+  TreeMove(TreeMove)
+  ListMove(ListMove)
+  ListSet(ListSet)
+  RawTreeMove(RawTreeMove)
+  Future(Byte, Bytes) // (raw tag byte, binary payload bytes)
+} derive(Eq, Show)
+
+///|
+pub struct MarkStart {
+  info : Byte
+  len : UInt64
+  key_idx : UInt64
+  value : LoroValue
+} derive(Eq, Show)
+
+///|
+pub struct TreeMove {
+  target_idx : UInt64
+  is_parent_null : Bool
+  position : UInt64
+  parent_idx : UInt64?
+} derive(Eq, Show) + +///| +pub struct ListMove { + from : UInt64 + from_idx : UInt64 + lamport : UInt64 +} derive(Eq, Show) + +///| +pub struct ListSet { + peer_idx : UInt64 + lamport : UInt + value : LoroValue +} derive(Eq, Show) + +///| +pub struct RawTreeMove { + subject_peer_idx : UInt64 + subject_cnt : Int + position_idx : UInt64 + is_parent_null : Bool + parent_peer_idx : UInt64 + parent_cnt : Int +} derive(Eq, Show) diff --git a/moon/loro_codec/xxhash32.mbt b/moon/loro_codec/xxhash32.mbt new file mode 100644 index 000000000..ea627b147 --- /dev/null +++ b/moon/loro_codec/xxhash32.mbt @@ -0,0 +1,94 @@ +///| +pub const LORO_XXH32_SEED : UInt = 0x4F524F4C + +///| +const PRIME32_1 : UInt = 0x9E3779B1 + +///| +const PRIME32_2 : UInt = 0x85EBCA77 + +///| +const PRIME32_3 : UInt = 0xC2B2AE3D + +///| +const PRIME32_4 : UInt = 0x27D4EB2F + +///| +const PRIME32_5 : UInt = 0x165667B1 + +///| +fn rotl32(x : UInt, r : Int) -> UInt { + (x << r) | (x >> (32 - r)) +} + +///| +fn mul32(a : UInt, b : UInt) -> UInt { + (a.to_uint64() * b.to_uint64()).to_uint() +} + +///| +fn read_u32_le(bytes : BytesView, offset : Int) -> UInt raise DecodeError { + if offset < 0 || offset + 4 > bytes.length() { + raise DecodeError("xxhash32: out of bounds") + } + let b0 = bytes[offset + 0].to_uint() + let b1 = bytes[offset + 1].to_uint() + let b2 = bytes[offset + 2].to_uint() + let b3 = bytes[offset + 3].to_uint() + b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) +} + +///| +fn round(acc : UInt, input : UInt) -> UInt { + let mut acc = acc + acc = acc + mul32(input, PRIME32_2) + acc = rotl32(acc, 13) + acc = mul32(acc, PRIME32_1) + acc +} + +///| +pub fn xxhash32(data : BytesView, seed : UInt) -> UInt raise DecodeError { + let len = data.length() + let mut offset = 0 + let mut h32 : UInt = 0 + if len >= 16 { + let mut v1 = seed + PRIME32_1 + PRIME32_2 + let mut v2 = seed + PRIME32_2 + let mut v3 = seed + let mut v4 = seed - PRIME32_1 + let limit = len - 16 + while offset <= limit { + v1 = round(v1, read_u32_le(data, offset)) + offset = offset + 4 + v2 = round(v2, read_u32_le(data, offset)) + offset = offset + 4 + v3 = round(v3, read_u32_le(data, offset)) + offset = offset + 4 + v4 = round(v4, read_u32_le(data, offset)) + offset = offset + 4 + } + h32 = rotl32(v1, 1) + rotl32(v2, 7) + rotl32(v3, 12) + rotl32(v4, 18) + } else { + h32 = seed + PRIME32_5 + } + h32 = h32 + len.reinterpret_as_uint() + while offset + 4 <= len { + h32 = h32 + mul32(read_u32_le(data, offset), PRIME32_3) + h32 = mul32(rotl32(h32, 17), PRIME32_4) + offset = offset + 4 + } + while offset < len { + h32 = h32 + mul32(data[offset].to_uint(), PRIME32_5) + h32 = mul32(rotl32(h32, 11), PRIME32_1) + offset = offset + 1 + } + + // avalanche + h32 = h32 ^ (h32 >> 15) + h32 = mul32(h32, PRIME32_2) + h32 = h32 ^ (h32 >> 13) + h32 = mul32(h32, PRIME32_3) + h32 = h32 ^ (h32 >> 16) + h32 +} diff --git a/moon/loro_codec/xxhash32_test.mbt b/moon/loro_codec/xxhash32_test.mbt new file mode 100644 index 000000000..659478c49 --- /dev/null +++ b/moon/loro_codec/xxhash32_test.mbt @@ -0,0 +1,18 @@ +///| +test "xxhash32 test vectors" { + fn h(data : Bytes, seed : UInt) -> UInt { + xxhash32(data[:], seed) catch { + DecodeError(_) => 0 + } + } + + assert_eq(h(b"", 0), 0x02CC5D05) + let seed = LORO_XXH32_SEED + assert_eq(h(b"", seed), 0xDC3BF95A) + assert_eq(h(b"\x00", seed), 0xDAD9F666) + assert_eq(h(b"loro", seed), 0x74D321EA) + assert_eq( + h(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", seed), + 0x2EDAB25F, + ) +} diff --git a/moon/moon.mod.json 
b/moon/moon.mod.json
new file mode 100644
index 000000000..c2900e81b
--- /dev/null
+++ b/moon/moon.mod.json
@@ -0,0 +1,4 @@
+{
+  "name": "loro-dev/loro"
+}
+
diff --git a/moon/specs/00-goals-and-acceptance.md b/moon/specs/00-goals-and-acceptance.md
new file mode 100644
index 000000000..229d966d6
--- /dev/null
+++ b/moon/specs/00-goals-and-acceptance.md
@@ -0,0 +1,95 @@
+# 00. Goals, Scope, and Acceptance Criteria
+
+## 0.1 Background
+
+Loro's export/import uses a binary encoding format (see `docs/encoding.md` and its companion documents). The goal of this project is to implement codecs for the same format in MoonBit, enabling interoperability with the Rust version of Loro.
+
+## 0.2 Final Acceptance (must be satisfied)
+
+### A. Rust → Moon: any exported blob decodes correctly
+
+For any blob produced by `LoroDoc.export(...)` on the Rust side (currently FastSnapshot and FastUpdates in particular), the Moon decoder must:
+
+- Verify the magic, mode, and checksum
+- Correctly parse the body (snapshot/updates)
+- Correctly parse SSTables, ChangeBlocks, container states, value encodings, and compression (LZ4 Frame)
+- Not rely on fixed structures that only hold for certain inputs
+
+### B. Moon → Rust: Moon-encoded blobs import correctly into Rust
+
+Moon must be able to re-encode the decoded structures into a valid blob such that:
+
+- Rust `import()` succeeds
+- After importing, the Rust document state matches expectations (judged against the Rust ground truth)
+
+> Note: byte-for-byte equality is not required initially, but semantic equivalence is mandatory; byte-level equality can be pursued later to reduce divergence risk.
+
+### C. Bidirectional interop e2e
+
+The project is accepted as "done" via e2e tests:
+
+1. Rust generates test documents and exported blobs (Snapshot/Updates/…).
+2. Moon runs `decode` on each blob (and `transcode` where needed: decode→encode).
+3. Rust runs `import()` on the Moon output and compares `get_deep_value()` (or equivalent) against the ground-truth JSON.
+
+## 0.3 Supported Encode Modes
+
+- Must support:
+  - `EncodeMode = 3`: FastSnapshot
+  - `EncodeMode = 4`: FastUpdates
+- Explicitly unsupported (must raise an error):
+  - `EncodeMode = 1`: OutdatedRle
+  - `EncodeMode = 2`: OutdatedSnapshot
+
+## 0.4 Compatibility Definition (suggested strategy)
+
+To guarantee re-encodability and cross-language interop, the Moon side should maintain a **lossless IR**:
+
+- The IR's goal is not to implement full CRDT behavior, but to carry:
+  - The document header (mode; checksum recomputable)
+  - Snapshot: oplog_bytes (SSTable) + state_bytes (SSTable or the empty marker "E") + shallow_root_state_bytes (may be empty)
+  - Updates: a sequence of ChangeBlocks
+  - SSTable: blocks, meta, and every KV entry
+  - ChangeBlock: every segment (header/meta/cids/keys/positions/ops/delete_start_ids/values) restorable losslessly
+  - Unknown/future fields: preserved as opaque bytes (conservative re-encoding for forward compatibility)
+
+Suggested milestones:
+
+1. **v1: transcoder first**: Moon first decodes Rust blobs into the IR and re-encodes them into blobs Rust can import (encoding details may differ from Rust, e.g. compression strategy or block splitting).
+2. **v2: alignment**: progressively align with Rust's encoding strategy (block_size, compression switches, columnar details) to reduce divergence and edge-case risk.
+
+## 0.5 Key Correctness Notes (each must become a test case)
+
+1. **Checksum coverage**: the document header's xxHash32 covers bytes[20..] (the 2-byte mode plus the body), not just the body (see the sketch after this list).
+2. **Endianness traps**:
+   - The header mode is a **u16 big-endian**
+   - `ID.to_bytes()` (peer + counter) is **big-endian**
+   - F64 in the custom Value encoding is **big-endian**
+   - postcard's f64 is **little-endian**
+3. **Mixed integer encodings**:
+   - LEB128 (ULEB128/SLEB128) is used for the Value encoding and friends
+   - postcard uses unsigned varint + zigzag (not SLEB128)
+4. **Two ContainerType mappings**: the first byte of ContainerWrapper uses a different mapping than the historical one used by postcard serialization (the historical mapping must be used when decoding the parent `Option`).
+5. **Richtext span.len semantics**: it is a Unicode scalar count (you cannot slice by UTF-16 code units directly).
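+
+A minimal Rust sketch of notes 1 and 2 (`verify_header` is a hypothetical helper; it assumes the 4-byte xxHash32 sits little-endian at the end of the 16-byte checksum field, which should be confirmed against `crates/loro-internal/src/encoding.rs` before relying on it):
+
+```rust
+/// Check a Loro document header: 4-byte magic "loro", a 16-byte checksum
+/// field, then the u16 big-endian mode at bytes 20..22, then the body.
+fn verify_header(blob: &[u8]) -> Result<u16, String> {
+    if blob.len() < 22 || &blob[0..4] != b"loro" {
+        return Err("bad magic or truncated header".into());
+    }
+    // The hash covers bytes[20..] (mode + body), seeded with LORO_XXH32_SEED.
+    let got = xxhash_rust::xxh32::xxh32(&blob[20..], 0x4F52_4F4C);
+    // Assumption: the stored hash occupies the last 4 bytes of the field.
+    let stored = u32::from_le_bytes(blob[16..20].try_into().unwrap());
+    if got != stored {
+        return Err("checksum mismatch".into());
+    }
+    Ok(u16::from_be_bytes([blob[20], blob[21]])) // mode is big-endian
+}
+```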
+
+## 0.6 Non-Goals (optional, not required)
+
+- Implementing the full Loro CRDT and op application inside Moon (this project only requires encoding-format interop).
+- Fully replicating Rust's compression strategy and block layout (byte-for-byte identity). This can be a later optimization goal.
+
+## 0.7 Ultimate Tests (must land)
+
+Beyond the Rust→Moon→Rust transcode e2e, two classes of "strong consistency" golden tests must also land (to backstop encoding details and semantic equivalence):
+
+1. **Updates consistency (JsonUpdates comparison)**:
+   - A Rust CLI generates random ops from a fixed seed and exports both binary updates (mode=4) and JsonUpdates (`loro::JsonSchema`, see `docs/JsonSchema.md`).
+   - Moon exports JsonUpdates from the binary updates.
+   - The two JsonUpdates must be structurally identical (compare after parsing, not as strings).
+
+2. **Snapshot consistency (deep JSON / toJSON comparison)**:
+   - A Rust CLI generates random ops from a fixed seed and exports both a binary snapshot (mode=3) and the ground-truth JSON from `get_deep_value()`.
+   - Moon parses the snapshot and produces a toJSON (deep JSON) output.
+   - Moon's deep JSON must exactly match the Rust ground-truth JSON (compare after parsing).
+
+For the detailed workflow and suggested interfaces, see `moon/specs/03-e2e-test-plan.md`.
diff --git a/moon/specs/01-context-checklist.md b/moon/specs/01-context-checklist.md
new file mode 100644
index 000000000..9c7a89673
--- /dev/null
+++ b/moon/specs/01-context-checklist.md
@@ -0,0 +1,91 @@
+# 01. Context Checklist (complete before starting work)
+
+This checklist ensures the implementation does not repeatedly backtrack due to missing background. Confirm every item before implementing, and record the conclusions in `moon/SPEC_NOTES.md` (for later reference during implementation).
+
+## 1.1 Spec Documents (required)
+
+Read and extract "implementable, deterministic rules" from:
+
+- Main spec: `docs/encoding.md`
+  - header / checksum / mode
+  - FastSnapshot / FastUpdates
+  - SSTable
+  - OpLog KV schema (vv/fr/sv/sf + change blocks)
+  - Overall ChangeBlock structure (postcard + columnar encoding)
+  - Custom Value encoding (tag + payload)
+  - serde_columnar's outer format and strategy notes (BoolRle/Rle/DeltaRle/DeltaOfDelta)
+- Companion specs:
+  - `docs/encoding-xxhash32.md`: xxHash32 implementation and test vectors
+  - `docs/encoding-lz4.md`: LZ4 Frame (plus block-level decoding)
+  - `docs/encoding-container-states.md`: state snapshots for Map/List/Text/Tree/MovableList/Counter
+
+Deliverables:
+
+- `moon/SPEC_NOTES.md` containing at least:
+  - All endianness rules (with the spec passage each comes from)
+  - The differences between LEB128 and postcard varint, and where each is used
+  - The two ContainerType mapping tables
+  - The richtext Unicode rules and the Moon-side implementation strategy
+  - Allowed/disallowed lenient-parsing points (e.g. handling of unknown value tags)
+
+## 1.2 Locating the Rust "Ground Truth" (required)
+
+For every piece of the format, find the Rust reference implementation and record its location (for comparison and debugging):
+
+- Top-level header/body: `crates/loro-internal/src/encoding.rs`
+- FastSnapshot / FastUpdates: `crates/loro-internal/src/encoding/fast_snapshot.rs`
+- SSTable:
+  - `crates/kv-store/src/sstable.rs`
+  - `crates/kv-store/src/block.rs`
+- ChangeBlock:
+  - `crates/loro-internal/src/oplog/change_store/block_encode.rs`
+  - `crates/loro-internal/src/oplog/change_store/block_meta_encode.rs`
+- Custom Value: `crates/loro-internal/src/encoding/value.rs`
+- ID / ContainerID: `crates/loro-common/src/lib.rs`
+- ContainerWrapper: `crates/loro-internal/src/state/container_store/container_wrapper.rs`
+- Container states:
+  - `crates/loro-internal/src/state/map_state.rs`
+  - `crates/loro-internal/src/state/list_state.rs`
+  - `crates/loro-internal/src/state/richtext_state.rs`
+  - `crates/loro-internal/src/state/tree_state.rs`
+  - `crates/loro-internal/src/state/movable_list_state.rs`
+  - `crates/loro-internal/src/state/counter_state.rs`
+
+Deliverables:
+
+- A per-module "spec section ↔ Rust source location" index in `moon/SPEC_NOTES.md`.
+
+## 1.3 MoonBit Language/Runtime Capability Check (required)
+
+Implementing the encoding format depends on the following capabilities; confirm up front whether MoonBit supports them, or whether hand-rolled substitutes are needed:
+
+1. Integers:
+   - Are `Int64/UInt64` available?
+   - What are the bitwise and shift semantics (logical vs arithmetic shifts; does overflow truncate)?
+2. Wider integers:
+   - The serde_columnar `DeltaRle` spec uses i128 deltas (at minimum, i128 values must be representable exactly).
+   - If there is no i128: is there a BigInt? Or can a "signed 128-bit struct (hi/lo)" work?
+3. Bytes and slices:
+   - Copy costs and slicing semantics of `Bytes`/`Array[Byte]` (zero-copy vs copy)?
+   - How to implement a safe reader (out-of-bounds errors rather than panics)?
+4. Floating point:
+   - Can IEEE 754 f64 be read/written byte-wise (LE/BE)?
+5. Unicode:
+   - Can strings be iterated by Unicode scalar?
+   - How to slice a string by "Unicode scalar count" (needed for richtext span.len)?
+
+Deliverables:
+
+- The i128 and Unicode choices in `moon/specs/02-module-plan.md` must be based on these conclusions.
+
+## 1.4 Reference Data and Acceptance Method (required)
+
+To avoid "implemented, but no way to know whether it is correct", settle the following before starting:
+
+- How the Rust side generates test vectors (blob + ground-truth JSON + metadata)
+- How the Moon side runs unit tests and e2e (at least a CLI entry point for the Rust harness to call)
+- The e2e verdict: compare the JSON of `get_deep_value()` after Rust `import()`
+
+Deliverables:
+
+- `moon/specs/03-e2e-test-plan.md` (detailed vector format and CLI contract)
diff --git a/moon/specs/02-module-plan.md b/moon/specs/02-module-plan.md
new file mode 100644
index 000000000..d65219565
--- /dev/null
+++ b/moon/specs/02-module-plan.md
@@ -0,0 +1,295 @@
+# 02. Module-by-Module Implementation Plan (with tests and exit criteria)
+
+Core principles of this plan:
+
+1. **Low-level primitives first, composite structures second** (otherwise debugging costs explode)
+2. **Every module has standalone unit tests** (at minimum: sample vectors + roundtrip tests)
+3. **Introduce cross-language comparison early** (Rust generates/verifies; Moon parses/encodes)
+
+Each module below includes:
+
+- Goal: what the module does
+- Depends on: which modules must come first
+- Implementation notes: easy-to-get-wrong points
+- Tests: which tests to write (unit / comparison / property / boundary)
+- Exit criteria: what "done, ready for the next module" means
+
+---
+
+## 2.1 Project Skeleton and Shared Facilities
+
+### Modules: `errors` / `bytes_reader` / `bytes_writer`
+
+Goal:
+- Unified error types (DecodeError/ChecksumMismatch/Unsupported/Overflow/InvalidInput, etc.)
+- Safe byte readers/writers: endian-aware reads, slicing, remaining length, bounds checks
+
+Tests:
+- Out-of-bounds reads must return errors (no crashes)
+- LE/BE roundtrips for u16/u32/u64
+
+Exit criteria:
+- All basic IO APIs are stable under unit-test coverage
+
+---
+
+## 2.2 Integer Encodings: LEB128 vs postcard varint (keep strictly separate)
+
+### Module: `leb128`
+
+Goal:
+- Encode/decode ULEB128 (u64) and SLEB128 (i64)
+
+Implementation notes:
+- SLEB128 uses two's-complement sign extension, not zigzag (see the contrast sketch after this module)
+- Cap the maximum bytes read so malicious input cannot loop forever
+
+Tests:
+- Use the sample vectors from `docs/encoding.md` (including negatives)
+- Random roundtrips: encode→decode equivalence (over a bounded range)
+
+Exit criteria:
+- Exact agreement with the spec samples; boundary cases (0, max, min) pass
+
+### Modules: `postcard/varint` + `postcard/zigzag`
+
+Goal:
+- unsigned varint (for u16–u128/usize, etc.)
+- zigzag (for i16–i128/isize, etc.)
+
+Tests:
+- Rust (postcard) generates binaries for random i64/u64; Moon decodes them identically
+- Moon encodes; Rust decodes identically
+
+Exit criteria:
+- Comparison tests for i64/u64/usize pass reliably
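+
+Because mixing these two up is the most common failure mode here, the following dependency-free Rust sketch contrasts the two encodings of the same negative number (the `0x7F` result matches the `I64(-1)` vector used by the value tests in this diff):
+
+```rust
+/// SLEB128: raw two's-complement 7-bit groups with sign extension.
+fn sleb128(mut v: i64) -> Vec<u8> {
+    let mut out = Vec::new();
+    loop {
+        let byte = (v & 0x7F) as u8;
+        v >>= 7; // arithmetic shift preserves the sign
+        let done = (v == 0 && byte & 0x40 == 0) || (v == -1 && byte & 0x40 != 0);
+        out.push(if done { byte } else { byte | 0x80 });
+        if done {
+            return out;
+        }
+    }
+}
+
+/// postcard style: zigzag-map to unsigned first, then a plain varint.
+fn zigzag_varint(v: i64) -> Vec<u8> {
+    let mut u = ((v << 1) ^ (v >> 63)) as u64;
+    let mut out = Vec::new();
+    loop {
+        let byte = (u & 0x7F) as u8;
+        u >>= 7;
+        out.push(if u == 0 { byte } else { byte | 0x80 });
+        if u == 0 {
+            return out;
+        }
+    }
+}
+
+fn main() {
+    assert_eq!(sleb128(-1), vec![0x7F]); // one byte, sign-extended
+    assert_eq!(zigzag_varint(-1), vec![0x01]); // same value, different bytes
+}
+```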
+
+---
+
+## 2.3 Checksums and Compression: xxHash32 / LZ4 Frame
+
+### Module: `xxhash32`
+
+Goal:
+- Implement xxHash32 per `docs/encoding-xxhash32.md` (seed=0x4F524F4C)
+
+Tests:
+- Use the document's test vectors directly (empty input, single byte, 16 bytes, …)
+- Rust comparison: hashes of random byte strings agree
+
+Exit criteria:
+- The document vectors and the Rust comparison tests all pass
+
+### Module: `lz4_frame` (decompression first)
+
+Goal:
+- Parse LZ4 Frames (magic/descriptor/blocks/end mark)
+- Decompress blocks (supporting overlap copy)
+
+Implementation notes:
+- Loro's SSTable block compression uses the LZ4 Frame format (not raw blocks)
+- Decompression is enough at first; the encoder may skip compression initially (emit None) and add it later
+
+Tests:
+- LZ4 Frame data produced by Rust decompresses to the same bytes as Rust produces
+- Malicious input: wrong magic, out-of-range block size, offset overflow, etc. must raise errors
+
+Exit criteria:
+- Can decompress real Rust SSTable blocks (including LZ4) with matching output
+
+---
+
+## 2.4 SSTable (KV Store)
+
+### Modules: `sstable/*`
+
+Goal:
+- Support SSTable `import_all`: parse the header and BlockMeta, parse blocks (Normal/Large), verify checksums, decompress, and reconstruct the KVs
+- Support SSTable `export_all`: at minimum, produce an SSTable that Rust can read (compression optional at first)
+
+Depends on:
+- `bytes_reader`, `xxhash32`, `lz4_frame`
+
+Implementation notes:
+- BlockMeta checksum coverage: the meta entries, excluding the count
+- Block checksum: xxHash32 over the (compressed or uncompressed) block body, appended afterward (the checksum itself is never compressed)
+- NormalBlock key prefix compression: the first key comes from BlockMeta.first_key; later keys are rebuilt from common_prefix_len + suffix (see the sketch after this section)
+
+Tests:
+- Moon builds its own SSTable (no compression, few KVs) → decode yields the KV list
+- Rust generates complex SSTables (multiple blocks, LargeValueBlock, LZ4 compression) → the Moon decode matches the Rust KV list
+- Moon-encoded SSTables → Rust `import_all` succeeds and the KV iteration matches
+
+Exit criteria:
+- SSTable decode/encode passes cross-language comparison reliably
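+
+A sketch of the NormalBlock key reconstruction, mirroring the MoonBit encoder in this diff (per entry after the first: a shared-prefix byte, a u16 LE suffix length, the suffix, then the value, with the prefix measured against the block's *first* key); `rebuild_kvs` and its inputs are hypothetical names:
+
+```rust
+/// Rebuild the KV pairs of one decoded NormalBlock body. `data` holds the
+/// packed entries, `offsets` the start offset of each entry (from the u16
+/// trailer), and `first_key` comes from this block's BlockMeta.
+fn rebuild_kvs(data: &[u8], offsets: &[usize], first_key: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
+    let mut kvs = Vec::new();
+    for (i, &start) in offsets.iter().enumerate() {
+        let end = offsets.get(i + 1).copied().unwrap_or(data.len());
+        let entry = &data[start..end];
+        if i == 0 {
+            // The first entry stores only its value; the key is first_key.
+            kvs.push((first_key.to_vec(), entry.to_vec()));
+            continue;
+        }
+        let common = entry[0] as usize; // prefix length shared with first_key
+        let suffix_len = u16::from_le_bytes([entry[1], entry[2]]) as usize;
+        let mut key = first_key[..common].to_vec();
+        key.extend_from_slice(&entry[3..3 + suffix_len]);
+        kvs.push((key, entry[3 + suffix_len..].to_vec()));
+    }
+    kvs
+}
+```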
+### Module: `postcard/varint` + `postcard/zigzag`
+
+Goal:
+- unsigned varint (used for u16-u128/usize, etc.)
+- zigzag (used for i16-i128/isize, etc.)
+
+Tests:
+- Rust (postcard) generates binaries for random i64/u64; Moon decodes to the same values
+- Moon encodes; Rust decodes to the same values
+
+Exit criteria:
+- comparison tests for i64/u64/usize pass reliably
+
+---
+
+## 2.3 Checksums and compression: xxHash32 / LZ4 Frame
+
+### Module: `xxhash32`
+
+Goal:
+- implement xxHash32 per `docs/encoding-xxhash32.md` (seed=0x4F524F4C)
+
+Tests:
+- use the documented test vectors directly (empty input, single byte, 16 bytes, etc.)
+- Rust comparison: identical hashes for random byte strings
+
+Exit criteria:
+- all documented vectors and Rust comparison tests pass
+
+### Module: `lz4_frame` (implement decompression first)
+
+Goal:
+- parse LZ4 Frames (magic/descriptor/blocks/end mark)
+- decompress blocks (supporting overlap copies)
+
+Implementation notes:
+- Loro's SSTable block compression uses the LZ4 Frame format (not raw blocks)
+- decompression is enough at first; the encoder may initially skip compression (emit None) and add it later
+
+Tests:
+- Rust-generated LZ4 Frame data decompresses to the same bytes as Rust produces
+- malicious input must error: wrong magic, out-of-range block size, offset overflow, etc.
+
+Exit criteria:
+- can decompress real Rust-produced SSTable blocks (with LZ4) with matching output
+
+---
+
+## 2.4 SSTable (KV store)
+
+### Modules: `sstable/*`
+
+Goal:
+- support SSTable `import_all`: parse the header, BlockMeta, and blocks (Normal/Large); verify checksums; decompress; reconstruct the KV pairs
+- support SSTable `export_all`: at minimum produce an SSTable that Rust can read (compression may be skipped at first)
+
+Dependencies:
+- `bytes_reader`, `xxhash32`, `lz4_frame`
+
+Implementation notes:
+- BlockMeta checksum coverage: the meta entries (excluding the count)
+- block checksum: xxHash32 over the (compressed or uncompressed) block body, appended after it (the checksum itself is never compressed)
+- NormalBlock key prefix compression: the first key comes from BlockMeta.first_key; later keys are rebuilt from common_prefix_len + suffix
+
+Tests:
+- Moon builds its own SSTable (uncompressed, few KVs) → decode yields the KV list
+- Rust generates complex SSTables (multiple blocks, LargeValueBlock, LZ4 compression) → Moon decodes to a KV list matching Rust
+- a Moon-encoded SSTable → Rust `import_all` succeeds and iterates the same KVs
+
+Exit criteria:
+- SSTable decode/encode passes cross-language comparison reliably
+
+---
+
+## 2.5 Top-level header/body (document blob)
+
+### Module: `document`
+
+Goal:
+- parse the header: magic `"loro"`, checksum (xxHash32), mode (u16 BE)
+- support:
+  - FastSnapshot bodies (three u32_le len + bytes segments; the state may be the single byte `"E"`)
+  - FastUpdates bodies (repeated ULEB128 len + block bytes until EOF)
+
+Dependencies:
+- `xxhash32`, `leb128`
+
+Tests:
+- both checksum branches (valid/invalid)
+- the unsupported-mode branch (modes 1/2 must error)
+- the empty-state `"E"` snapshot branch
+
+Exit criteria:
+- can split and verify the header/body of blobs exported by Rust
+
+---
+
+## 2.6 Basic identifiers: ID / ContainerID / ContainerWrapper
+
+### Modules: `id` / `container_id` / `container_wrapper`
+
+Goal:
+- implement:
+  - ChangeBlock keys: 12 bytes (peer u64 BE + counter i32 BE)
+  - ContainerID.to_bytes (root and normal)
+  - the historical postcard `Option<ContainerType>` mapping (used only for wrapper.parent)
+  - ContainerWrapper: leading type byte (to_bytes mapping) + depth (LEB128) + parent (postcard Option) + payload
+
+Tests:
+- Rust generates random ContainerIDs:
+  - to_bytes agrees
+  - the parent mapping under postcard serialization agrees
+
+Exit criteria:
+- ContainerID/Wrapper parse unambiguously and can be re-encoded
+
+---
+
+## 2.7 serde_columnar (columnar encoding)
+
+### Modules: `serde_columnar/*`
+
+Goal:
+- outer format: `varint(n_cols)` + N × `(varint(len), bytes)`
+- strategies:
+  - BoolRle
+  - Rle (AnyRle)
+  - DeltaRle (delta + AnyRle)
+  - DeltaOfDelta (bitstream + prefix code)
+
+Dependencies:
+- `postcard/varint`, `postcard/zigzag`, `bytes_reader`
+
+Implementation notes:
+- row counts are not stored explicitly; they must be derived while decoding the column payloads
+- DeltaOfDelta's bit order (big-endian bit order) is extremely easy to get wrong; retest it
+- i128 support is a hard gate: settle the Moon-side approach explicitly (BigInt or a custom 128-bit integer)
+
+Tests:
+- small per-strategy vectors (covering runs/literals/empty sequences/single elements)
+- Rust serde_columnar generates column data → Moon decodes identically
+- Moon encodes → Rust decodes identically (can be staged: BoolRle/Rle/DeltaRle first, DeltaOfDelta later)
+
+Exit criteria:
+- at least every column structure used by ChangeBlock/State decodes reliably; encoding may be completed in stages
+
+---
+
+## 2.8 The custom Value Encoding (the non-postcard one)
+
+### Module: `value_custom`
+
+Goal:
+- implement the Value Encoding of `docs/encoding.md` (tags 0..16 plus >=0x80 unknown)
+
+Implementation notes:
+- F64 is big-endian
+- I64/DeltaInt use SLEB128 (not zigzag)
+- unknown tags: keep them opaque (tag + raw payload) so they can be re-encoded
+
+Tests:
+- Rust generates vectors covering every tag (including boundaries: NaN/Inf/-0.0, large integers, long strings, binary)
+- Moon decode→encode→Rust decode agrees (semantically)
+
+Exit criteria:
+- usable as the foundation of the ChangeBlock values segment
+
+---
+
+## 2.9 ChangeBlock (the heart of FastUpdates)
+
+### Module: `change_block`
+
+Goal:
+- fully implement ChangeBlock decode (and encode where needed)
+- produce testable `Change/Op` data structures (see `moon/specs/04-ir-design.md`)
+
+Suggested implementation order (follow the layers strictly):
+1. the outer postcard EncodedBlock struct: split out the byte segments
+2. the header segment (peer table, atom lens, deps, lamport)
+3. the change_meta segment (timestamps, commit msg lens + concatenated area)
+4. arenas: cids (ContainerArena), keys, positions (PositionArena)
+5. ops (serde_columnar)
+6. delete_start_ids (serde_columnar)
+7. values (custom Value)
+8. **assemble Change/Op**: rebuild `Op`s from EncodedOp + values + delete_start_ids, then split them into a `Change` list by each change's atom_len
+
+Tests:
+- Rust exports FastUpdates:
+  - Moon decodes block by block
+  - Moon re-encodes the blob and Rust imports it successfully
+  - Rust `get_deep_value()` equals the ground-truth JSON
+- Change/Op-level comparison (recommended as the "fast triage" case):
+  - Rust additionally emits `changes.json` (the Change/Op list); Moon decodes and emits isomorphic JSON (see section 4.6 of `moon/specs/04-ir-design.md`), then run a structural diff
+
+Exit criteria:
+- the FastUpdates e2e passes (covering at least multiple peers + multiple containers)
+
+---
+
+## 2.10 State (the state_bytes of FastSnapshot)
+
+### Modules: `state/*`
+
+Goal:
+- decode the per-container state snapshots of `encoding-container-states.md`
+- serve as part of the "re-encodable representation" (at minimum, keeping the raw bytes for re-encoding is acceptable)
+
+Suggested order:
+1. MapState
+2. ListState
+3. RichtextState (focus: Unicode scalars)
+4. TreeState (PositionArena)
+5. MovableListState (sentinel + visible/invisible logic)
+6. CounterState (if needed)
+
+Tests:
+- every container gets dedicated Rust-generated vectors (blob + ground-truth JSON)
+- Rust imports the Moon-transcoded output; the deep JSON is equivalent
+
+Exit criteria:
+- the FastSnapshot e2e passes (covering every container type)
+
+---
+
+## 2.11 CLI and integration
+
+### Module: `moon/bin/loro-codec`
+
+Goal:
+- a stable interface for the e2e harness to call (suggested minimum):
+  - `decode <in.blob> --out <out.json>`: emit structured JSON (for debugging)
+  - ideally also emit `changes.json` (Change/Op) for comparison against Rust (see `moon/specs/04-ir-design.md`)
+  - `encode <in.json> --out <out.blob>`: build a blob from Change/Op JSON (advanced)
+  - `transcode <in.blob> <out.blob>`: decode→encode (the main e2e entry point)
+
+Tests:
+- CLI argument error handling
+- end to end: transcode every `moon/testdata/*.blob` and verify via Rust import
+
+Exit criteria:
+- `transcode` is a stable, usable e2e entry point
diff --git a/moon/specs/03-e2e-test-plan.md b/moon/specs/03-e2e-test-plan.md
new file mode 100644
index 000000000..514f303bf
--- /dev/null
+++ b/moon/specs/03-e2e-test-plan.md
@@ -0,0 +1,175 @@
+# 03. e2e test plan (final acceptance)
+
+The final acceptance for this project is e2e: Rust and Moon can decode/encode each other's output, with Rust verifying semantic equality.
+
+## 3.1 Overall approach
+
+1. Rust is responsible for:
+   - building Loro documents with sufficient coverage (different containers, boundary cases, multiple peers)
+   - exporting blobs (Snapshot/Updates/…)
+   - producing the ground-truth JSON (`get_deep_value()`)
+2. Moon is responsible for:
+   - parsing the blob and re-encoding it (transcode)
+3. Rust makes the final verdict:
+   - `import(Moon's output blob)` succeeds
+   - `get_deep_value()` equals the ground-truth JSON exactly
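+
+A minimal sketch of such a harness follows; the CLI path and argument order are assumptions here (the actual contract is fixed in section 3.4 below):
+
+```rust
+use std::process::Command;
+
+use loro::{LoroDoc, ToJson};
+
+/// Transcode one blob through the Moon CLI, re-import it in Rust,
+/// and compare against the ground-truth deep JSON.
+fn check_case(blob: &std::path::Path, truth_json: &std::path::Path) -> anyhow::Result<()> {
+    let out = blob.with_extension("moon.blob");
+    // Hypothetical CLI invocation; see the contract in section 3.4.
+    let status = Command::new("moon/bin/loro-codec")
+        .args(["transcode", blob.to_str().unwrap(), out.to_str().unwrap()])
+        .status()?;
+    anyhow::ensure!(status.success(), "transcode failed");
+
+    let doc = LoroDoc::new();
+    doc.import(&std::fs::read(&out)?)?;
+
+    let got = doc.get_deep_value().to_json_value();
+    let want: serde_json::Value =
+        serde_json::from_str(&std::fs::read_to_string(truth_json)?)?;
+    anyhow::ensure!(got == want, "deep value mismatch");
+    Ok(())
+}
+```
+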
+## 3.2 Test vector (testdata) conventions
+
+Suggested directory: `moon/testdata/`
+
+Each case is a group of files:
+
+- `*.blob`: the binary output of Rust `export(...)`
+- `*.json`: the JSON of Rust `get_deep_value().to_json_value()` (ground truth)
+- `*.meta.json` (recommended): metadata describing how the blob was generated, e.g.:
+  - `mode`: snapshot / updates / shallow_snapshot / state_only / snapshot_at
+  - `encode_mode`: 3 or 4
+  - `notes`: what the case covers (e.g. contains emoji, triggers LZ4, triggers LargeValueBlock)
+  - for updates: the `from_vv` or `spans` construction parameters (for reproducibility)
+
+## 3.3 Rust test-vector generator (suggested approach)
+
+Add a small Rust tool (under `examples/` or a new crate) that can:
+
+- `generate --out moon/testdata --seed ... --cases ...`
+- with built-in case groups:
+  - basic containers: Map/List/Text/Tree/MovableList
+  - richtext Unicode: emoji and non-BMP characters
+  - large values: triggering multi-block SSTables and LargeValueBlock
+  - compression: make sure LZ4-compressed blocks are produced (exercising the decompressor)
+  - multiple peers: simulated collaborative writes
+  - export modes: Snapshot / Updates (from vv) / UpdatesInRange / ShallowSnapshot / StateOnly / SnapshotAt
+
+Ground truth:
+
+- for snapshots: `doc.get_deep_value()` at the exported version
+- for updates, an extra "replay scenario" is needed:
+  - Rust first builds baseline docA and docB (or a vv starting point) and exports the updates blob
+  - during e2e, Rust applies baseline + import(updates) to reach the target state, then compares against the ground truth
+
+## 3.4 Moon CLI contract (called by the Rust harness)
+
+Suggested fixed form:
+
+- `moon/bin/loro-codec transcode <in.blob> <out.blob>`
+
+Constraints. `transcode` must:
+
+- verify the checksum (non-zero exit on failure)
+- handle mode=3/4 correctly
+- produce an output blob Rust can import (with a correct checksum)
+
+Optional debug command (for triage):
+
+- `decode <in.blob> --out <out.json>`: emit the parsed structure as JSON (e.g. header, SSTable meta, ChangeBlock segment statistics)
+
+## 3.5 e2e test structure (recommended: drive it from Rust integration tests)
+
+Pseudo-flow:
+
+1. Rust iterates over `moon/testdata/*.blob`
+2. For each case:
+   - call the Moon CLI: `transcode case.blob out.blob`
+   - Rust creates a fresh doc and calls `import(out.blob)`
+   - read the `case.json` ground truth and compare against `doc.get_deep_value()`
+
+For updates-style cases, the test should also include:
+
+- the baseline state (specified in meta.json)
+- the import order and the preceding version vector
+
+## 3.6 Coverage matrix (minimum)
+
+1. header:
+   - wrong magic
+   - wrong checksum
+   - unsupported mode
+2. SSTable:
+   - multiple blocks
+   - LZ4-compressed blocks
+   - LargeValueBlock
+3. ChangeBlock:
+   - multiple peers
+   - DeltaOfDelta over dep flags / dep counters / lamport / timestamps
+4. Value:
+   - every tag (0..16)
+   - conservative re-encoding of unknown tags (>=0x80)
+5. Container state:
+   - full coverage of Map/List/Text/Tree/MovableList
+   - Text containing emoji (exercising Unicode scalars)
+
+## 3.7 Staged acceptance milestones (suggested)
+
+1. Milestone 1: Rust→Moon→Rust (snapshot only) e2e passes
+2. Milestone 2: FastUpdates e2e passes
+3. Milestone 3: ShallowSnapshot/StateOnly/SnapshotAt coverage passes
+4. Milestone 4 (optional): encoding-strategy alignment (compression/block layout/closer byte-level match with Rust)
+
+## 3.8 Current status (in this repo)
+
+The repo already contains an **optional** set of Rust integration tests (they skip automatically when Moon/Node is missing):
+
+- Rust harness: `crates/loro/tests/moon_transcode.rs`
+- Moon CLI: `moon/cmd/loro_codec_cli` (JS target; Node reads/writes files via `fs`)
+
+Run locally:
+
+```sh
+MOON_BIN=~/.moon/bin/moon NODE_BIN=node cargo test -p loro --test moon_transcode
+```
+
+Current coverage includes (at least, and still growing):
+
+- Snapshot / AllUpdates
+- SnapshotAt / StateOnly / ShallowSnapshot
+- Updates (from vv)
+- multiple peers (exports containing updates from several peers)
+- the ultimate tests: FastUpdates→JsonUpdates (strict equality with Rust) and FastSnapshot→deep JSON (strict equality with Rust)
+- the coverage matrix: fixed coverage (curated ops) plus multi-seed random sequences (nested containers, RichText mark/unmark, Tree meta, etc.)
+- Counter: `cargo test -p loro --test moon_transcode`
+
+## 3.9 Ultimate tests (two forms, strict equality)
+
+To avoid "imports succeed but semantic details drift", two golden tests must land: under **the same random operation sequence**, the structured outputs of Rust and Moon must match exactly.
+
+> Convention: below, JsonUpdates means `loro::JsonSchema` (see `docs/JsonSchema.md`).
+
+### 3.9.1 Updates: binary FastUpdates → JsonUpdates comparison
+
+Goal:
+- the JsonUpdates exported by Rust and the JsonUpdates Moon derives from the **binary updates** must be **structurally identical**.
+
+Suggested flow:
+1. A Rust CLI (seeded, reproducible) generates random operations and commits, yielding the final doc.
+2. Rust exports both:
+   - `updates.blob`: binary updates (mode=4; `ExportMode::Updates{from: ...}` or `ExportMode::all_updates()`)
+   - `updates.json`: JsonUpdates (the JSON output of Rust `export_json_updates(...)`)
+3. Moon reads `updates.blob` and exports JsonUpdates (e.g. via the Moon CLI `export-jsonschema`), yielding `updates.moon.json`.
+4. Compare: `updates.moon.json` and `updates.json` must be exactly equal after deserialization (**parse first, then compare**, rather than comparing strings).
+
+Coverage rationale:
+- this test directly constrains the decode semantics of ChangeBlock/serde_columnar/Value: Moon's "binary → JsonUpdates" must equal Rust's ground-truth JsonUpdates.
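+
+A sketch of the Rust half of step 2 above (file names illustrative; this assumes the two-argument start/end form of `export_json_updates`, which is what the Rust tests in this plan use):
+
+```rust
+use loro::{ExportMode, LoroDoc, VersionVector};
+
+// Sketch of the Rust side of the golden-updates case.
+fn export_golden(doc: &LoroDoc) -> anyhow::Result<()> {
+    let from = VersionVector::new(); // empty vv == "all updates"
+    let blob = doc.export(ExportMode::all_updates())?;
+    let json = doc.export_json_updates(&from, &doc.oplog_vv());
+
+    std::fs::write("updates.blob", blob)?;
+    std::fs::write("updates.json", serde_json::to_string(&json)?)?;
+    Ok(())
+}
+```
+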
+### 3.9.2 Snapshot: binary FastSnapshot → deep JSON (toJSON) comparison
+
+Goal:
+- Rust's `get_deep_value()` ground-truth JSON and the deep JSON (toJSON) Moon produces from the **binary snapshot** must be **structurally identical**.
+
+Suggested flow:
+1. A Rust CLI (seeded, reproducible) generates random operations and commits, yielding the final doc.
+2. Rust exports both:
+   - `snapshot.blob`: the binary snapshot (mode=3, `ExportMode::Snapshot`)
+   - `snapshot.deep.json`: the ground-truth JSON (`doc.get_deep_value().to_json_value()`)
+3. Moon reads `snapshot.blob`, parses the snapshot, and exposes a `toJSON`/`export-deep-json`-style interface that writes `snapshot.moon.deep.json` (the JSON of the final state).
+4. Compare: `snapshot.moon.deep.json` and `snapshot.deep.json` must be exactly equal after deserialization.
+
+Coverage rationale:
+- this test directly constrains the decode semantics and JSON shape of the FastSnapshot state snapshots (Map/List/Richtext/Tree/MovableList/Counter, etc.); it exposes Unicode-scalar and ordering/stability problems.
+
+Current status (in this repo):
+- Rust generator: `crates/loro/examples/moon_golden_gen.rs`
+- Rust tests: `crates/loro/tests/moon_transcode.rs` (`moon_golden_updates_jsonschema_matches_rust` / `moon_golden_snapshot_deep_json_matches_rust`)
+- Moon CLI: `moon/cmd/loro_codec_cli` (`export-jsonschema` / `export-deep-json`)
diff --git a/moon/specs/04-ir-design.md b/moon/specs/04-ir-design.md
new file mode 100644
index 000000000..bd964fbdb
--- /dev/null
+++ b/moon/specs/04-ir-design.md
@@ -0,0 +1,337 @@
+# 04. Moon-side Change / Op data structure design
+
+This document defines the Moonbit-side core data structures (`Change` / `Op`, etc.) used for "re-encodability (decode→encode)" and "testability (comparison/golden tests)".
+
+Goals:
+
+1. **Carry the core semantics of a ChangeBlock**: change metadata plus the op sequence (values, references, and delete spans).
+2. **Be test-friendly**: serializable to stable JSON for comparison against Rust-exported JSON (or as goldens).
+3. **Be usable for encoding**: a ChangeBlock can be rebuilt from these structures (byte-for-byte equality is not required, but Rust must be able to import the result with identical semantics).
+
+> Note: the types below are written in "Moonbit-flavored pseudocode"; adjust to actual Moonbit syntax when implementing, but do not change the field semantics/constraints.
+
+---
+
+## 4.1 Basic types
+
+### 4.1.1 Numeric conventions
+
+- `PeerID`: u64 (Rust: `u64`)
+- `Counter`: i32 (Rust: `i32`)
+- `Lamport`: u32 (Rust: `u32`; some JSON/display layers may represent it as i64)
+- `Timestamp`: i64
+
+> Note: two variable-length integer systems coexist at the encoding layer:
+> - the custom Value encoding uses LEB128 (including SLEB128)
+> - postcard/serde_columnar uses varint + zigzag
+>
+> These structures only carry the semantic values themselves and do not expose encoding details.
+
+### 4.1.2 ID / IdLp / TreeID
+
+```
+struct ID { peer: PeerID, counter: Counter }
+struct IdLp { peer: PeerID, lamport: Lamport } // used by movable list
+type TreeID = ID // a Tree node ID has the same shape as ID (peer+counter)
+```
+
+To match Rust's string formats, the following conventions apply (JSON/debugging only):
+
+- `ID` string: `"{counter}@{peer}"`
+- `IdLp` string: `"L{lamport}@{peer}"`
+
+### 4.1.3 ContainerID
+
+```
+enum ContainerType {
+  Map, List, Text, Tree, MovableList,
+  Counter, // optional feature
+  Unknown(u8), // future extensions
+}
+
+enum ContainerID {
+  Root { name: String, ty: ContainerType },
+  Normal { peer: PeerID, counter: Counter, ty: ContainerType },
+}
+```
+
+Aligned with Rust's `Display/TryFrom<&str>` (for JSON/tests):
+
+- Root: `"cid:root-{name}:{ContainerType}"`
+- Normal: `"cid:{ID}:{ContainerType}"`
+
+where `ContainerType` displays as `Map/List/Text/Tree/MovableList/(Counter)/Unknown(k)`.
+
+### 4.1.4 FractionalIndex (Tree position)
+
+A Tree position is the bytes of a `FractionalIndex` in the binary; the JSON side uses its `Display`:
+
+- `fractional_index`: an **uppercase hexadecimal** string (Rust's `FractionalIndex::to_string()` is in fact the bytes joined with `%02X`).
+
+Storing both forms is recommended (one for encoding, one for tests):
+
+```
+struct FractionalIndex {
+  bytes: Bytes, // for encoding
+  hex: String, // for tests/logs; derivable from bytes
+}
+```
+
+---
+
+## 4.2 LoroValue (user-level values, used by Insert/Set/Mark, etc.)
+
+In the binary, LoroValue goes through postcard (see "LoroValue Encoding (in postcard)" in `docs/encoding-container-states.md`); in (human-readable) JSON it follows Rust's custom serialization rules (see `crates/loro-common/src/value.rs`):
+
+- `Null` → JSON `null`
+- `Bool` → JSON `true/false`
+- `Double/I64` → JSON number
+- `String` → JSON string
+- `Binary` → JSON number array (0..255)
+- `List` → JSON array
+- `Map` → JSON object
+- `Container(ContainerID)` → JSON string: `"🦜:" + ContainerIDString`
+
+Reusing this "test-friendly JSON shape" directly (especially the `🦜:` container-reference prefix) is recommended, so Moon output can be compared against Rust verbatim.
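+
+To make the mapping concrete, a minimal Rust sketch of this human-readable conversion; the toy `Value` enum here merely stands in for the Moon-side type and is not the loro implementation itself:
+
+```rust
+use serde_json::{json, Value as Json};
+
+// Toy stand-in for LoroValue, used only to illustrate the JSON mapping above.
+enum Value {
+    Null,
+    Bool(bool),
+    I64(i64),
+    Double(f64),
+    Str(String),
+    Binary(Vec<u8>),
+    List(Vec<Value>),
+    Map(Vec<(String, Value)>),
+    Container(String), // a ContainerID string like "cid:root-doc:Map"
+}
+
+fn to_human_json(v: &Value) -> Json {
+    match v {
+        Value::Null => Json::Null,
+        Value::Bool(b) => json!(b),
+        Value::I64(n) => json!(n),
+        Value::Double(f) => json!(f),
+        Value::Str(s) => json!(s),
+        Value::Binary(b) => json!(b), // serializes as an array of 0..255 numbers
+        Value::List(xs) => Json::Array(xs.iter().map(to_human_json).collect()),
+        Value::Map(kvs) => Json::Object(
+            kvs.iter().map(|(k, v)| (k.clone(), to_human_json(v))).collect(),
+        ),
+        // Container references carry the parrot prefix.
+        Value::Container(cid) => json!(format!("🦜:{cid}")),
+    }
+}
+```
+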
+---
+
+## 4.3 Change (core structure #1)
+
+### 4.3.1 Definition
+
+```
+struct Change {
+  id: ID, // the change's starting ID (peer+counter)
+  timestamp: i64, // change timestamp (DeltaOfDelta)
+  deps: Array[ID], // frontiers (cf. Rust json_schema: deps)
+  lamport: Lamport, // the change's lamport start
+  msg: Option[String], // commit message (None/Some)
+  ops: Array[Op], // op list (increasing counter)
+}
+```
+
+### 4.3.2 Constraints (for test assertions)
+
+- `ops` must be sorted by increasing `op.counter`.
+- `op.counter` must satisfy `id.counter <= op.counter < id.counter + change_op_len`.
+- `change_op_len` is defined as `sum(op_len(op.content))` and should equal the change's atom_len in the ChangeBlock header.
+
+> Note: the FastUpdates ChangeBlock header compresses the "self dep" (dep_on_self); after decoding, `deps` should contain the complete dep list (including the self dep).
+
+---
+
+## 4.4 Op (core structure #2)
+
+### 4.4.1 Top level
+
+```
+struct Op {
+  container: ContainerID, // target container
+  counter: Counter, // the op's starting counter (absolute, not a relative offset)
+  content: OpContent, // the semantic operation
+}
+
+enum OpContent {
+  List(ListOp),
+  MovableList(MovableListOp),
+  Map(MapOp),
+  Text(TextOp),
+  Tree(TreeOp),
+  Future(FutureOp), // Unknown/Counter (optional)
+}
+```
+
+To maximize test reuse, keep the shape of `OpContent` as close as possible to Rust's `encoding/json_schema.rs::json::JsonOpContent`.
+
+### 4.4.2 Per-container OpContent definitions and op_len rules
+
+#### List
+
+```
+enum ListOp {
+  Insert { pos: u32, value: Array[LoroValue] },
+  Delete { pos: i32, len: i32, start_id: ID },
+}
+```
+
+- `op_len(Insert) = value.length`
+- `op_len(Delete) = abs(len)` (note that len may be negative, indicating direction; semantics follow the encoding rules)
+
+#### MovableList
+
+```
+enum MovableListOp {
+  Insert { pos: u32, value: Array[LoroValue] },
+  Delete { pos: i32, len: i32, start_id: ID },
+  Move { from: u32, to: u32, elem_id: IdLp },
+  Set { elem_id: IdLp, value: LoroValue },
+}
+```
+
+- Insert/Delete: `op_len` as for List
+- Move/Set: `op_len = 1`
+
+#### Map
+
+```
+enum MapOp {
+  Insert { key: String, value: LoroValue },
+  Delete { key: String },
+}
+```
+
+- `op_len = 1`
+
+#### Text (richtext ops)
+
+```
+enum TextOp {
+  Insert { pos: u32, text: String },
+  Delete { pos: i32, len: i32, start_id: ID },
+  Mark { start: u32, end: u32, style_key: String, style_value: LoroValue, info: u8 },
+  MarkEnd,
+}
+```
+
+- `op_len(Insert) = unicode_scalar_count(text)` (must agree with Rust's `text.chars().count()`)
+- `op_len(Delete) = abs(len)`
+- `op_len(Mark) = 1`, `op_len(MarkEnd) = 1`
+
+> Note: the encoded MarkStart carries `len = end - start`, but that is not the atom_len; the atom_len is always 1.
+
+#### Tree
+
+```
+enum TreeOp {
+  Create { target: TreeID, parent: Option[TreeID], fractional_index: FractionalIndex },
+  Move { target: TreeID, parent: Option[TreeID], fractional_index: FractionalIndex },
+  Delete { target: TreeID },
+}
+```
+
+- `op_len = 1`
+
+#### Future (unknown/extension containers)
+
+Goal: a conservative, re-encodable representation guaranteeing future versions never drop data.
+
+```
+enum FutureOp {
+  // optional: counter feature
+  Counter { value: EncodedValue }, // the value may be I64 or F64
+  Unknown { prop: i32, value: EncodedValue }, // value uses the custom Value encoding
+}
+```
+
+`EncodedValue` should align with the JSON representation of Rust's `encoding/value.rs::OwnedValue` (`{ "value_type": "...", "value": ... }`), covering at least:
+
+- `i64` / `f64` / `str` / `binary` / `loro_value` / `delete_once` / `delete_seq` / `delta_int`
+- `mark_start` / `list_move` / `list_set` / `raw_tree_move`
+- `future.unknown(kind,data)`: preserve the unknown kind and raw bytes (for re-encoding)
+
+---
+
+## 4.5 Change / Op ↔ ChangeBlock (binary) mapping notes (for implementation and tests)
+
+This section is not a full implementation guide; it pins down "where each field comes from when decoding" and "how each field is produced when encoding", to avoid mismatches during implementation.
+
+### 4.5.1 Decoding (binary → Change/Op): the critical path
+
+For a single ChangeBlock of a FastUpdates blob:
+
+1. postcard-decode the outer `EncodedBlock` fields:
+   - `counter_start/counter_len/lamport_start/lamport_len/n_changes`
+   - plus the byte segments: `header/change_meta/cids/keys/positions/ops/delete_start_ids/values`
+2. Parse `header` (see `crates/loro-internal/src/oplog/change_store/block_meta_encode.rs::decode_changes_header`):
+   - yields `peers[]`, each change's `atom_len`, `deps`, and each change's `lamport`
+3. Parse `change_meta`:
+   - `timestamps[]` (DeltaOfDelta)
+   - `commit_msg_len[]` (AnyRle) + the concatenated area → `msg[]`
+4. Parse the arenas:
+   - `cids`: ContainerArena (postcard Vec) → `ContainerID[]`
+   - `keys`: LEB128(len) + utf8 → `String[]`
+   - `positions`: PositionArena (serde_columnar) → `Bytes[]`
+5. Parse `ops` (serde_columnar EncodedOp columns), yielding `[(container_idx, prop, value_type, len)]`
+6. Parse `delete_start_ids` (serde_columnar), yielding the delete-span table (consumed by DeleteSeq)
+7. Parse `values`: consume the values byte stream in op order according to each op's `value_type`, yielding `Value`s (the custom Value encoding)
+8. **Reconstruct semantic Ops from container type + prop + value** (cf. Rust `crates/loro-internal/src/encoding/outdated_encode_reordered.rs::decode_op`):
+   - Map: `prop` is a `key_idx` → `keys[key_idx]`
+   - List/Text/MovableList: `prop` is usually a position; Delete takes `start_id + signed_len` from delete_start_ids
+   - Text Mark: rebuild `start/end/style_key/style_value/info` from `MarkStart` + `prop(start)`
+   - Tree: use `RawTreeMove` + `positions[position_idx]`; compute `op_id` to distinguish Create from Move (see Rust `is_create = subject.id() == op_id`)
+9. Split the ops into each `Change.ops` by change atom_len, as sketched after this list:
+   - for each change: accumulate `op_len(op.content)` until it equals that change's atom_len
+   - fill in the rest of the Change along the way: `id/timestamp/deps/lamport/msg`
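+
+A minimal sketch of step 9 plus the `op_len` rules from 4.4.2 (types abbreviated to toy shapes; error handling reduced to panics):
+
+```rust
+// Toy shapes standing in for the Moon-side IR; only what op_len needs.
+enum OpContent {
+    ListInsert { values: usize },   // number of inserted values
+    Delete { len: i32 },            // List/MovableList/Text delete
+    TextInsert { text: String },
+    UnitLen,                        // Map, Move/Set, Mark/MarkEnd, Tree, ...
+}
+struct Op { counter: i32, content: OpContent }
+struct Change { id_counter: i32, ops: Vec<Op> }
+
+fn op_len(c: &OpContent) -> i32 {
+    match c {
+        OpContent::ListInsert { values } => *values as i32,
+        OpContent::Delete { len } => len.abs(),
+        // Unicode scalar count, matching Rust's text.chars().count().
+        OpContent::TextInsert { text } => text.chars().count() as i32,
+        OpContent::UnitLen => 1,
+    }
+}
+
+/// Split a flat op list into changes by the atom_len of each change.
+fn split_into_changes(ops: Vec<Op>, atom_lens: &[i32], counter_start: i32) -> Vec<Change> {
+    let mut out = Vec::new();
+    let mut iter = ops.into_iter();
+    let mut counter = counter_start;
+    for &atom_len in atom_lens {
+        let (mut acc, mut ops) = (0, Vec::new());
+        while acc < atom_len {
+            let op = iter.next().expect("ops exhausted before atom_len reached");
+            acc += op_len(&op.content);
+            ops.push(op);
+        }
+        assert_eq!(acc, atom_len, "op boundaries must align with atom_len");
+        out.push(Change { id_counter: counter, ops });
+        counter += atom_len;
+    }
+    out
+}
+```
+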
+### 4.5.2 Encoding (Change/Op → binary): the critical path
+
+The encoder need not be byte-for-byte identical to Rust, but Rust must be able to import its output. Suggested staging: "a working version first, an aligned version later":
+
+- v1 (working version):
+  - rebuild the registers (peer/key/cid/position) directly from `Change/Op`, generate ContainerArena/keys/positions, and produce the ops columns + delete_start_ids + values bytes.
+  - SSTable encoding can uniformly use `compression_type=None` (avoiding compression differences); values inside a ChangeBlock are not compressed.
+
+Key points when building a ChangeBlock from `Change/Op` (cf. Rust `encode_op/get_op_prop/encode_block`):
+
+1. `container_idx`: from `cid_register.register(container_id)`
+2. `prop`: computed per op type (equivalent to Rust `get_op_prop`):
+   - List/MovableList Insert/Delete/InsertText: `prop = pos`
+   - MovableList Move: `prop = to`
+   - MovableList Set: `prop = 0`
+   - Text Insert/Delete/Mark: `prop = pos/start`
+   - Text MarkEnd: `prop = 0`
+   - Map: `prop = key_idx` (from the key register)
+   - Tree: `prop = 0`
+3. `value_type + values/delete_start_ids`: mapped from the op content (equivalent to Rust `encode_op`):
+   - List/MovableList Insert → write `LoroValue(list)` into values
+   - Text Insert → write `Str(text)` into values
+   - Map Insert/Delete → write `LoroValue(v)` or `DeleteOnce`
+   - Delete → write `DeleteSeq` (values) + append one delete_start_id
+   - Text Mark → write `MarkStart` (with len = end-start, key, value, info)
+   - Text MarkEnd → write `Null`
+   - MovableList Move → write `ListMove`
+   - MovableList Set → write `ListSet`
+   - Tree → write `RawTreeMove` (referencing peer_idx/position_idx, etc.)
+   - Future → write `I64/F64/Unknown(...)`
+4. `len`: must equal `op_len(op.content)` (see 4.4.2); it feeds the change atom_len accumulation.
+5. The change-header part:
+   - change atom_len: write n-1 entries (the last is derived from counter_len - sum)
+   - the dep_on_self optimization: if deps contains `ID(peer, change_start_counter-1)`, set dep_on_self=true and remove that entry before encoding the remaining deps
+   - lamport: write n-1 entries (the last is derived from lamport_start/lamport_len)
+
+---
+
+## 4.6 Test JSON shape (suggested)
+
+For cross-language comparison, the Moon `decode --emit-changes-json` output should align with Rust's `encoding/json_schema.rs::json::JsonChange/JsonOp`:
+
+- `Change` JSON:
+  - `id`: `"{counter}@{peer}"`
+  - `timestamp`: i64
+  - `deps`: `["{counter}@{peer}", ...]`
+  - `lamport`: u32
+  - `msg`: string or null
+  - `ops`: an array
+- `Op` JSON:
+  - `container`: `ContainerIDString`
+  - `counter`: i32
+  - `content`: a tagged object per container type (e.g. `{"type":"insert",...}`), with field names matching the Rust json_schema
+  - `fractional_index`: an uppercase hex string
+  - `LoroValue`: the human-readable rules of 4.2
+
+Additionally, a debug output (not used for comparison) is recommended:
+
+- `wire`: includes `container_idx/prop/value_type/len` and the consumption offsets into values/delete_start_ids (to triage encoding-mapping bugs)
+
+---
+
+## 4.7 Suggested test entry points (leveraging Change/Op)
+
+1. **Unit tests (the decode_op mapping)**:
+   - given `(container_type, prop, value_kind+payload, delete_start_id?)`, assert the reconstructed `OpContent` is correct.
+2. **Golden tests (changes.json comparison)**:
+   - Rust additionally emits `changes.json` for each updates case (reuse `encoding::json_schema::export_json_in_id_span` or a custom exporter).
+   - Moon decodes the same blob and emits `changes.json`; run a structural diff (ignoring debug fields).
+3. **End to end (transcode + import)**:
+   - the final verdict is still the deep-value comparison after a Rust import, but a Change/Op-level diff quickly localizes a bug to "ops vs. state".
diff --git a/moon/specs/05-fastupdates-changeblock-encoding.md b/moon/specs/05-fastupdates-changeblock-encoding.md
new file mode 100644
index 000000000..7118f1e76
--- /dev/null
+++ b/moon/specs/05-fastupdates-changeblock-encoding.md
@@ -0,0 +1,224 @@
+# 05. FastUpdates (mode=4) / ChangeBlock encoding: context and implementation notes
+
+This document supports the MoonBit side in doing a true `decode -> Change/Op -> encode` for **FastUpdates** (rather than merely validating and passing the bytes through), meeting the end goal that "every export format can be decoded/encoded in both directions between Rust and Moon".
+
+> Hard-won lesson: some details in `docs/encoding.md` lag behind or are incomplete; the Rust source is the ground truth here, and the easy-to-trip points are written down explicitly to avoid repeated trial and error.
+
+## 5.1 Ground-truth sources (must read)
+
+- ChangeBlock packing and op encoding:
+  - `crates/loro-internal/src/oplog/change_store/block_encode.rs`
+  - `crates/loro-internal/src/encoding/outdated_encode_reordered.rs` (`get_op_prop` / `encode_op` are what actually determine `prop` and `value_type`)
+- ChangeBlock header/meta encoding:
+  - `crates/loro-internal/src/oplog/change_store/block_meta_encode.rs` (`encode_changes`)
+- Arenas:
+  - `crates/loro-internal/src/encoding/arena.rs` (`ContainerArena` / `PositionArena`)
+- Value encoding (the values segment):
+  - `crates/loro-internal/src/encoding/value.rs` (`ValueWriter` / `ValueKind`)
+- Dependency-library semantics (for debugging strategies / empty-sequence encodings):
+  - `serde_columnar` 0.3.14 (strategies and wrapper behavior)
+  - `postcard` (struct/vec/bytes encoding semantics)
+
+## 5.2 EncodedBlock (outermost layer: a postcard struct)
+
+The Rust `EncodedBlock<'a>` (see `block_encode.rs`) is postcard-serialized as:
+
+1. `counter_start: u32` (carried as a postcard varint(u64); must be ≤ u32)
+2. `counter_len: u32`
+3. `lamport_start: u32`
+4. `lamport_len: u32`
+5. `n_changes: u32`
+6. `header: bytes` (postcard bytes: `varint(len) + raw`)
+7. `change_meta: bytes`
+8. `cids: bytes`
+9. `keys: bytes`
+10. `positions: bytes`
+11. `ops: bytes`
+12. `delete_start_ids: bytes`
+13. `values: bytes`
+
+Field order is critical: the Moon-side `decode_encoded_block/encode_encoded_block` must match it exactly.
+
+### Key derived fields (computed from the change list)
+
+In Rust's `encode_block`:
+
+- `counter_start = first_change.id.counter`
+- `counter_len = last_change.ctr_end() - first_change.id.counter`
+- `lamport_start = first_change.lamport()`
+- `lamport_len = last_change.lamport_end() - first_change.lamport()`
+- `n_changes = block.len()`
+
+The Moon-side `encode_change_block(changes)` should use the same definitions.
+
+## 5.3 The header segment (the first bytes output of `encode_changes`)
+
+Source: `block_meta_encode.rs::encode_changes`
+
+Layout (concatenated in order):
+
+1. **Peer table**
+   - `peer_count: ULEB128(u64)`
+   - `peer_ids: peer_count × u64_le`
+   - Constraint: `peers[0]` must be the block's own peer (Rust calls `peer_register.register(&peer)` before entering encode_block to guarantee this).
+
+2. **AtomLen (only N-1 entries)**
+   - for every change except the last: write its `atom_len` as `ULEB128(u64)`
+   - the last change's atom_len is not written; the decoder derives it as `counter_len - sum(prev)`.
+
+3. **Deps (flattened per change)**
+   - `dep_on_self: BoolRle` (length = N)
+   - `dep_len: AnyRle` (length = N; the number of deps after removing the self dep)
+   - `dep_peer_idx: AnyRle` (length = sum(dep_len); peer_idx into the peer table)
+   - `dep_counter: DeltaOfDelta` (length = sum(dep_len); the counters)
+
+4. **Lamport (only N-1 entries)**
+   - `lamport: DeltaOfDelta` (length = N-1)
+   - the last change's lamport is not encoded directly; the decoder computes:
+     - `last_lamport = lamport_start + lamport_len - last_atom_len`
+
+> Pitfall: the header's lamport DeltaOfDelta holds only N-1 elements; this is one of the most common sources of off-by-one bugs.
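+
+A tiny sketch making the two derivations explicit (plain integers; the ULEB128/DeltaOfDelta decoding is assumed to have happened already):
+
+```rust
+// Derive the fields that are deliberately *not* stored in the header.
+fn derive_last_atom_len(counter_len: u32, first_n_minus_1_atom_lens: &[u32]) -> u32 {
+    counter_len - first_n_minus_1_atom_lens.iter().sum::<u32>()
+}
+
+fn derive_last_lamport(lamport_start: u32, lamport_len: u32, last_atom_len: u32) -> u32 {
+    lamport_start + lamport_len - last_atom_len
+}
+```
+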
+## 5.4 The change_meta segment (the second bytes output of `encode_changes`)
+
+Source: `block_meta_encode.rs::encode_changes`
+
+Layout (concatenated in order):
+
+1. `timestamps: DeltaOfDelta` (length = N)
+2. `commit_msg_lens: AnyRle` (length = N; None → 0)
+3. `commit_msgs: bytes` (all non-empty commit messages concatenated directly as UTF-8)
+
+Decoding splits the trailing bytes by `commit_msg_lens`; encoding is the mirror image.
+
+## 5.5 The keys segment (the key_register output)
+
+Source: `block_encode.rs::encode_keys`
+
+Layout: repeat until EOF:
+
+- `len: ULEB128(u64)`
+- `utf8_bytes: len`
+
+Note: this uses **ULEB128** (not postcard varint).
+
+## 5.6 The cids segment (ContainerArena)
+
+Source: `encoding/arena.rs::ContainerArena::encode`
+
+**Key pitfall: it is not a columnar vec.**
+
+Rust actually calls `serde_columnar::to_vec(&self.containers)` with `self.containers: Vec<EncodedContainer>`.
+Because this serializes a **Vec directly via serde/postcard**, the result is a row-wise postcard Vec structure:
+
+- `vec_len: varint(u64)`
+- for each element (EncodedContainer, 4 fields):
+  - `field_count: varint(u64)` (always `4`; postcard's "struct as seq" encoding)
+  - `is_root: u8` (0/1)
+  - `kind: u8` (the ContainerID.to_bytes mapping: Map=0, List=1, Text=2, Tree=3, MovableList=4, Counter=5)
+  - `peer_idx: varint(u64)` (always 0 for root; the peer-table index for normal)
+  - `key_idx_or_counter: zigzag-varint(i64)` (i32 range)
+
+Semantic mapping:
+
+- root: `is_root=true`; `key_idx_or_counter` is the index of the container name in `keys`
+- normal: `is_root=false`; `peer_idx` resolves through `peers[peer_idx]`; `key_idx_or_counter` holds the counter
+
+Implementation suggestion:
+
+- in `encode_change_block`, first collect every ContainerID involved, build the `keys` and `peers` registries, then emit the container-arena bytes in order.
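+
+A sketch of decoding one such row. The `Reader` below is a self-contained stand-in for the `bytes_reader` module of the plan; the layout it follows is exactly the list above:
+
+```rust
+/// Minimal byte reader (stand-in for the planned `bytes_reader` module).
+struct Reader<'a> { buf: &'a [u8], pos: usize }
+
+impl<'a> Reader<'a> {
+    fn read_u8(&mut self) -> Result<u8, &'static str> {
+        let b = *self.buf.get(self.pos).ok_or("unexpected EOF")?;
+        self.pos += 1;
+        Ok(b)
+    }
+    fn read_varint_u64(&mut self) -> Result<u64, &'static str> {
+        let (mut v, mut shift) = (0u64, 0u32);
+        loop {
+            let b = self.read_u8()?;
+            v |= u64::from(b & 0x7f) << shift;
+            if b & 0x80 == 0 { return Ok(v); }
+            shift += 7;
+            if shift >= 64 { return Err("varint too long"); }
+        }
+    }
+    fn read_zigzag_i64(&mut self) -> Result<i64, &'static str> {
+        let v = self.read_varint_u64()?;
+        Ok((v >> 1) as i64 ^ -((v & 1) as i64))
+    }
+}
+
+/// One row of the cids segment (row-wise postcard, NOT a columnar vec).
+struct EncodedContainer {
+    is_root: bool,
+    kind: u8,      // Map=0, List=1, Text=2, Tree=3, MovableList=4, Counter=5
+    peer_idx: u64, // 0 for root containers
+    key_idx_or_counter: i64,
+}
+
+fn decode_container_row(r: &mut Reader) -> Result<EncodedContainer, &'static str> {
+    // postcard's "struct as seq" form: a field count (always 4) comes first.
+    if r.read_varint_u64()? != 4 {
+        return Err("EncodedContainer field count != 4");
+    }
+    Ok(EncodedContainer {
+        is_root: r.read_u8()? != 0,
+        kind: r.read_u8()?,
+        peer_idx: r.read_varint_u64()?,
+        key_idx_or_counter: r.read_zigzag_i64()?,
+    })
+}
+```
+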
+## 5.7 The positions segment (PositionArena v2)
+
+Source: `encoding/arena.rs::PositionArena::encode_v2`
+
+- if positions is empty: return empty bytes (length 0)
+- otherwise: `serde_columnar::to_vec(&PositionArena { positions: Vec<_> })`
+  - this includes the **struct wrapper** (field_count=1) plus a columnar vec with two columns:
+    - `common_prefix_length: AnyRle`
+    - `rest: bytes column` (note that a bytes column itself carries a count and per-entry lengths)
+
+The Moon side must distinguish:
+
+- ChangeBlock positions: empty bytes are allowed
+- TreeState fractional_indexes: Rust uses `PositionArena::encode()`, which produces a non-empty payload even when empty (so Moon needs a separate `encode_position_arena()` with those semantics)
+
+## 5.8 The ops segment (EncodedOps)
+
+Source: `block_encode.rs::EncodedOps`
+
+Rust: `serde_columnar::to_vec(&EncodedOps { ops })`
+
+Encoded as:
+
+- struct wrapper: `field_count=1`
+- columnar vec (4 columns):
+  1. `container_index: DeltaRle` (index into the container arena)
+  2. `prop: DeltaRle`
+  3. `value_type: Rle` (`ValueKind::to_u8()`)
+  4. `len: Rle` (the op atom_len)
+
+### Computing prop (must match Rust exactly)
+
+Ground truth: `encoding/outdated_encode_reordered.rs::get_op_prop`
+
+- List/MovableList/Text:
+  - Insert/InsertText: `prop = pos`
+  - Delete: `prop = pos`
+  - MovableList Move: `prop = to`
+  - MovableList Set: `prop = 0`
+  - Text StyleStart: `prop = start`
+  - Text StyleEnd: `prop = 0`
+- Map: `prop = key_register.register(map.key)` (the key_idx)
+- Tree: `prop = 0`
+- Future:
+  - Counter: `prop = 0`
+  - Unknown: `prop = op.prop`
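+
+Restated as code, the table above is essentially one `match`. This is a sketch over an abridged toy `OpContent` in the spirit of 04-ir-design; `key_idx` is assumed to come from the key register:
+
+```rust
+// Sketch of a get_op_prop equivalent over a reduced IR (illustrative only).
+enum OpContent {
+    ListInsert { pos: u32 },
+    Delete { pos: i32 },
+    MovableListMove { to: u32 },
+    MovableListSet,
+    TextInsert { pos: u32 },
+    Mark { start: u32 },
+    MarkEnd,
+    Map { key_idx: u32 }, // key_idx obtained from the key register
+    Tree,
+    Counter,
+    Unknown { prop: i32 },
+}
+
+fn get_op_prop(c: &OpContent) -> i32 {
+    match c {
+        OpContent::ListInsert { pos } | OpContent::TextInsert { pos } => *pos as i32,
+        OpContent::Delete { pos } => *pos,
+        OpContent::MovableListMove { to } => *to as i32,
+        OpContent::Mark { start } => *start as i32,
+        OpContent::Map { key_idx } => *key_idx as i32,
+        OpContent::MovableListSet
+        | OpContent::MarkEnd
+        | OpContent::Tree
+        | OpContent::Counter => 0,
+        OpContent::Unknown { prop } => *prop,
+    }
+}
+```
+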
+## 5.9 The delete_start_ids segment (EncodedDeleteStartIds)
+
+Source: `block_encode.rs`
+
+- if the block contains no DeleteSeq at all: this segment is the **0-byte empty string** (not an "empty columnar vec")
+- otherwise: `serde_columnar::to_vec(&EncodedDeleteStartIds { delete_start_ids })`
+  - struct wrapper: field_count=1
+  - columnar vec (3 columns):
+    - `peer_idx: DeltaRle`
+    - `counter: DeltaRle`
+    - `len: DeltaRle` (carry it as i64 on the Moon side)
+
+Generation rule (ground truth: `encoding/outdated_encode_reordered.rs::encode_op`):
+
+- for every DeleteSeq (List/MovableList/Text):
+  - push `EncodedDeleteStartId { peer_idx = peer_register.register(id_start.peer), counter, len }`
+  - write `Value::DeleteSeq` (no payload) into the values segment
+
+## 5.10 The values segment (the ValueWriter output)
+
+Source: `encoding/value.rs` + `outdated_encode_reordered.rs::encode_op`
+
+Rules:
+
+- the values segment is the concatenation of Value encodings in op order (each value starts with a 1-byte tag).
+- the `value_type` column comes from `ValueKind` and must agree with each value's actual tag.
+
+Value types to cover (mirroring the Moon-side `OpContent`):
+
+- Map: `LoroValue` / `DeleteOnce`
+- List/MovableList: `LoroValue::List` / `DeleteSeq` / `ListMove` / `ListSet`
+- Text: `Str` / `DeleteSeq` / `MarkStart` / `Null(=MarkEnd)`
+- Tree: `RawTreeMove` (covering the Create/Move/Delete tri-state: Delete uses deleted_root + position_idx=0)
+- Future: the I64/F64 optimization for counter; opaque bytes for unknown
+
+## 5.11 Staged implementation suggestion (avoid doing everything at once)
+
+1. **First, a semantically equivalent decode→encode FastUpdates transcode**:
+   - `parse_fast_updates_body` obtains each block's bytes
+   - `decode_change_block(bytes)` → `changes: Array[Change]`
+   - `encode_change_block(changes)` → `new_bytes`
+   - assemble the new_bytes into a fresh document
+2. Then do per-segment "visual" comparisons:
+   - add probes on the Rust side: hex-dump each segment's bytes (cids/ops have proven tricky)
+   - add a Moon-side debug command (optional) to diff against
+3. Only then chase "closer byte-level agreement with Rust" (e.g. compression, block size, identical field ordering).
diff --git a/moon/specs/06-jsonschema-export.md b/moon/specs/06-jsonschema-export.md
new file mode 100644
index 000000000..fcd09940e
--- /dev/null
+++ b/moon/specs/06-jsonschema-export.md
@@ -0,0 +1,98 @@
+# 06. JsonSchema (`docs/JsonSchema.md`) export: MoonBit conventions and details
+
+This document records the key conventions and implementation details of the MoonBit-side **export** of `docs/JsonSchema.md`, to ease future test growth and difference triage.
+
+Implementation:
+
+- `moon/loro_codec/json_schema_export.mbt`
+- CLI: `moon/cmd/loro_codec_cli/main.mbt` (`export-jsonschema`)
+
+> Note: this document covers **export** only (binary FastUpdates → JsonSchema JSON). The **encoding** direction (JsonSchema → FastUpdates) is covered in `moon/specs/07-jsonschema-encode.md`.
+
+---
+
+## 6.1 Overall structure and peer compression
+
+The JsonSchema root object:
+
+```ts
+{
+  schema_version: 1,
+  start_version: Record<string, number>,
+  peers: string[],
+  changes: Change[],
+}
+```
+
+Matching `docs/JsonSchema.md`:
+
+- `peers` holds the **actual PeerIDs (u64)** as decimal strings (avoiding JS number-precision issues).
+- The `{PeerID}` inside `Change.id` / `Change.deps` / `TreeID` / `ElemID` / `ContainerID(normal)` is a **peer index** (`0..peers.length-1`), i.e. the "peer-compressed" number.
+
+MoonBit-side approach:
+
+- while scanning Change/Op, dynamically `register_peer(actual_peer_id)`, assigning it a `peer_idx` and appending `actual_peer_id` to `peers[]`.
+- export `id` fields as `{counter}@{peer_idx}`.
+
+---
+
+## 6.2 Rebuilding `start_version` (derived from the binary FastUpdates)
+
+Rust's `export_json_updates(start_vv, end_vv)` embeds `start_version = vv_to_frontiers(start_vv)` in the JSON.
+
+But **the binary FastUpdates format itself does not carry start_vv**, so the MoonBit export function
+`export_json_schema_from_fast_updates(bytes, validate)` derives it best-effort:
+
+1. First decode the set of changes contained in the blob, `included_ids`.
+2. Walk each change's deps:
+   - a dep not in `included_ids` is considered a dependency outside the exported range (an external dep)
+3. For each actual peer, take the maximum counter among its external deps as the value of `start_version[peer]`.
+
+This derivation matches Rust's `start_version` in the typical scenarios:
+
+- `all_updates()`: external deps are usually empty ⇒ `start_version = {}`
+- `Updates { from: vv_v1 }`: external deps usually include the frontier of `vv_v1` ⇒ `start_version` is non-empty
+
+> Remark: the Rust importer currently does not hard-validate `start_version` when importing json updates, but it is valuable for debugging/tooling, so we still align with Rust as far as possible.
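+
+The derivation above is a small set computation. A Rust sketch, under the simplifying assumption that an `ID` is `(peer, counter)` and a decoded change exposes `id`, `atom_len`, and `deps`:
+
+```rust
+use std::collections::HashMap;
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+struct Id { peer: u64, counter: i32 }
+struct Change { id: Id, atom_len: i32, deps: Vec<Id> }
+
+/// Best-effort start_version: the max external-dep counter per peer.
+fn derive_start_version(changes: &[Change]) -> HashMap<u64, i32> {
+    // A dep is "included" if it falls inside some change's counter span.
+    let included = |id: Id| {
+        changes.iter().any(|c| {
+            c.id.peer == id.peer
+                && id.counter >= c.id.counter
+                && id.counter < c.id.counter + c.atom_len
+        })
+    };
+    let mut start: HashMap<u64, i32> = HashMap::new();
+    for dep in changes.iter().flat_map(|c| &c.deps) {
+        if !included(*dep) {
+            let e = start.entry(dep.peer).or_insert(dep.counter);
+            *e = (*e).max(dep.counter);
+        }
+    }
+    start
+}
+```
+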
+---
+
+## 6.3 Number encoding and precision
+
+JsonSchema fields include numbers such as `timestamp (i64)` / `lamport (u32)` / `counter (i32)`.
+
+When MoonBit emits JSON:
+
+- it still uses the JSON number type
+- but for integers it also sets `Json::number(number, repr=...)`, using the decimal string as the `repr`
+
+Purpose:
+
+- the JSON text keeps an exact integer representation (so no intermediate link can turn large integers into scientific notation or lose precision)
+- Rust `serde_json` still parses from the `repr`, so `loro::JsonSchema` deserialization is unaffected
+
+---
+
+## 6.4 The `LoroValue::Container` string prefix and the ID increment rule
+
+`docs/JsonSchema.md` specifies that a Container-valued `LoroValue` is encoded in JSON as:
+
+```
+"🦜:cid:{Counter}@{PeerID}:{ContainerType}"
+```
+
+where `{PeerID}` is, again, a **peer index**.
+
+From the binary `ValueEncoding`, MoonBit currently only obtains the `ContainerType` (the `LoroValue::ContainerType` case), so the ContainerID must be reconstructed from **the current op's ID**:
+
+- the `ContainerID` uses `op_id = ID(change_peer, op.counter)` as the basis for `{Counter}@{PeerID}`
+- for `ListInsertOp.value` (an array), apply Rust's `id.inc(i)` rule:
+  - the `i`-th element uses `ID(change_peer, op.counter + i)`
+- for `MapInsertOp.value` (a map value), use the same `op_id` (no increment)
+
+---
+
+## 6.5 Current limitations
+
+- Only binary `FastUpdates (mode=4)` can be exported to JsonSchema (FastSnapshot is not supported).
+- `UnknownOp` currently exports a placeholder structure (`value_type="unknown", value=null`) to keep the export usable; if needed later, align with Rust's `OwnedValue` / `EncodedValue` details.
diff --git a/moon/specs/07-jsonschema-encode.md b/moon/specs/07-jsonschema-encode.md
new file mode 100644
index 000000000..b3c1e3770
--- /dev/null
+++ b/moon/specs/07-jsonschema-encode.md
@@ -0,0 +1,106 @@
+# 07. JsonSchema (`docs/JsonSchema.md`) encoding: generating FastUpdates from JsonSchema in MoonBit
+
+This document records the key conventions and implementation details of the MoonBit-side **encoding** (encode / import) of `docs/JsonSchema.md`:
+
+- input: JsonSchema JSON (a string)
+- output: a binary `FastUpdates (mode=4)` blob (importable via Rust `LoroDoc.import(...)`)
+
+Implementation:
+
+- `moon/loro_codec/json_schema_import.mbt` (`encode_fast_updates_from_json_schema`)
+- CLI: `moon/cmd/loro_codec_cli/main.mbt` (`encode-jsonschema`)
+
+---
+
+## 7.1 API / CLI
+
+MoonBit API:
+
+- `encode_fast_updates_from_json_schema(json: String, validate: Bool) -> Bytes`
+
+CLI:
+
+- `loro-codec encode-jsonschema <in.json> <out.blob>`
+
+---
+
+## 7.2 Input conventions: peer compression and ID parsing
+
+The JsonSchema root fields (see `docs/JsonSchema.md`):
+
+```ts
+{
+  schema_version: 1,
+  start_version: Record<string, number>,
+  peers: string[], // optional
+  changes: Change[],
+}
+```
+
+MoonBit parses ID / ContainerID in one of two modes:
+
+1. **With peers (peer compression)**: `id = "{counter}@{peer_idx}"`, where `peer_idx` is in `0..peers.length-1`;
+2. **Without peers (uncompressed)**: `id = "{counter}@{peer_id}"`, where `peer_id` is directly the decimal string of the 64-bit PeerID.
+
+> Rust's `LoroDoc.export_json_updates(...)` outputs the compressed format with `peers` by default, so path (1) is the main one.
+
+---
+
+## 7.3 Why counter continuity must be validated
+
+The binary `ChangeBlock` of FastUpdates does not explicitly store the complete list of "start counters" for every `Op`/`Change`.
+
+- across one peer's changes: the next change's start counter is derived from the previous change's `atom_len` (the accumulated op_len);
+- within a change: likewise, the op counter sequence is derived from `op.len()`.
+
+Therefore JsonSchema → ChangeBlock must ensure:
+
+- within one peer: the changes, sorted by `change.id.counter`, are **contiguous**;
+- within each change: the ops, sorted by `op.counter`, are **contiguous**;
+- and the derivations `expected += op.len()` / `expected += atom_len` hold.
+
+MoonBit performs these checks in `jsonschema_import_sort_and_validate_changes(...)`; violations are reported as errors.
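+
+The continuity check itself is only a few lines. A Rust sketch over toy shapes (sorting is assumed done; `op_len` per 04-ir-design section 4.4.2):
+
+```rust
+// Toy shapes; ops carry absolute counters and a length (see 04-ir-design 4.4.2).
+struct Op { counter: i32, len: i32 }
+struct Change { start_counter: i32, ops: Vec<Op> }
+
+/// Validate that one peer's changes (sorted by start counter) are contiguous,
+/// and that each change's ops are contiguous internally.
+fn validate_continuity(changes: &[Change]) -> Result<(), String> {
+    let mut expected_change = changes.first().map_or(0, |c| c.start_counter);
+    for c in changes {
+        if c.start_counter != expected_change {
+            return Err(format!(
+                "change gap: expected {expected_change}, got {}",
+                c.start_counter
+            ));
+        }
+        let mut expected_op = c.start_counter;
+        for op in &c.ops {
+            if op.counter != expected_op {
+                return Err(format!("op gap: expected {expected_op}, got {}", op.counter));
+            }
+            expected_op += op.len;
+        }
+        expected_change = expected_op; // the next change starts where this one ended
+    }
+    Ok(())
+}
+```
+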
+---
+
+## 7.4 Blocking strategy: one ChangeBlock per peer
+
+The encoding flow (simplified):
+
+1. parse all `changes[]` into MoonBit `Change`/`Op`;
+2. group by the **actual peer id**;
+3. build one `DecodedChangeBlock` per peer and call `encode_change_block(...)` to get the block bytes;
+4. write all blocks into a `FastUpdates (mode=4)` body (`ULEB128(len) + bytes` × N);
+5. wrap it with `encode_document(4, body)` to produce the final checksummed blob.
+
+With `validate=true`, each generated block is decoded again via `decode_change_block(...)` as a self-check, catching encoding bugs early.
+
+---
+
+## 7.5 Supported Op / Value range and limitations
+
+Currently supported container types:
+
+- `Map` / `List` / `Text` / `Tree` / `MovableList` / `Counter`
+
+Current limitations:
+
+- `UnknownOp` is not supported yet (encountering one raises an error).
+- The JsonSchema form of `Counter` uses `JsonOpContent::Future` (fields `type="counter"` + `prop` + `value_type/value`); currently only:
+  - `prop == 0`
+  - `value_type` of `f64` or `i64` (encoded as `F64/I64` in the binary values segment)
+- `LoroValue::Container` (JSON `"🦜:cid:..."`) supports normal containers only, and its `peer/counter` must **equal** the current op's `op_id(peer, counter)`:
+  - the binary ValueEncoding stores only the `container_type` for a container value (no peer/counter), so the container id must be derived back from `op_id`;
+  - root container values (`cid:root-*`) are unrepresentable in the binary value form and are currently rejected.
+- JSON arrays inside a `LoroValue` are always parsed as `List` (matching the Rust-side `LoroValue` JSON deserialization); the array forms of `Binary` and `List` therefore cannot be distinguished unambiguously in JSON.
+
+---
+
+## 7.6 Handling `start_version`
+
+JsonSchema's `start_version` is **ignored** when encoding to FastUpdates:
+
+- the binary FastUpdates format does not carry a `start_version`
+- the importer (Rust `LoroDoc.import(...)`) does not need it either
+
+If tooling ever needs `start_version` to "backfill missing history", store it separately in an outer protocol.
diff --git a/moon/specs/README.md b/moon/specs/README.md
new file mode 100644
index 000000000..af216e1ca
--- /dev/null
+++ b/moon/specs/README.md
@@ -0,0 +1,19 @@
+# Implementing the Loro encoding format in Moonbit: spec and plan index
+
+This directory holds the implementation plan, key spec notes, and test/acceptance strategy for "implementing the Loro binary encoding format described in `docs/encoding.md` in Moonbit".
+
+## Documents
+
+- `moon/specs/00-goals-and-acceptance.md`: goals, scope, acceptance criteria (cross-language e2e interop is the bar)
+- `moon/specs/01-context-checklist.md`: the context to gather/confirm before starting (specs/sources/boundary conditions)
+- `moon/specs/02-module-plan.md`: the detailed module-by-module plan (dependencies, tests, and exit criteria per step)
+- `moon/specs/03-e2e-test-plan.md`: the e2e test plan (Rust vector generation, the Moon CLI contract, comparison checks)
+- `moon/specs/04-ir-design.md`: the Moon-side Change / Op data structure design (focus: Change / Op shapes and the test-friendly JSON form)
+- `moon/specs/05-fastupdates-changeblock-encoding.md`: FastUpdates / ChangeBlock encoding notes (Rust source as ground truth), toward a true decode→encode for mode=4
+- `moon/specs/06-jsonschema-export.md`: JsonSchema (`docs/JsonSchema.md`) export details (binary FastUpdates → JsonSchema JSON)
+- `moon/specs/07-jsonschema-encode.md`: JsonSchema encoding details (JsonSchema JSON → binary FastUpdates)
+
+## Conventions
+
+- Moonbit code lives under `moon/` (e.g. `moon/loro_codec/`, `moon/bin/`, `moon/tests/`).
+- The final definition of "correct": Rust ↔ Moon exports/imports can decode/encode each other, with state equality verified on the Rust side via `get_deep_value()` (or an equivalent interface).
diff --git a/skills/moonbit/.gitignore b/skills/moonbit/.gitignore
new file mode 100644
index 000000000..f665aad37
--- /dev/null
+++ b/skills/moonbit/.gitignore
@@ -0,0 +1,6 @@
+target/
+.mooncakes/
+.moonagent/
+dist
+_build/
+target
\ No newline at end of file
diff --git a/skills/moonbit/LICENSE b/skills/moonbit/LICENSE
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/skills/moonbit/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/skills/moonbit/README.md b/skills/moonbit/README.md
new file mode 100644
index 000000000..8b6ca4fe5
--- /dev/null
+++ b/skills/moonbit/README.md
@@ -0,0 +1,46 @@
+# MoonBit Agent Skill
+
+This repository contains an [Agent Skill](https://agentskills.io/home) that teaches AI coding agents the MoonBit language and its toolchain.
+
+## Integrate the Skill into your agent
+
+Different AI assistants require different configuration methods. Below are guides for popular coding assistants:
+
+### Codex CLI
+
+```shell
+mkdir -p ~/.codex/skills/
+git clone https://github.com/moonbitlang/moonbit-agent-guide ~/.codex/skills/moonbit
+```
+
+Documentation: https://developers.openai.com/codex/skills
+
+### Claude Code
+
+```shell
+mkdir -p ~/.claude/skills/
+git clone https://github.com/moonbitlang/moonbit-agent-guide ~/.claude/skills/moonbit
+```
+
+Documentation: https://code.claude.com/docs/en/skills
+
+### GitHub Copilot for VS Code
+
+```shell
+# enable the moonbit skill for the current repository
+mkdir -p ./.github/skills/
+git clone https://github.com/moonbitlang/moonbit-agent-guide ./.github/skills/moonbit
+```
+
+Note: Agent Skills support in VS Code is currently in preview and available only in [VS Code Insiders](https://code.visualstudio.com/insiders/). Enable the `chat.useAgentSkills` setting to use Agent Skills. See [Use Agent Skills in VS Code](https://code.visualstudio.com/docs/copilot/customization/agent-skills) for details.
+
+### Cursor & Cursor CLI
+
+> Agent Skills are available only in the Cursor nightly release channel.
+
+Documentation: https://cursor.com/cn/docs/context/skills
+
+### Gemini CLI
+
+It appears that Gemini CLI will support Agent Skills in the next release: https://github.com/google-gemini/gemini-cli/issues/15327
diff --git a/skills/moonbit/SKILL.mbt.md b/skills/moonbit/SKILL.mbt.md
new file mode 120000
index 000000000..d69512cfc
--- /dev/null
+++ b/skills/moonbit/SKILL.mbt.md
@@ -0,0 +1 @@
+SKILL.md
\ No newline at end of file
diff --git a/skills/moonbit/SKILL.md b/skills/moonbit/SKILL.md
new file mode 100644
index 000000000..f8a6a9269
--- /dev/null
+++ b/skills/moonbit/SKILL.md
@@ -0,0 +1,973 @@
+---
+name: moonbit-agent-guide
+description: Guide for writing, refactoring, and testing MoonBit projects. Use when working in MoonBit modules or packages, organizing MoonBit files, using moon tooling (build/check/test/doc/ide), or following MoonBit-specific layout, documentation, and testing conventions.
+---
+
+# MoonBit Project Layouts
+
+MoonBit uses the `.mbt` extension for source files and `.mbti` for interface files. At
+the top level of a MoonBit project there is a `moon.mod.json` file specifying
+the metadata of the project. The project may contain multiple packages, each
+with its own `moon.pkg.json` file.
+
+## Example layout
+
+```
+my_module
+├── moon.mod.json         # Module metadata; the source field (optional) specifies the source directory of the module
+├── moon.pkg.json         # Package metadata (each directory is a package, like Golang)
+├── README.mbt.md         # Markdown with tested code blocks (`test "..." { ... }`)
+├── README.md -> README.mbt.md
+├── cmd                   # Command line directory
+│   └── main
+│       ├── main.mbt
+│       └── moon.pkg.json # executable package with {"is_main": true}
+├── liba/                 # Library packages
+│   ├── moon.pkg.json     # Referenced by other packages as `@username/my_module/liba`
+│   └── libb/             # Library packages
+│       └── moon.pkg.json # Referenced by other packages as `@username/my_module/liba/libb`
+├── user_pkg.mbt          # Root package, referenced by other packages as `@username/my_module`
+├── user_pkg_wbtest.mbt   # White-box tests (only needed for testing internal private members, similar to Golang's package mypackage)
+└── user_pkg_test.mbt     # Black-box tests
+└── ...                   # More package files; symbols are visible to the current package (like Golang)
+```
+
+- **Module**: the `moon.mod.json` file in the project directory.
+  A MoonBit *module* is like a Go module: it is a collection of packages in subdirectories, usually corresponding to a repository or project.
+  Module boundaries matter for dependency management and import paths.
+
+- **Package**: a `moon.pkg.json` file per directory.
+  All subcommands of `moon` are still executed in the directory of the module (where `moon.mod.json` is
+  located), not in the current package.
+  A MoonBit *package* is the actual compilation unit (like a Go package).
+  All source files in the same package are concatenated into one unit.
+  The `package` name in the source defines the package, not the file name.
+  Imports refer to module + package paths, NEVER to file names.
+
+- **Files**:
+  A `.mbt` file is just a chunk of source inside a package.
+  File names do NOT create modules or namespaces.
+  You may freely split/merge/move declarations between files in the same package.
+  Any declaration in a package can reference any other declaration in that package, regardless of file.
+
+
+## Coding/layout rules you MUST follow:
+
+1. Prefer many small, cohesive files over one large file.
+   - Group related types and functions into focused files (e.g. http_client.mbt, router.mbt).
+   - If a file is getting large or unfocused, create a new file and move related declarations into it.
+
+2. You MAY freely move declarations between files inside the same package.
+   - Each block is separated by `///|`; moving a function/struct/trait between files does not change semantics as long as its name and pub-ness stay the same, and the order of blocks is irrelevant too.
+   - It is safe to refactor by splitting or merging files inside a package.
+
+3. File names are purely organizational.
+   - Do NOT assume file names define modules, and do NOT use file names in type paths.
+   - Choose file names to describe a feature or responsibility, not to mirror type names rigidly.
+
+4. When adding new code:
+   - Prefer adding it to an existing file that matches the feature.
+   - If no good file exists, create a new file under the same package with a descriptive name.
+   - Avoid creating giant "misc" or "util" files.
+
+5. Tests:
+   - Place tests in dedicated test files (e.g. *_test.mbt) within the appropriate package.
+     For a package, besides `*_test.mbt` files, `*.mbt.md` files are also black-box test files; their `mbt check` code blocks are treated as test cases, serving both purposes: documentation and tests.
+     You may have `README.mbt.md` files with `mbt check` code examples; you can also symlink `README.mbt.md` to `README.md`
+     to make it integrate better with GitHub.
+   - It is fine, and encouraged, to have multiple small test files.
+6. Interface files (`pkg.generated.mbti`)
+   `pkg.generated.mbti` files are compiler-generated summaries of each package's public API surface. They provide a formal, concise overview of all exported types, functions, and traits without implementation details.
+   They are generated using `moon info` and are useful for code review: when a commit does not change public APIs, the `pkg.generated.mbti` files remain unchanged, so it is recommended to put them in version control when you are done.
+
+   You can also use `moon doc @moonbitlang/core/strconv` to explore the public API of a package interactively, and `moon ide peek-def 'Array::join'` to read
+   the definition.
+
+# Common Pitfalls to Avoid
+
+- **Don't use uppercase for variables/functions** - compilation error
+- **Don't forget `mut` for mutable record fields** - immutable by default
+- **Don't ignore error handling** - errors must be explicitly handled
+- **Don't use `return` unnecessarily** - the last expression is the return value
+- **Don't create methods without the Type:: prefix** - methods need an explicit type prefix
+- **Don't forget to handle array bounds** - use `get()` for safe access
+- **Don't forget the @package prefix when calling functions from other packages**
+- **Don't use ++ or -- (not supported)** - use `i = i + 1` or `i += 1`
+- **Don't add explicit `try` for error-raising functions** - errors propagate automatically (unlike Swift)
+- **Legacy syntax**: older code may use `function_name!(...)` or `function_name(...)?` - these are deprecated; use normal calls and `try?` for Result conversion
+
+
+# `moon` Essentials
+
+## Essential Commands
+
+- `moon new my_project` - Create a new project
+- `moon run cmd/main` - Run the main package
+- `moon build` - Build the project
+- `moon check` - Type check without building; use it REGULARLY, it is fast
+- `moon info` - Type check and generate `mbti` files;
+  run it to see whether any public interfaces changed.
+- `moon check --target all` - Type check for all backends
+- `moon add package` - Add a dependency
+- `moon remove package` - Remove a dependency
+- `moon fmt` - Format code
+
+### Test Commands
+
+- `moon test` - Run all tests
+- `moon test --update` - Update snapshots
+- `moon test -v` - Verbose output with test names
+- `moon test [dirname|filename]` - Test a specific directory or file
+- `moon coverage analyze` - Analyze coverage
+- `moon test --filter '<glob>'` - Run tests matching a glob filter
+  ```
+  moon test float/float_test.mbt --filter "Float::*"
+  ```
+## `README.mbt.md` Generation Guide
+
+- Output `README.mbt.md` in the package directory.
+  `*.mbt.md` files and docstring contents treat `mbt check` specially.
+  An `mbt check` block is included directly as code and is also run by `moon check` and `moon test`. If you don't want the code snippets to be checked, explicit `mbt nocheck` is preferred.
+  If you are only referencing types from the package, you should use `mbt nocheck`, which is only syntax-highlighted.
+  Symlink `README.mbt.md` to `README.md` to adapt to systems that expect `README.md`.
+
+## Testing Guide
+
+Use snapshot tests, as they are easy to update when behavior changes.
+
+- **Snapshot Tests**: `inspect(value, content="...")`. If the content is unknown, write `inspect(value)` and run `moon test --update` (or `moon test -u`).
+  - Use regular `inspect()` for simple values (uses the `Show` trait)
+  - Use `@json.inspect()` for complex nested structures (uses the `ToJson` trait, producing more readable output)
+  - It is encouraged to `inspect` or `@json.inspect` the whole return value of a function if
+    the whole return value is not huge; this keeps tests simple. You need `impl (Show|ToJson) for YourType` or `derive (Show, ToJson)`.
+- **Update workflow**: After changing code that affects output, run `moon test --update` to regenerate snapshots, then review the diffs in your test files (the `content=` parameter is updated automatically).
+
+- Black-box by default: call only public APIs via `@package.fn`. Use white-box tests only when private members matter.
+- Grouping: combine related checks in one `test "..." { ... }` block for speed and clarity.
+- Panics: name the test with the prefix `test "panic ..." {...}`; if the call returns a value, wrap it with `ignore(...)` to silence warnings.
+- Errors: use `try? f()` to get a `Result[...]` and `inspect` it when a function may raise.
+- Verify: run `moon test` (or `-u` to update snapshots) and `moon fmt` afterwards.
+
+### Docstring tests
+
+Public APIs are encouraged to have docstring tests.
+````mbt check
+///|
+/// Get the sum of the elements of an `Array`.
+///
+/// # Example
+/// ```mbt check
+/// test {
+///   inspect(sum_array([1, 2, 3, 4, 5, 6]), content="21")
+/// }
+/// ```
+pub fn sum_array(xs : Array[Int]) -> Int {
+  xs.fold(init=0, (a, b) => a + b)
+}
+````
+
+The MoonBit code in docstrings is type-checked and tested automatically
+(using `moon test --update`). In docstrings, `mbt check` should only contain `test` or `async test`.
+
+## Spec-driven Development
+
+- The spec can be written in a readonly `spec.mbt` file (the name is conventional, not mandatory) with stub code marked as declarations:
+
+```mbt check
+///|
+#declaration_only
+pub type Yaml
+
+///|
+#declaration_only
+pub fn Yaml::to_string(y : Yaml) -> String raise {
+  ...
+}
+
+///|
+#declaration_only
+pub fn parse_yaml(s : String) -> Yaml raise {
+  ...
+}
+```
+
+- Add `spec_easy_test.mbt`, `spec_difficult_test.mbt`, etc. to test the spec functions; everything is type-checked (`moon check`).
+- The AI or students can implement the `declaration_only` functions in different files thanks to our package organization.
+- Run `moon test` to check that everything is correct.
+
+- `#declaration_only` is supported for functions, methods, and types.
+- The `pub type Yaml` line is an intentionally opaque placeholder; the implementer chooses its representation.
+- Note that the spec file can also contain normal code, not just declarations.
+
+## `moon doc` for API Discovery
+
+**CRITICAL**: `moon doc ''` is your PRIMARY tool for discovering available APIs, functions, types, and methods in MoonBit. Always prefer `moon doc` over other approaches when exploring what APIs are available; it is **more powerful and accurate** than `grep_search` or any regex-based search tool.

`moon doc` uses a specialized query syntax designed for symbol lookup:

- **Empty query**: `moon doc ''`

  - In a module: shows all available packages in the current module, including dependencies and moonbitlang/core
  - In a package: shows all symbols in the current package
  - Outside a package: shows all available packages

- **Function/value lookup**: `moon doc "[@pkg.]value_or_function_name"`

- **Type lookup**: `moon doc "[@pkg.]Type_name"` (builtin types do not need a package prefix)

- **Method/field lookup**: `moon doc "[@pkg.]Type_name::method_or_field_name"`

- **Package exploration**: `moon doc "@pkg"`
  - Show package `pkg` and list all its exported symbols
  - Example: `moon doc "@json"` - explore the entire `@json` package
  - Example: `moon doc "@encoding/utf8"` - explore a nested package

- **Globbing**: Use the `*` wildcard for partial matches, e.g. `moon doc "String::*rev*"` to find all String methods with "rev" in their name

### `moon doc` Examples

````bash
# search for String methods in standard library:
$ moon doc "String"

type String

  pub fn String::add(String, String) -> String
  # ... more methods omitted ...

$ moon doc "@buffer" # list all symbols in package buffer:
moonbitlang/core/buffer

fn from_array(ArrayView[Byte]) -> Buffer
# ... omitted ...

$ moon doc "@buffer.new" # list the specific function in a package:
package "moonbitlang/core/buffer"

pub fn new(size_hint? : Int) -> Buffer
  Creates ... omitted ...


$ moon doc "String::*rev*" # globbing
package "moonbitlang/core/string"

pub fn String::rev(String) -> String
  Returns ... omitted ...
  # ... more

pub fn String::rev_find(String, StringView) -> Int?
  Returns ... omitted ...
````

**Best practice**: When implementing a feature, start with `moon doc` queries to discover available APIs before writing code. This is faster and more accurate than searching through files.

## `moon ide [peek-def|outline|find-references]` for code navigation and refactoring

For project-local symbols and navigation, use `moon ide outline .` to scan a package, `moon ide find-references <sym>` to locate usages, and `moon ide peek-def` for inline definition context and for locating toplevel symbols.

These tools save tokens and are more precise than grepping (grep shows results at both definitions and call sites, including comments).

### `moon ide peek-def sym [-loc filename:line:col]` example

When the user asks: Can you check if `Parser::read_u32_leb128` is implemented correctly?

In this case, you can run `moon ide peek-def Parser::read_u32_leb128` to get the definition context (this is better than `grep`, since it resolves the symbol semantically across the whole project):

``` file src/parse.mbt
L45:|///|
L46:|fn Parser::read_u32_leb128(self : Parser) -> UInt raise ParseError {
L47:|  ...
...:| }
```

Now, to see the definition of the `Parser` struct, you can run:

```bash
$ moon ide peek-def Parser -loc src/parse.mbt:46:4
Definition found at file src/parse.mbt
  | ///|
2 | priv struct Parser {
  |      ^^^^^^
  |   bytes : Bytes
  |   mut pos : Int
  | }
  |
```

For the `-loc` argument, the line number must be precise; the column can be approximate, since the positional argument `Parser` helps locate the position.

If the symbol is toplevel, the location can be omitted:

````bash
$ moon ide peek-def String::rev
Found 1 symbols matching 'String::rev':

`pub fn String::rev` in package moonbitlang/core/builtin at /Users/usrname/.moon/lib/core/builtin/string_methods.mbt:1039-1044
1039 | ///|
     | /// Returns a new string with the characters in reverse order. It respects
     | /// Unicode characters and surrogate pairs but not grapheme clusters.
     | pub fn String::rev(self : String) -> String {
     |   self[:].rev()
     | }
````

### `moon ide outline [dir|file]` and `moon ide find-references <sym>` for Package Symbols

Use these to scan a package or file for top-level symbols and to locate usages without grepping.

- `moon ide outline <dir>` outlines a package directory (per-file headers)
- `moon ide outline parser.mbt` outlines a single file
- Useful when you need a quick inventory of a package, or to find the right file before `goto-definition`
- `moon ide find-references TranslationUnit` finds all references to a symbol in the current module

```bash
$ moon ide outline .
spec.mbt:
  L003 | pub(all) enum CStandard {
  ...
  L013 | pub(all) struct Position {
  ...
```

```bash
$ moon ide find-references TranslationUnit
```

## Package Management

### Adding Dependencies

```sh
moon add moonbitlang/x       # Add latest version
moon add moonbitlang/x@0.4.6 # Add specific version
```

### Updating Dependencies

```sh
moon update # Update package index
```

### Typical Module configurations (`moon.mod.json`)

```json
{
  "name": "username/hello", // Required format for published modules
  "version": "0.1.0",
  "source": ".", // Source directory (optional, default: ".")
  "repository": "", // Git repository URL
  "keywords": [], // Search keywords
  "description": "...", // Module description
  "deps": {
    // Dependencies from mooncakes.io; use `moon add` to add dependencies
    "moonbitlang/x": "0.4.6"
  }
}
```

### Typical Package configuration (`moon.pkg.json`)

```json
{
  "is_main": true, // Creates executable when true
  "import": [ // Package dependencies
    "username/hello/liba", // Simple import, use @liba.foo() to call functions
    {
      "path": "moonbitlang/x/encoding",
      "alias": "libb" // Custom alias, use @libb.encode() to call functions
    }
  ],
  "test-import": [...], // Imports for black-box tests, similar to import
  "wbtest-import": [...] // Imports for white-box tests, similar to import (rarely used)
}
```

One package per directory; directories without `moon.pkg.json` are not recognized as packages.
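
Putting the two configs together, a minimal two-package layout might look like the sketch below (the `username/hello` module name and the file names are illustrative, not taken from this document):

```
hello/
├── moon.mod.json          # {"name": "username/hello", "version": "0.1.0"}
├── liba/
│   ├── moon.pkg.json      # {} - minimal package config
│   └── lib.mbt            # pub fn hello() -> String { "Hello" }
└── cmd/
    └── main/
        ├── moon.pkg.json  # {"is_main": true, "import": ["username/hello/liba"]}
        └── main.mbt       # fn main { println(@liba.hello()) }
```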

### Package Importing (used in moon.pkg.json)

- **Import format**: `"module_name/package_path"`
- **Usage**: `@alias.function()` to call imported functions
- **Default alias**: Last part of the path (e.g., `liba` for `username/hello/liba`)
- **Package reference**: Use `@packagename` in test files to reference the tested package

**Package Alias Rules**:

- Import `"username/hello/liba"` → use `@liba.function()` (default alias is the last path segment)
- Import with custom alias `{"path": "moonbitlang/x/encoding", "alias": "enc"}` → use `@enc.function()`
- In `_test.mbt` or `_wbtest.mbt` files, the package being tested is auto-imported

Example:

```mbt
///|
/// In main.mbt after importing "username/hello/liba" in `moon.pkg.json`
fn main {
  println(@liba.hello()) // Calls hello() from liba package
}
```

### Using Standard Library (moonbitlang/core)

**MoonBit standard library (moonbitlang/core) packages are automatically imported** - DO NOT add them to dependencies:

- ❌ **DO NOT** use `moon add` to add standard library packages like `moonbitlang/core/strconv`
- ❌ **DO NOT** add standard library packages to the `"deps"` field of `moon.mod.json`
- ❌ **DO NOT** add standard library packages to the `"import"` field of `moon.pkg.json`
- ✅ **DO** use them directly: `@strconv.parse_int()`, `@list.List`, `@array.fold()`, etc.

If you get an error like "cannot import `moonbitlang/core/strconv`", remove it from imports - it's automatically available.

### Creating Packages

To add a new package `fib` under `.`:

1. Create directory: `./fib/`
2. Add `./fib/moon.pkg.json`: `{}` -- the minimal valid moon.pkg.json
3. Add `.mbt` files with your code
4. Import it in dependent packages:

   ```json
   {
     "import": [
       "username/hello/fib",
       ...
     ]
   }
   ```

For more advanced topics like `conditional compilation`, `link configuration`, `warning control`, and `pre-build commands`, see `references/advanced-moonbit-build.md`.

# MoonBit Language Tour

## Core facts

- **Expression‑oriented**: `if`, `match`, loops return values; the last expression is the return.
- **References by default**: Arrays/Maps/structs mutate via reference; use `Ref[T]` for primitive mutability.
- **Blocks**: Separate top‑level items with `///|`. Generate code block‑by‑block.
- **Visibility**: `fn` private by default; `pub` exposes read/construct as allowed; `pub(all)` allows external construction.
- **Naming convention**: lower_snake for values/functions; UpperCamel for types/enums; enum variants start UpperCamel.
- **Packages**: No `import` in code files; call via `@alias.fn`. Configure imports in `moon.pkg.json`.
- **Placeholders**: `...` is a valid placeholder in MoonBit code for incomplete implementations.
- **Global values**: immutable by default and generally require type annotations.
- **Garbage collection**: MoonBit has a GC; there are no lifetime annotations and no ownership system.
  Unlike Rust, and like F#, `let mut` is only needed when you want to reassign a variable, not for mutating fields of a struct or elements of an array/map.
- **Delimit top-level items with `///|` comments** so tools can split the file reliably.

## MoonBit Error Handling (Checked Errors)

MoonBit uses checked error-throwing functions, not unchecked exceptions. All errors are subtypes of `Error`; we can declare our own error types with `suberror`.
Use `raise` in signatures to declare error types and let errors propagate by default.
Use `try?` to convert to `Result[...]` in tests, or `try { } catch { }`
to handle errors explicitly.

```mbt check
///|
/// Declare error types with 'suberror'
suberror ValueError String

///|
/// Tuple struct to hold position info
struct Position(Int, Int) derive(ToJson, Show, Eq)

///|
/// ParseError is a subtype of Error
pub(all) suberror ParseError {
  InvalidChar(pos~ : Position, Char) // pos is labeled
  InvalidEof(pos~ : Position)
  InvalidNumber(pos~ : Position, String)
  InvalidIdentEscape(pos~ : Position)
} derive(Eq, ToJson, Show)

///|
/// Functions declare what they can throw
fn parse_int(s : String, position~ : Position) -> Int raise ParseError {
  // 'raise' throws an error
  if s is "" {
    raise ParseError::InvalidEof(pos=position)
  }
  ... // parsing logic
}

///|
/// Just declare `raise` to not track specific error types
fn div(x : Int, y : Int) -> Int raise {
  if y is 0 {
    fail("Division by zero")
  }
  x / y
}

///|
test "inspect raise function" {
  let result : Result[Int, Error] = try? div(1, 0)
  guard result is Err(Failure(msg)) && msg.contains("Division by zero") else {
    fail("Expected error")
  }
}

// Three ways to handle errors:

///|
/// Propagate automatically
fn use_parse(position~ : Position) -> Int raise ParseError {
  let x = parse_int("123", position=position)
  // Error auto-propagates by default.
  // Unlike Swift, you do not need to mark `try` for functions that can raise
  // errors; the compiler infers it automatically. This keeps error handling
  // explicit but concise.
  x * 2
}

///|
/// Mark `raise` for all possible errors when you do not care which error it is.
/// For quick prototypes, `raise` is acceptable.
fn use_parse2(position~ : Position) -> Int raise {
  let x = parse_int("123", position=position)
  x * 2
}

///|
/// Convert to Result with try?
fn safe_parse(s : String, position~ : Position) -> Result[Int, ParseError] {
  let val1 : Result[_] = try? parse_int(s, position=position) // Returns Result[Int, ParseError]
  // try! is rarely used - it panics on error, similar to unwrap() in Rust
  // let val2 : Int = try! parse_int(s) // Returns Int, otherwise crashes

  // Alternative explicit handling:
  let val3 = try parse_int(s, position=position) catch {
    err => Err(err)
  } noraise { // noraise block is optional - handles the success case
    v => Ok(v)
  }
  ...
}

///|
/// Handle with try-catch
fn handle_parse(s : String, position~ : Position) -> Int {
  try parse_int(s, position=position) catch {
    ParseError::InvalidEof(..) => {
      println("Parse failed: InvalidEof")
      -1 // Default value
    }
    _ => 2 // fallback for all other errors
  }
}
```

Important: When calling a function that can raise errors, if you only want to
propagate the error, you do not need any marker; the compiler infers it.

## Integers, Char

MoonBit supports Byte, Int16, Int, UInt16, UInt, Int64, UInt64, etc.
When the type is known, the literal can be overloaded:

```mbt check
///|
test "integer and char literal overloading disambiguation via type in the current context" {
  let a0 = 1 // a0 is Int by default
  let (int, uint, uint16, int64, byte) : (Int, UInt, UInt16, Int64, Byte) = (
    1, 1, 1, 1, 1,
  )
  assert_eq(int, uint16.to_int())
  let a1 : Int = 'b' // this also works; a1 will be the unicode value of 'b'
  let a2 : Char = 'b'
}
```

## Bytes (Immutable)

```mbt check
///|
test "bytes literals overloading and indexing" {
  let b0 : Bytes = b"abcd"
  let b1 : Bytes = "abcd" // the b"..." prefix is optional when the type is known
  let b2 : Bytes = [0xff, 0x00, 0x01] // Array literal overloading
  guard b0 is [b'a', ..] && b0[1] is b'b' else {
    // Bytes can be pattern matched as BytesView and indexed
    fail("unexpected bytes content")
  }
}
```

## Array (Resizable)

```mbt check
///|
test "array literals overloading: disambiguation via type in the current context" {
  let a0 : Array[Int] = [1, 2, 3] // resizable
  let a1 : FixedArray[Int] = [1, 2, 3] // Fixed size
  let a2 : ReadOnlyArray[Int] = [1, 2, 3]
  let a3 : ArrayView[Int] = [1, 2, 3]
}
```

## String (Immutable UTF-16)

`s[i]` returns a code unit (UInt16), `s.get_char(i)` returns `Char?`.
Since MoonBit supports char literal overloading, you can write code snippets like this:

```mbt check
///|
test "string indexing and utf8 encode/decode" {
  let s = "hello world"
  let b0 : UInt16 = s[0]
  guard(b0 is ('\n' | 'h' | 'b' | 'a'..='z') && s is [.."hello", ..rest]) else {
    fail("unexpected string content")
  }
  guard rest is " world" // otherwise will crash (guard without else)

  // In check mode (expression with explicit type), ('\n' : UInt16) is valid.

  // Using get_char for Option handling
  let b1 : Char? = s.get_char(0)
  assert_true(b1 is Some('a'..='z'))

  // ⚠️ Important: Variables won't work with direct indexing
  let eq_char : Char = '='
  // s[0] == eq_char // ❌ Won't compile - eq_char is not a literal; lhs is UInt16 while rhs is Char
  // Use: s[0] == '=' or s.get_char(0) == Some(eq_char)
  let bytes = @encoding/utf8.encode("中文") // utf8 encode package is in stdlib
  assert_true(bytes is [0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87])
  let s2 : String = @encoding/utf8.decode(bytes) // decode utf8 bytes back to String
  assert_true(s2 is "中文")
  for c in "中文" {
    let _ : Char = c // unicode safe iteration
    println("char: \{c}") // iterate over chars
  }
}
```

### String Interpolation && StringBuilder

MoonBit uses `\{}` for string interpolation; custom types need to implement the `Show` trait.

```mbt check
///|
test "string interpolation basics" {
  let name : String = "Moon"
  let config = { "cache": 123 }
  let version = 1.0
  println("Hello \{name} v\{version}") // "Hello Moon v1.0"
  // ❌ Wrong - quotes inside interpolation not allowed:
  // println(" - Checking if 'cache' section exists: \{config["cache"]}")

  // ✅ Correct - extract to variable first:
  let has_key = config["cache"] // `"` not allowed in interpolation
  println(" - Checking if 'cache' section exists: \{has_key}")

  let sb = StringBuilder::new()
  sb..write_char('[') // dotdot for imperative method chaining
  ..write_view([1, 2, 3].map((x) => "\{x}").join(","))
  ..write_char(']')
  inspect(sb.to_string(), content="[1,2,3]")
}
```

Expressions inside `\{}` can only be _basic expressions_ (no quotes, newlines, or nested interpolations). String literals are not allowed, as that would make lexing too difficult.
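
As a sketch of interpolating a custom type (the `SemVer` type below is hypothetical, not part of the standard library), implementing `Show` is all that is needed; if the exact snapshot text differs, `moon test --update` will correct the `content=` parameter:

```mbt check
///|
struct SemVer {
  major : Int
  minor : Int
}

///|
/// Show is what `\{...}` interpolation calls into
pub impl Show for SemVer with output(self, logger) {
  logger.write_string("v\{self.major}.\{self.minor}")
}

///|
test "custom type in string interpolation" {
  let ver = SemVer::{ major: 1, minor: 2 }
  inspect("release \{ver}", content="release v1.2")
}
```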

### Multiple line strings

```mbt check
///|
test "multi-line string literals" {
  let multi_line_string : String =
    #|Hello "world"
    #|World
    #|
  let multi_line_string_with_interp : String =
    $|Line 1 ""
    $|Line 2 \{1+2}
    $|
  // no escapes in `#|`;
  // only `\{..}` is interpreted in `$|`
  assert_eq(multi_line_string, "Hello \"world\"\nWorld\n")
  assert_eq(multi_line_string_with_interp, "Line 1 \"\"\nLine 2 3\n")
}
```

## Map (Mutable, Insertion-Order Preserving)

```mbt check
///|
test "map literals and common operations" {
  // Map literal syntax
  let map : Map[String, Int] = { "a": 1, "b": 2, "c": 3 }
  let empty : Map[String, Int] = {} // Empty map, preferred
  let also_empty : Map[String, Int] = Map::new()
  // From array of pairs
  let from_pairs : Map[String, Int] = Map::from_array([("x", 1), ("y", 2)])

  // Set/update value
  map["new-key"] = 3
  map["a"] = 10 // Updates existing key

  // Get value - returns Option[T]
  guard map is { "new-key": 3, "missing"? : None, .. } else {
    fail("unexpected map contents")
  }

  // Direct access (panics if key missing)
  let value : Int = map["a"] // value = 10

  // Iteration preserves insertion order
  for k, v in map {
    println("\{k}: \{v}") // Prints: a: 10, b: 2, c: 3, new-key: 3
  }

  // Other common operations
  map.remove("b")
  guard map is { "a": 10, "c": 3, "new-key": 3, .. } && map.length() == 3 else {
    // "b" is gone, only 3 elements left
    fail("unexpected map contents after removal")
  }
}
```

## View Types

**Key Concept**: View types (`StringView`, `BytesView`, `ArrayView[T]`) are zero-copy, non-owning, read-only slices created with the `[:]` syntax. They don't allocate memory and are ideal for passing sub-sequences without copying data; functions that take `String`, `Bytes`, or `Array` also accept the corresponding `*View` (implicit conversion).

- `String` → `StringView` via `s[:]` or `s[start:end]`
- `Bytes` → `BytesView` via `b[:]` or `b[start:end]`
- `Array[T]`, `FixedArray[T]`, `ReadOnlyArray[T]` → `ArrayView[T]` via `a[:]` or `a[start:end]`

**Important**: StringView slicing is slightly different due to unicode safety:
`s[a:b]` may raise an error at surrogate boundaries (a UTF-16 encoding edge case). You have two options:

- Use `try! s[a:b]` if you're certain the boundaries are valid (crashes on invalid boundaries)
- Let the error propagate to the caller for proper handling

**When to use views**:

- Pattern matching with rest patterns (`[first, .. rest]`)
- Passing slices to functions without allocation overhead
- Avoiding unnecessary copies of large sequences

Convert back with `.to_string()`, `.to_bytes()`, or `.to_array()` when you need ownership (see `moon doc StringView`).
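
A minimal sketch of the zero-copy pattern (the `sum_view` helper is illustrative, not from the document): slices created with `[start:end]` flow into functions expecting views, and whole containers convert implicitly:

```mbt check
///|
/// Sums a slice without copying the underlying array
fn sum_view(xs : ArrayView[Int]) -> Int {
  let mut total = 0
  for x in xs {
    total += x
  }
  total
}

///|
test "zero-copy slices" {
  let arr = [1, 2, 3, 4]
  inspect(sum_view(arr[1:3]), content="5") // view over [2, 3]
  inspect(sum_view(arr), content="10") // implicit Array -> ArrayView
}
```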

## User defined types (`enum`, `struct`)

```mbt check
///|
enum Tree[T] {
  Leaf(T) // Unlike Rust, no comma here
  Node(left~ : Tree[T], T, right~ : Tree[T]) // enum can use labels
} derive(Show, ToJson) // derive traits for Tree

///|
pub fn Tree::sum(tree : Tree[Int]) -> Int {
  match tree {
    Leaf(x) => x
    // we don't need to write Tree::Leaf when `tree` has a known type
    Node(left~, x, right~) => left.sum() + x + right.sum() // method invoked in dot notation
  }
}

///|
struct Point {
  x : Int
  y : Int
} derive(Show, ToJson) // derive traits for Point

///|
test "user defined types: enum and struct" {
  @json.inspect(Point::{ x: 10, y: 20 }, content=({ "x": 10, "y": 20 }))
}
```

## Functional `for` loop

```mbt check
///|
pub fn binary_search(
  arr : ArrayView[Int],
  value : Int,
) -> Result[Int, Int] {
  let len = arr.length()
  // functional for loop:
  //   for <initial state>; [predicate]; [post-update] {
  //     loop body, with `continue` to update state
  //   } else {
  //     exit block
  //   }
  // predicate and post-update are optional
  for i = 0, j = len; i < j; {
    // post-update is omitted; we use `continue` to update state
    let h = i + (j - i) / 2
    if arr[h] < value {
      continue h + 1, j // functional update of loop state
    } else {
      continue i, h // functional update of loop state
    }
  } else { // exit of for loop
    if i < len && arr[i] == value {
      Ok(i)
    } else {
      Err(i)
    }
  } where {
    invariant : 0 <= i && i <= j && j <= len,
    invariant : i == 0 || arr[i - 1] < value,
    invariant : j == len || arr[j] >= value,
    reasoning :
      #|For a sorted array, the boundary invariants are witnesses:
      #| - `arr[i-1] < value` implies all arr[0..i) < value (by sortedness)
      #| - `arr[j] >= value` implies all arr[j..len) >= value (by sortedness)
      #|
      #|Preservation proof:
      #| - When arr[h] < value: new_i = h+1, and arr[new_i - 1] = arr[h] < value ✓
      #| - When arr[h] >= value: new_j = h, and arr[new_j] = arr[h] >= value ✓
      #|
      #|Termination: j - i decreases each iteration (h is strictly between i and j)
      #|
      #|Correctness at exit (i == j):
      #| - By invariants: arr[0..i) < value and arr[i..len) >= value
      #| - So if value exists, it can only be at index i
      #| - If arr[i] != value, then value is absent and i is the insertion point
      #|
  }
}

///|
test "functional for loop control flow" {
  let arr : Array[Int] = [1, 3, 5, 7, 9]
  inspect(binary_search(arr, 5), content="Ok(2)") // Array to ArrayView implicit conversion when passing as arguments
  inspect(binary_search(arr, 6), content="Err(3)")
  // for iteration is supported too
  for i, v in arr {
    println("\{i}: \{v}") // `i` is index, `v` is value
  }
}
```

You are *STRONGLY ENCOURAGED* to use functional `for` loops instead of imperative loops *WHENEVER POSSIBLE*, as they are easier to reason about.

### Loop Invariants with `where` Clause

The `where` clause attaches **machine-checkable invariants** and **human-readable reasoning** to functional `for` loops. This enables formal-verification thinking while keeping the code executable. Note: for trivial loops, you are encouraged to convert them into `for .. in`, so no reasoning is needed.

**Syntax:**

```mbt nocheck
for ... {
  ...
} where {
  invariant : <boolean expr>, // checked at runtime in debug builds
  invariant : <boolean expr>, // multiple invariants allowed
  reasoning : <string> // documentation for proof sketch
}
```

**Writing Good Invariants:**

1. **Make them checkable**: Invariants must be valid MoonBit boolean expressions using loop variables and captured values.

2. **Use boundary witnesses**: For properties over ranges (e.g., "all elements in arr[0..i) satisfy P"), check only boundary elements. For sorted arrays, `arr[i-1] < value` implies all `arr[0..i) < value`.

3. **Handle edge cases with `||`**: Use patterns like `i == 0 || arr[i-1] < value` to handle boundary conditions where the check would be out of bounds.

4. **Cover three aspects in reasoning**:
   - **Preservation**: Why each `continue` maintains the invariants
   - **Termination**: Why the loop eventually exits (e.g., a decreasing measure)
   - **Correctness**: Why the invariants at exit imply the desired postcondition

## Label and Optional Parameters

Good example: use labeled and optional parameters

```mbt check
///|
fn g(
  positional : Int,
  required~ : Int,
  optional? : Int, // no default => Option
  optional_with_default? : Int = 42, // default => plain Int
) -> String {
  // These are the inferred types inside the function body.
  let _ : Int = positional
  let _ : Int = required
  let _ : Int? = optional
  let _ : Int = optional_with_default
  "\{positional},\{required},\{optional},\{optional_with_default}"
}

///|
test {
  inspect(g(1, required=2), content="1,2,None,42")
  inspect(g(1, required=2, optional=3), content="1,2,Some(3),42")
  inspect(g(1, required=4, optional_with_default=100), content="1,4,None,100")
}
```

Misuse: `arg : Type?` is not an optional parameter.
Callers must still pass it (as `None`/`Some(...)`).

```mbt check
///|
fn with_config(a : Int?, b : Int?, c : Int) -> String {
  "\{a},\{b},\{c}"
}

///|
test {
  inspect(with_config(None, None, 1), content="None,None,1")
  inspect(with_config(Some(5), Some(5), 1), content="Some(5),Some(5),1")
}
```

Anti-pattern: `arg? : Type?` (no default => double Option).
If you want a defaulted optional parameter, write `b? : Int = 1`, not `b? : Int? = Some(1)`.

```mbt check
///|
fn f_misuse(a? : Int?, b? : Int = 1) -> Unit {
  let _ : Int?? = a // rarely intended
  let _ : Int = b
}
// How to fix: declare `(a? : Int, b? : Int = 1)` directly.

///|
fn f_correct(a? : Int, b? : Int = 1) -> Unit {
  let _ : Int? = a
  let _ : Int = b
}

///|
test {
  f_misuse(b=3)
  f_misuse(a=Some(5), b=2) // works but confusing
  f_correct(b=2)
  f_correct(a=5)
}
```

Bad example: `arg : APIOptions` (use labeled optional parameters instead)

```mbt check
///|
/// Do not use a struct to group options.
struct APIOptions {
  width : Int?
  height : Int?
}

///|
fn not_idiomatic(opts : APIOptions, arg : Int) -> Unit {

}

///|
test {
  // Hard to use at the call site
  not_idiomatic({ width : Some(5), height : None }, 10)
  not_idiomatic({ width : None, height : None }, 10)
}
```

## More details

For deeper syntax, types, and examples, read `references/moonbit-language-fundamentals.mbt.md`.
diff --git a/skills/moonbit/ide.md b/skills/moonbit/ide.md new file mode 100644 index 000000000..0c7e509f6 --- /dev/null +++ b/skills/moonbit/ide.md @@ -0,0 +1,116 @@ +## Code Navigation with `moon ide` + +**ALWAYS use `moon ide` for code navigation in MoonBit projects instead of manual file searching, grep, or semantic search.** + +This tool provides two essential commands for precise code exploration: + +### Core Commands + +- `moon ide goto-definition` - Find where a symbol is defined +- `moon ide find-references` - Find all usages of a symbol + +### Query System + +Symbol lookup uses a two-part query system for precise results: + +#### 1. Symbol Name Query (`-query`) + +Fuzzy search for symbol names with package filtering support: + +```bash +# Find any symbol named 'symbol' +moon ide goto-definition -query 'symbol' + +# Find methods of a specific type +moon ide goto-definition -query 'Type::method' + +# Find trait method implementations +moon ide goto-definition -query 'Trait for Type with method' + +# Find symbol in specific package using @pkg prefix +moon ide goto-definition -query '@moonbitlang/x encode' + +# Find symbol in multiple packages (searches in pkg1 OR pkg2) +moon ide goto-definition -query '@username/mymodule/pkg1 @username/mymodule/pkg2 helper' + +# Find symbol in nested package +moon ide goto-definition -query '@username/mymodule/mypkg helper' +``` + +**Supported symbols**: functions, constants, let bindings, types, structs, enums, traits + +**Package filtering**: Prefix your query with `@package_name` to scope the search. Multiple `@pkg` prefixes create an OR condition. + +#### 2. Tag-based Filtering (`-tags`) + +Pre-filter symbols by characteristics before name matching: + +**Visibility tags**: + +- `pub` - Public symbols +- `pub all` - Public structs with all public fields +- `pub open` - Public traits with all methods public +- `priv` - Private symbols + +**Symbol type tags**: + +- `type` - Type definitions (struct, enum, typealias, abstract) +- `error` - Error type definitions +- `enum` - Enum definitions and variants +- `struct` - Struct definitions +- `alias` - Type/function/trait aliases +- `let` - Top-level let bindings +- `const` - Constant definitions +- `fn` - Function definitions +- `trait` - Trait definitions +- `impl` - Trait implementations +- `test` - Named test functions + +**Combine tags with logical operators**: + +```bash +# Public functions only +moon ide goto-definition -tags 'pub fn' -query 'my_func' + +# Functions or constants +moon ide goto-definition -tags 'fn | const' -query 'helper' + +# Public functions or constants +moon ide goto-definition -tags 'pub (fn | const)' -query 'api' + +# Public types or traits +moon ide goto-definition -tags 'pub (type | trait)' -query 'MyType' +``` + +### Practical Examples + +```bash +# Find public function definition +moon ide goto-definition -tags 'pub fn' -query 'maximum' + +# Find all references to a struct +moon ide find-references -tags 'struct' -query 'Rectangle' + +# Find trait implementations +moon ide goto-definition -tags 'impl' -query 'Show for MyType' + +# Find errors in specific package +moon ide goto-definition -tags 'error' -query '@mymodule/parser ParseError' + +# Find symbol across multiple packages +moon ide goto-definition -query '@moonbitlang/x @moonbitlang/core encode' + +# Combine package filtering with tags +moon ide goto-definition -tags 'pub fn' -query '@username/myapp helper' +``` + +### Query Processing + +The tool processes queries in this order: + +1. 
Filter symbols by `-tags` conditions
2. Extract package scope from `@pkg` prefixes in `-query`
3. Fuzzy match remaining symbols by name
4. Return the top 3 best matches with location information

**Best Practice**: Start with `-tags` to reduce noise, then use `@pkg` prefixes in `-query` to scope by package for precise navigation.
diff --git a/skills/moonbit/references/advanced-moonbit-build.md b/skills/moonbit/references/advanced-moonbit-build.md
new file mode 100644
index 000000000..5ba7a82af
--- /dev/null
+++ b/skills/moonbit/references/advanced-moonbit-build.md
@@ -0,0 +1,106 @@
+## Conditional Compilation

Target specific backends/modes in `moon.pkg.json`:

```json
{
  "targets": {
    "wasm_only.mbt": ["wasm"],
    "js_only.mbt": ["js"],
    "debug_only.mbt": ["debug"],
    "wasm_or_js.mbt": ["wasm", "js"], // for the wasm or js backends
    "not_js.mbt": ["not", "js"], // for non-js backends
    "complex.mbt": ["or", ["and", "wasm", "release"], ["and", "js", "debug"]] // more complex conditions
  }
}
```

**Available conditions:**

- **Backends**: `"wasm"`, `"wasm-gc"`, `"js"`, `"native"`
- **Build modes**: `"debug"`, `"release"`
- **Logical operators**: `"and"`, `"or"`, `"not"`

## Link Configuration

### Basic Linking

```json
{
  "link": true, // Enable linking for this package
  // OR for advanced cases:
  "link": {
    "wasm": {
      "exports": ["hello", "foo:bar"], // Export functions
      "heap-start-address": 1024, // Memory layout
      "import-memory": {
        // Import external memory
        "module": "env",
        "name": "memory"
      },
      "export-memory-name": "memory" // Export memory with name
    },
    "wasm-gc": {
      "exports": ["hello"],
      "use-js-builtin-string": true, // JS String Builtin support
      "imported-string-constants": "_" // String namespace
    },
    "js": {
      "exports": ["hello"],
      "format": "esm" // "esm", "cjs", or "iife"
    },
    "native": {
      "cc": "gcc", // C compiler
      "cc-flags": "-O2 -DMOONBIT", // Compile flags
      "cc-link-flags": "-s" // Link flags
    }
  }
}
```

## Warning Control

Disable specific warnings in `moon.mod.json` or `moon.pkg.json`:

```json
{
  "warn-list": "-2-29" // Disable unused variable (2) & unused package (29)
}
```

**Common warning numbers:**

- `1` - Unused function
- `2` - Unused variable
- `11` - Partial pattern matching
- `12` - Unreachable code
- `29` - Unused package

Use `moonc build-package -warn-help` to see all available warnings.

## Pre-build Commands

Embed external files as MoonBit code:

```json
{
  "pre-build": [
    {
      "input": "data.txt",
      "output": "embedded.mbt",
      "command": ":embed -i $input -o $output --name data --text"
    },
    ... // more embed commands
  ]
}
```

Generated code example:

```mbt check
///|
let data : String =
  #|hello,
  #|world
  #|
```
diff --git a/skills/moonbit/references/moonbit-language-fundamentals.mbt.md b/skills/moonbit/references/moonbit-language-fundamentals.mbt.md
new file mode 100644
index 000000000..0290287a6
--- /dev/null
+++ b/skills/moonbit/references/moonbit-language-fundamentals.mbt.md
@@ -0,0 +1,425 @@
+# MoonBit Language Fundamentals

## Quick reference

```mbt check
///|
/// `///` comments are docstrings
pub fn sum(x : Int, y : Int) -> Int {
  x + y
}

///|
struct Rect {
  width : Int
  height : Int
}

///|
fn Rect::area(self : Rect) -> Int {
  self.width * self.height
}

///|
pub impl Show for Rect with output(_self, logger) {
  logger.write_string("Rect")
}

///|
enum MyOption {
  MyNone
  MySome(Int)
} derive(Show, ToJson, Eq, Compare)

///|
/// match + loops are expressions
test "everything is expression in MoonBit" {
  // tuple
  let (n, opt) = (1, MySome(2))
  // if expressions return values
  let msg : String = if n > 0 { "pos" } else { "non-pos" }
  let res = match opt {
    MySome(x) => {
      inspect(x, content="2")
      1
    }
    MyNone => 0
  }
  let status : Result[Int, String] = Ok(10)
  // match expressions return values
  let description = match status {
    Ok(value) => "Success: \{value}"
    Err(error) => "Error: \{error}"
  }
  let array = [1, 2, 3, 4, 5]
  let mut i = 0 // mutable bindings (local only, globals are immutable)
  let target = 3
  // loops return values with 'break'
  let found : Int? = while i < array.length() {
    if array[i] == target {
      break Some(i) // Exit with value
    }
    i = i + 1
  } else { // Value when loop completes normally
    None
  }
  assert_eq(found, Some(2)) // Found at index 2
}

///|
/// global bindings
pub let my_name : String = "MoonBit"

///|
pub const PI : Double = 3.14159 // constants use UPPER_SNAKE or PascalCase

///|
pub fn maximum(xs : Array[Int]) -> Int raise {
  // Toplevel functions are *mutually recursive* by default
  // The `raise` annotation means the function may raise any Error
  // Only add `raise XXError` when you need to track the specific error type
  match xs {
    [] => fail("Empty array") // fail() is built-in for generic errors
    [x] => x
    // pattern match over an array; `.. rest` is a rest pattern
    // it is of type `ArrayView[Int]`, which is a slice
    [x, .. rest] => {
      let mut max_val = x // `mut` is only allowed in local bindings
      for y in rest {
        if y > max_val {
          max_val = y
        }
      }
      max_val // `return` can be omitted; the last expression is the return value
    }
  }
}

///|
/// pub(all) means it can be both read and created outside the package
pub(all) struct Point {
  x : Int
  mut y : Int
} derive(Show, ToJson)

///|
pub enum MyResult[T, E] {
  MyOk(T) // the semicolon `;` is optional when we have a newline
  MyErr(E) // Enum variants must start uppercase
} derive(Show, Eq, ToJson)
// `pub` means the enum can be pattern matched outside the package,
// but not created there; use `pub(all)` to allow construction as well

///|
/// pub(open) means the trait can be implemented outside the package
pub(open) trait Comparable {
  compare(Self, Self) -> Int // `Self` refers to the implementing type
}

///|
test "inspect test" {
  let result = sum(1, 2)
  inspect(result, content="3")
  // The `content` can be auto-corrected by running `moon test --update`
  let point = Point::{ x: 10, y: 20 }
  // For complex structures, use @json.inspect for better readability:
  @json.inspect(point, content={ "x": 10, "y": 20 })
}
```

## Complex Types

```mbt check
///|
pub type UserId = Int // Int is aliased to UserId - like a symlink

///|
/// Tuple-struct for a callback
pub struct Handler((String) -> Unit) // A newtype wrapper

///|
/// Tuple-struct syntax for single-field newtypes
struct Meters(Int) // Tuple-struct syntax

///|
let distance : Meters = Meters(100)

///|
let raw : Int = distance.0 // Access the first field with .0

///|
struct Addr {
  host : String
  port : Int
} derive(Show, Eq, ToJson, FromJson)

///|
/// Struct literal syntax
let config : Addr = Addr::{
  // `Type::` can be omitted since the type is already known
  host: "localhost",
  port: 8080,
}
```

## Common Derivable Traits

Most types can automatically derive standard traits using the `derive(...)` syntax:

- **`Show`** - Enables `to_string()` and string interpolation with `\{value}`
- **`Eq`** - Enables `==` and `!=` equality operators
- **`Compare`** - Enables `<`, `>`, `<=`, `>=` comparison operators
- **`ToJson`** - Enables `@json.inspect()` for readable test output
- **`Hash`** - Enables use as Map keys

```mbt check
///|
struct Coordinate {
  x : Int
  y : Int
} derive(Show, Eq, ToJson)

///|
enum Status {
  Active
  Inactive
} derive(Show, Eq, Compare)
```

**Best practice**: Always derive `Show` and `Eq` for data types. Add `ToJson` if you plan to test them with `@json.inspect()`.
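
A small sketch tying the derived traits to what they unlock (the `Size` type is illustrative; if the derived `Show` rendering differs, `moon test --update` will regenerate the snapshots):

```mbt check
///|
struct Size {
  w : Int
  h : Int
} derive(Show, Eq, ToJson)

///|
test "what derive buys you" {
  let a = Size::{ w: 3, h: 4 }
  let b = Size::{ w: 3, h: 4 }
  assert_true(a == b) // Eq enables ==
  inspect(a, content="{w: 3, h: 4}") // Show enables inspect and \{a}
  @json.inspect(a, content={ "w": 3, "h": 4 }) // ToJson enables @json.inspect
}
```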

## Reference Semantics by Default

MoonBit passes most types by reference semantically (the optimizer may copy
immutables):

```mbt check
///|
/// Structs with 'mut' fields are always passed by reference
struct Counter {
  mut value : Int
}

///|
fn increment(c : Counter) -> Unit {
  c.value += 1 // Modifies the original
}

///|
/// Arrays and Maps are mutable references
fn modify_array(arr : Array[Int]) -> Unit {
  arr[0] = 999 // Modifies original array
}

///|
test "reference semantics" {
  let counter : Ref[Int] = Ref::{ val: 0 }
  counter.val += 1
  assert_true(counter.val is 1)
  let arr : Array[Int] = [1, 2, 3] // unlike Rust, no `mut` keyword needed
  modify_array(arr)
  assert_true(arr[0] is 999)
  let mut x = 3 // `mut` is needed for re-assignment of the binding
  x += 2
  assert_true(x is 5)
}
```

## Pattern Matching

```mbt check
///|
#warnings("-unused_value")
test "pattern match over Array, struct and StringView" {
  let arr : Array[Int] = [10, 20, 25, 30]
  match arr {
    [] => ... // empty array
    [single] => ... // single element
    [first, .. middle, last] => {
      let _ : ArrayView[Int] = middle // middle is ArrayView[Int]
      assert_true(first is 10 && middle is [20, 25] && last is 30)
    }
  }
  fn process_point(point : Point) -> Unit {
    match point {
      { x: 0, y: 0 } => ...
      { x, y } if x == y => ...
      { x, .. } if x < 0 => ...
      ...
    }
  }
  /// StringView pattern matching for parsing
  fn is_palindrome(s : StringView) -> Bool {
    loop s {
      [] | [_] => true
      [a, .. rest, b] if a == b => continue rest
      // a is of type Char, rest is of type StringView
      _ => false
    }
  }
}
```

## Functional `loop` control flow

The `loop` construct is unique to MoonBit:

```mbt check
///|
/// Functional loop with pattern matching on loop variables
/// @list.List is from the standard library
fn sum_list(list : @list.List[Int]) -> Int {
  loop (list, 0) {
    (Empty, acc) => acc // Base case returns accumulator
    (More(x, tail=rest), acc) => continue (rest, x + acc) // Recurse with new values
  }
}

///|
/// Multiple loop variables with complex control flow
fn find_pair(arr : Array[Int], target : Int) -> (Int, Int)? {
  loop (0, arr.length() - 1) {
    (i, j) if i >= j => None
    (i, j) => {
      let sum = arr[i] + arr[j]
      if sum == target {
        Some((i, j)) // Found pair
      } else if sum < target {
        continue (i + 1, j) // Move left pointer
      } else {
        continue (i, j - 1) // Move right pointer
      }
    }
  }
}
```

**Note**: You must provide a payload to `loop`. If you want an infinite loop, use `while true { ... }` instead. The syntax `loop { ... }` without arguments is invalid.
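
To make the payload requirement concrete, here is one more minimal sketch (a hypothetical `gcd` helper, not from the original text) where the loop state tuple is the required payload:

```mbt check
///|
/// Euclid's algorithm: the tuple (a, b) is the loop payload
fn gcd(a : Int, b : Int) -> Int {
  loop (a, b) {
    (a, 0) => a // exit with a value once b reaches 0
    (a, b) => continue (b, a % b) // functional update of the payload
  }
}

///|
test "loop payload in action" {
  inspect(gcd(12, 18), content="6")
  inspect(gcd(7, 3), content="1")
}
```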
+ + +## Methods and Traits + +Methods use `Type::method_name` syntax, traits require explicit implementation: + +```mbt check +///| +struct Rectangle { + width : Double + height : Double +} + +///| +// Methods are prefixed with Type:: +fn Rectangle::area(self : Rectangle) -> Double { + self.width * self.height +} + +///| +/// Static methods don't need self +fn Rectangle::new(w : Double, h : Double) -> Rectangle { + { width: w, height: h } +} + +///| +/// Show trait now uses output(self, logger) for custom formatting +/// to_string() is automatically derived from this +pub impl Show for Rectangle with output(self, logger) { + logger.write_string("Rectangle(\{self.width}x\{self.height})") +} + +///| +/// Traits can have non-object-safe methods +trait Named { + name() -> String // No 'self' parameter - not object-safe +} + +///| +/// Trait bounds in generics +fn[T : Show + Named] describe(value : T) -> String { + "\{T::name()}: \{value.to_string()}" +} + +///| +/// Trait implementation +impl Hash for Rectangle with hash_combine(self, hasher) { + hasher..combine(self.width)..combine(self.height) +} +``` + +## Operator Overloading + +MoonBit supports operator overloading through traits: + +```mbt check +///| +struct Vector(Int, Int) + +///| +/// Implement arithmetic operators +pub impl Add for Vector with add(self, other) { + Vector(self.0 + other.0, self.1 + other.1) +} + +///| +struct Person { + age : Int +} derive(Eq) + +///| +/// Comparison operators +pub impl Compare for Person with compare(self, other) { + self.age.compare(other.age) +} + +///| +test "overloading" { + let v1 : Vector = Vector(1, 2) + let v2 : Vector = Vector(3, 4) + let _v3 : Vector = v1 + v2 + +} +``` + +## Access Control Modifiers + +MoonBit has fine-grained visibility control: + +```mbt check +///| +/// `fn` defaults to Private - only visible in current package +fn internal_helper() -> Unit { + ... +} + +///| +pub fn get_value() -> Int { + ... +} + +///| +// Struct (default) - type visible, implementation hidden +struct DataStructure {} + +///| +/// `pub struct` defaults to readonly - can read, pattern match, but not create +pub struct Config {} + +///| +/// Public all - full access +pub(all) struct Config2 {} + +///| +/// Abstract trait (default) - cannot be implemented by +/// types outside this package +pub trait MyTrait {} + +///| +/// Open for extension +pub(open) trait Extendable {} +``` diff --git a/skills/moonbit/references/moonbit-language-fundamentals.md b/skills/moonbit/references/moonbit-language-fundamentals.md new file mode 120000 index 000000000..05dfc934b --- /dev/null +++ b/skills/moonbit/references/moonbit-language-fundamentals.md @@ -0,0 +1 @@ +moonbit-language-fundamentals.mbt.md \ No newline at end of file