diff --git a/.gitignore b/.gitignore index c6e6aa2049..87e918c19e 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,11 @@ profile.json.gz # test fixtures benchmarks/fixtures + +#TODO: Remove this +crates/toolchain/tests/rv32im-test-vectors/tests/* +*.o +*.a +*.s +*.txt +riscv/* \ No newline at end of file diff --git a/crates/circuits/primitives/cuda/include/primitives/constants.h b/crates/circuits/primitives/cuda/include/primitives/constants.h index dec26b5f41..16396d41ec 100644 --- a/crates/circuits/primitives/cuda/include/primitives/constants.h +++ b/crates/circuits/primitives/cuda/include/primitives/constants.h @@ -3,91 +3,97 @@ #include namespace riscv { -static const size_t RV32_REGISTER_NUM_LIMBS = 4; -static const size_t RV32_CELL_BITS = 8; -static const size_t RV_J_TYPE_IMM_BITS = 21; +inline constexpr size_t RV32_REGISTER_NUM_LIMBS = 4; +inline constexpr size_t RV32_CELL_BITS = 8; +inline constexpr size_t RV_J_TYPE_IMM_BITS = 21; -static const size_t RV32_IMM_AS = 0; +inline constexpr size_t RV32_IMM_AS = 0; } // namespace riscv namespace program { -static const size_t PC_BITS = 30; -static const size_t DEFAULT_PC_STEP = 4; +inline constexpr size_t PC_BITS = 30; +inline constexpr size_t DEFAULT_PC_STEP = 4; } // namespace program namespace native { -static const size_t AS_IMMEDIATE = 0; -static const size_t AS_NATIVE = 4; -static const size_t EXT_DEG = 4; -static const size_t BETA = 11; +inline constexpr size_t AS_IMMEDIATE = 0; +inline constexpr size_t AS_NATIVE = 4; +inline constexpr size_t EXT_DEG = 4; +inline constexpr size_t BETA = 11; } // namespace native namespace poseidon2 { -static const size_t CHUNK = 8; +inline constexpr size_t CHUNK = 8; } // namespace poseidon2 namespace p3_keccak_air { -static const size_t NUM_ROUNDS = 24; -static const size_t BITS_PER_LIMB = 16; -static const size_t U64_LIMBS = 64 / BITS_PER_LIMB; -static const size_t RATE_BITS = 1088; -static const size_t RATE_LIMBS = RATE_BITS / BITS_PER_LIMB; +inline constexpr size_t 
NUM_ROUNDS = 24; +inline constexpr size_t BITS_PER_LIMB = 16; +inline constexpr size_t U64_LIMBS = 64 / BITS_PER_LIMB; +inline constexpr size_t RATE_BITS = 1088; +inline constexpr size_t RATE_LIMBS = RATE_BITS / BITS_PER_LIMB; } // namespace p3_keccak_air namespace keccak256 { /// Total number of sponge bytes: number of rate bytes + number of capacity bytes. -static const size_t KECCAK_WIDTH_BYTES = 200; +inline constexpr size_t KECCAK_WIDTH_BYTES = 200; /// Total number of 16-bit limbs in the sponge. -static const size_t KECCAK_WIDTH_U16S = KECCAK_WIDTH_BYTES / 2; +inline constexpr size_t KECCAK_WIDTH_U16S = KECCAK_WIDTH_BYTES / 2; /// Number of rate bytes. -static const size_t KECCAK_RATE_BYTES = 136; +inline constexpr size_t KECCAK_RATE_BYTES = 136; /// Number of 16-bit rate limbs. -static const size_t KECCAK_RATE_U16S = KECCAK_RATE_BYTES / 2; +inline constexpr size_t KECCAK_RATE_U16S = KECCAK_RATE_BYTES / 2; /// Number of absorb rounds, equal to rate in u64s. -static const size_t NUM_ABSORB_ROUNDS = KECCAK_RATE_BYTES / 8; +inline constexpr size_t NUM_ABSORB_ROUNDS = KECCAK_RATE_BYTES / 8; /// Number of capacity bytes. -static const size_t KECCAK_CAPACITY_BYTES = 64; +inline constexpr size_t KECCAK_CAPACITY_BYTES = 64; /// Number of 16-bit capacity limbs. -static const size_t KECCAK_CAPACITY_U16S = KECCAK_CAPACITY_BYTES / 2; +inline constexpr size_t KECCAK_CAPACITY_U16S = KECCAK_CAPACITY_BYTES / 2; /// Number of output digest bytes used during the squeezing phase. -static const size_t KECCAK_DIGEST_BYTES = 32; +inline constexpr size_t KECCAK_DIGEST_BYTES = 32; /// Number of 64-bit digest limbs. 
-static const size_t KECCAK_DIGEST_U64S = KECCAK_DIGEST_BYTES / 8; +inline constexpr size_t KECCAK_DIGEST_U64S = KECCAK_DIGEST_BYTES / 8; // ==== Constants for register/memory adapter ==== /// Register reads to get dst, src, len -static const size_t KECCAK_REGISTER_READS = 3; +inline constexpr size_t KECCAK_REGISTER_READS = 3; /// Number of cells to read/write in a single memory access -static const size_t KECCAK_WORD_SIZE = 4; +inline constexpr size_t KECCAK_WORD_SIZE = 4; /// Memory reads for absorb per row -static const size_t KECCAK_ABSORB_READS = KECCAK_RATE_BYTES / KECCAK_WORD_SIZE; +inline constexpr size_t KECCAK_ABSORB_READS = KECCAK_RATE_BYTES / KECCAK_WORD_SIZE; /// Memory writes for digest per row -static const size_t KECCAK_DIGEST_WRITES = KECCAK_DIGEST_BYTES / KECCAK_WORD_SIZE; +inline constexpr size_t KECCAK_DIGEST_WRITES = KECCAK_DIGEST_BYTES / KECCAK_WORD_SIZE; /// keccakf parameters -static const size_t KECCAK_ROUND = 24; -static const size_t KECCAK_STATE_SIZE = 25; -static const size_t KECCAK_Q_SIZE = 192; +inline constexpr size_t KECCAK_ROUND = 24; +inline constexpr size_t KECCAK_STATE_SIZE = 25; +inline constexpr size_t KECCAK_Q_SIZE = 192; /// From memory config -static const size_t KECCAK_POINTER_MAX_BITS = 29; +inline constexpr size_t KECCAK_POINTER_MAX_BITS = 29; } // namespace keccak256 namespace mod_builder { -static const size_t MAX_LIMBS = 97; +inline constexpr size_t MAX_LIMBS = 97; } // namespace mod_builder namespace sha256 { -static const size_t SHA256_BLOCK_BITS = 512; -static const size_t SHA256_BLOCK_U8S = 64; -static const size_t SHA256_BLOCK_WORDS = 16; -static const size_t SHA256_WORD_U8S = 4; -static const size_t SHA256_WORD_BITS = 32; -static const size_t SHA256_WORD_U16S = 2; -static const size_t SHA256_HASH_WORDS = 8; -static const size_t SHA256_NUM_READ_ROWS = 4; -static const size_t SHA256_ROWS_PER_BLOCK = 17; -static const size_t SHA256_ROUNDS_PER_ROW = 4; -static const size_t SHA256_ROW_VAR_CNT = 5; -static const size_t 
SHA256_REGISTER_READS = 3; -static const size_t SHA256_READ_SIZE = 16; -static const size_t SHA256_WRITE_SIZE = 32; -} // namespace sha256 \ No newline at end of file +inline constexpr size_t SHA256_BLOCK_BITS = 512; +inline constexpr size_t SHA256_BLOCK_U8S = 64; +inline constexpr size_t SHA256_BLOCK_WORDS = 16; +inline constexpr size_t SHA256_WORD_U8S = 4; +inline constexpr size_t SHA256_WORD_BITS = 32; +inline constexpr size_t SHA256_WORD_U16S = 2; +inline constexpr size_t SHA256_HASH_WORDS = 8; +inline constexpr size_t SHA256_NUM_READ_ROWS = 4; +inline constexpr size_t SHA256_ROWS_PER_BLOCK = 17; +inline constexpr size_t SHA256_ROUNDS_PER_ROW = 4; +inline constexpr size_t SHA256_ROW_VAR_CNT = 5; +inline constexpr size_t SHA256_REGISTER_READS = 3; +inline constexpr size_t SHA256_READ_SIZE = 16; +inline constexpr size_t SHA256_WRITE_SIZE = 32; +} // namespace sha256 + +namespace hintstore { +// Must match MAX_HINT_BUFFER_WORDS_BITS in openvm_rv32im_guest::lib.rs +inline constexpr size_t MAX_HINT_BUFFER_WORDS_BITS = 18; +inline constexpr size_t MAX_HINT_BUFFER_WORDS = (1 << MAX_HINT_BUFFER_WORDS_BITS) - 1; +} // namespace hintstore diff --git a/crates/toolchain/openvm/src/io/mod.rs b/crates/toolchain/openvm/src/io/mod.rs index eb00a9d3cd..05f073073e 100644 --- a/crates/toolchain/openvm/src/io/mod.rs +++ b/crates/toolchain/openvm/src/io/mod.rs @@ -6,7 +6,7 @@ use core::alloc::Layout; use core::fmt::Write; #[cfg(target_os = "zkvm")] -use openvm_rv32im_guest::{hint_buffer_u32, hint_input, hint_store_u32}; +use openvm_rv32im_guest::{hint_buffer_chunked, hint_input, hint_store_u32}; use serde::de::DeserializeOwned; #[cfg(not(target_os = "zkvm"))] @@ -83,7 +83,7 @@ pub(crate) fn read_vec_by_len(len: usize) -> Vec { // The heap-embedded-alloc uses linked list allocator, which has a minimum alignment of // `sizeof(usize) * 2 = 8` on 32-bit architectures: https://github.com/rust-osdev/linked-list-allocator/blob/b5caf3271259ddda60927752fa26527e0ccd2d56/src/hole.rs#L429 let 
mut bytes = Vec::with_capacity(capacity); - hint_buffer_u32!(bytes.as_mut_ptr(), num_words); + hint_buffer_chunked(bytes.as_mut_ptr(), num_words as usize); // SAFETY: We populate a `Vec` by hintstore-ing `num_words` 4 byte words. We set the // length to `len` and don't care about the extra `capacity - len` bytes stored. unsafe { diff --git a/crates/toolchain/openvm/src/io/read.rs b/crates/toolchain/openvm/src/io/read.rs index 39b2166e39..f2eff6cfa5 100644 --- a/crates/toolchain/openvm/src/io/read.rs +++ b/crates/toolchain/openvm/src/io/read.rs @@ -2,7 +2,7 @@ use core::mem::MaybeUninit; use openvm_platform::WORD_SIZE; #[cfg(target_os = "zkvm")] -use openvm_rv32im_guest::hint_buffer_u32; +use openvm_rv32im_guest::hint_buffer_chunked; use super::hint_store_word; use crate::serde::WordRead; @@ -31,7 +31,7 @@ impl WordRead for Reader { let num_words = words.len(); if let Some(new_remaining) = self.bytes_remaining.checked_sub(num_words * WORD_SIZE) { #[cfg(target_os = "zkvm")] - hint_buffer_u32!(words.as_mut_ptr(), words.len()); + hint_buffer_chunked(words.as_mut_ptr() as *mut u8, words.len()); #[cfg(not(target_os = "zkvm"))] { for w in words.iter_mut() { @@ -51,7 +51,7 @@ impl WordRead for Reader { } let mut num_padded_bytes = bytes.len(); #[cfg(target_os = "zkvm")] - hint_buffer_u32!(bytes as *mut [u8] as *mut u32, num_padded_bytes / WORD_SIZE); + hint_buffer_chunked(bytes.as_mut_ptr(), num_padded_bytes / WORD_SIZE); #[cfg(not(target_os = "zkvm"))] { let mut words = bytes.chunks_exact_mut(WORD_SIZE); diff --git a/crates/toolchain/openvm/src/pal_abi.rs b/crates/toolchain/openvm/src/pal_abi.rs index 0ab3d3f386..3797998bb8 100644 --- a/crates/toolchain/openvm/src/pal_abi.rs +++ b/crates/toolchain/openvm/src/pal_abi.rs @@ -5,7 +5,7 @@ /// system operations in the same way: there is no operating system and even the standard /// library should be directly handled with intrinsics. 
use openvm_platform::{fileno::*, memory::sys_alloc_aligned, rust_rt::terminate, WORD_SIZE}; -use openvm_rv32im_guest::{hint_buffer_u32, hint_random, raw_print_str_from_bytes}; +use openvm_rv32im_guest::{hint_buffer_chunked, hint_random, raw_print_str_from_bytes}; const DIGEST_WORDS: usize = 8; @@ -73,7 +73,7 @@ pub unsafe extern "C" fn sys_sha_buffer( #[no_mangle] pub unsafe extern "C" fn sys_rand(recv_buf: *mut u32, words: usize) { hint_random(words); - hint_buffer_u32!(recv_buf, words); + hint_buffer_chunked(recv_buf as *mut u8, words); } /// # Safety diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 234dfbd5b9..0b7a13bfe9 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -38,6 +38,12 @@ pub enum ExecutionError { DisabledOperation { pc: u32, opcode: VmOpcode }, #[error("at pc = {pc}")] HintOutOfBounds { pc: u32 }, + #[error("at pc {pc}, hint buffer num_words {num_words} exceeds MAX_HINT_BUFFER_WORDS {max_hint_buffer_words}")] + HintBufferTooLarge { + pc: u32, + num_words: u32, + max_hint_buffer_words: u32, + }, #[error("at pc {pc}, tried to publish into index {public_value_index} when num_public_values = {num_public_values}")] PublicValueIndexOutOfBounds { pc: u32, diff --git a/docs/vocs/docs/pages/specs/openvm/isa.mdx b/docs/vocs/docs/pages/specs/openvm/isa.mdx index 14b71fa05c..a1e8223540 100644 --- a/docs/vocs/docs/pages/specs/openvm/isa.mdx +++ b/docs/vocs/docs/pages/specs/openvm/isa.mdx @@ -35,6 +35,7 @@ OpenVM depends on the following parameters, some of which are fixed and some of | `addr_space_height` | The base 2 log of the number of writable address spaces supported. | Configurable, must satisfy `addr_space_height <= F::bits() - 2` | | `pointer_max_bits` | The maximum number of bits in a pointer. | Configurable, must satisfy `pointer_max_bits <= F::bits() - 2` | | `num_public_values` | The number of user public values. | Configurable. 
If continuation is enabled, it must equal `8` times a power of two(which is nonzero). | +| `MAX_HINT_BUFFER_WORDS_BITS` | The maximum number of bits for hint buffer word count. This determines `MAX_HINT_BUFFER_WORDS = 2^MAX_HINT_BUFFER_WORDS_BITS - 1` = 262,143 words (≈1MB), the maximum words per `HINT_BUFFER_RV32` instruction. | Fixed to 18. | We explain these parameters in subsequent sections. @@ -428,9 +429,11 @@ with user input-output. | Name | Operands | Description | | ---------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | HINT_STOREW_RV32 | `_,b,_,1,2` | `[r32{0}(b):4]_2 = next 4 bytes from hint stream`. Only valid if next 4 values in hint stream are bytes. | -| HINT_BUFFER_RV32 | `a,b,_,1,2` | `[r32{0}(b):4 * l]_2 = next 4 * l bytes from hint stream` where `l = r32{0}(a)`. Only valid if next `4 * l` values in hint stream are bytes. Very important: `l` should not be 0. The pointer address `r32{0}(b)` does not need to be a multiple of `4`. | +| HINT_BUFFER_RV32 | `a,b,_,1,2` | `[r32{0}(b):4 * l]_2 = next 4 * l bytes from hint stream` where `l = r32{0}(a)`. Only valid if next `4 * l` values in hint stream are bytes. `l` must be non-zero and <= `MAX_HINT_BUFFER_WORDS` (262,143 words ≈ 1MB). The pointer address `r32{0}(b)` does not need to be a multiple of `4`. | | REVEAL_RV32 | `a,b,c,1,3,_,g` | Pseudo-instruction for `STOREW_RV32 a,b,c,1,3,_,g` writing to the user IO address space `3`. Only valid when continuations are enabled. | +> **Note:** The `MAX_HINT_BUFFER_WORDS` bound on `HINT_BUFFER_RV32` is enforced by both the executor and AIR constraints. The SDK's `hint_buffer_chunked` function automatically splits larger reads into multiple `HINT_BUFFER_RV32` instructions. + #### Phantom Sub-Instructions The RV32IM extension defines the following phantom sub-instructions. 
diff --git a/extensions/algebra/moduli-macros/src/lib.rs b/extensions/algebra/moduli-macros/src/lib.rs index 4ea8af0211..0266b7468e 100644 --- a/extensions/algebra/moduli-macros/src/lib.rs +++ b/extensions/algebra/moduli-macros/src/lib.rs @@ -875,15 +875,15 @@ pub fn moduli_declare(input: TokenStream) -> TokenStream { } #[cfg(target_os = "zkvm")] { - use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_store_u32! and hint_buffer_u32! + use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_store_u32! and hint_buffer_chunked let is_square = core::mem::MaybeUninit::::uninit(); - let sqrt = core::mem::MaybeUninit::<#struct_name>::uninit(); + let mut sqrt = core::mem::MaybeUninit::<#struct_name>::uninit(); unsafe { #hint_sqrt_extern_func(self as *const #struct_name as usize); let is_square_ptr = is_square.as_ptr() as *const u32; openvm_rv32im_guest::hint_store_u32!(is_square_ptr); - openvm_rv32im_guest::hint_buffer_u32!(sqrt.as_ptr() as *const u8, <#struct_name as ::openvm_algebra_guest::IntMod>::NUM_LIMBS / 4); + openvm_rv32im_guest::hint_buffer_chunked(sqrt.as_mut_ptr() as *mut u8, <#struct_name as ::openvm_algebra_guest::IntMod>::NUM_LIMBS / 4); // word count (4 bytes per word) let is_square = is_square.assume_init(); if is_square == 0 || is_square == 1 { Some((is_square == 1, sqrt.assume_init())) @@ -902,14 +902,14 @@ pub fn moduli_declare(input: TokenStream) -> TokenStream { } #[cfg(target_os = "zkvm")] { - use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_buffer_u32! 
+ use ::openvm_algebra_guest::{openvm_custom_insn, openvm_rv32im_guest}; // needed for hint_buffer_chunked let mut non_qr_uninit = core::mem::MaybeUninit::::uninit(); let mut non_qr; unsafe { #hint_non_qr_extern_func(); - let ptr = non_qr_uninit.as_ptr() as *const u8; - openvm_rv32im_guest::hint_buffer_u32!(ptr, ::NUM_LIMBS / 4); + let ptr = non_qr_uninit.as_mut_ptr() as *mut u8; + openvm_rv32im_guest::hint_buffer_chunked(ptr, ::NUM_LIMBS / 4); // word count (4 bytes per word) non_qr = non_qr_uninit.assume_init(); } // ensure non_qr < modulus diff --git a/extensions/rv32im/circuit/cuda/src/hintstore.cu b/extensions/rv32im/circuit/cuda/src/hintstore.cu index ce09a22477..b4e3a1b607 100644 --- a/extensions/rv32im/circuit/cuda/src/hintstore.cu +++ b/extensions/rv32im/circuit/cuda/src/hintstore.cu @@ -6,6 +6,8 @@ using namespace riscv; using namespace program; +using hintstore::MAX_HINT_BUFFER_WORDS; +using hintstore::MAX_HINT_BUFFER_WORDS_BITS; template struct Rv32HintStoreCols { // common @@ -87,11 +89,25 @@ struct Rv32HintStore { COL_WRITE_ARRAY(row, Rv32HintStoreCols, mem_ptr_limbs, mem_ptr_limbs); if (local_idx == 0) { + // The overflow check for mem_ptr + num_words * 4 is not needed because + // 4 * MAX_HINT_BUFFER_WORDS < 2^pointer_max_bits guarantees no overflow + assert(MAX_HINT_BUFFER_WORDS_BITS + 2 < pointer_max_bits); + + // Range check for mem_ptr (using pointer_max_bits) uint32_t msl_rshift = (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS; uint32_t msl_lshift = RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - pointer_max_bits; + + // Range check for num_words (using MAX_HINT_BUFFER_WORDS_BITS) + // These constraints only work for MAX_HINT_BUFFER_WORDS_BITS in [16, 23]; checked at compile time + static_assert(MAX_HINT_BUFFER_WORDS_BITS >= 16 && MAX_HINT_BUFFER_WORDS_BITS <= 23, "MAX_HINT_BUFFER_WORDS_BITS must be in [16, 23]"); + + assert(record.num_words <= MAX_HINT_BUFFER_WORDS); + uint32_t rem_words_limb2_lshift = (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS - MAX_HINT_BUFFER_WORDS_BITS; + + // Combined range check for mem_ptr and num_words 
bitwise_lookup.add_range( (record.mem_ptr >> msl_rshift) << msl_lshift, - (record.num_words >> msl_rshift) << msl_lshift + ((record.num_words >> 16) & 0xFF) << rem_words_limb2_lshift ); mem_helper.fill( row.slice_from(COL_INDEX(Rv32HintStoreCols, mem_ptr_aux_cols)), diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs index 47e68e3084..631cdb79a1 100644 --- a/extensions/rv32im/circuit/src/hintstore/execution.rs +++ b/extensions/rv32im/circuit/src/hintstore/execution.rs @@ -14,6 +14,7 @@ use openvm_instructions::{ use openvm_rv32im_transpiler::{ Rv32HintStoreOpcode, Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW}, + MAX_HINT_BUFFER_WORDS, }; use openvm_stark_backend::p3_field::PrimeField32; @@ -172,6 +173,15 @@ unsafe fn execute_e12_impl MAX_HINT_BUFFER_WORDS as u32 { + return Err(ExecutionError::HintBufferTooLarge { + pc, + num_words, + max_hint_buffer_words: MAX_HINT_BUFFER_WORDS as u32, + }); + } + if exec_state.streams.hint_stream.len() < RV32_REGISTER_NUM_LIMBS * num_words as usize { let err = ExecutionError::HintOutOfBounds { pc }; return Err(err); diff --git a/extensions/rv32im/circuit/src/hintstore/mod.rs b/extensions/rv32im/circuit/src/hintstore/mod.rs index 35955bb979..b9cac88249 100644 --- a/extensions/rv32im/circuit/src/hintstore/mod.rs +++ b/extensions/rv32im/circuit/src/hintstore/mod.rs @@ -25,6 +25,7 @@ use openvm_instructions::{ use openvm_rv32im_transpiler::{ Rv32HintStoreOpcode, Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW}, + MAX_HINT_BUFFER_WORDS, MAX_HINT_BUFFER_WORDS_BITS, }; use openvm_stark_backend::{ interaction::InteractionBuilder, @@ -202,19 +203,29 @@ impl Air for Rv32HintStoreAir { ) .eval(builder, is_start.clone()); - // Preventing mem_ptr and rem_words overflow - // Constraining mem_ptr_limbs[RV32_REGISTER_NUM_LIMBS - 1] < 2^(pointer_max_bits - - // (RV32_REGISTER_NUM_LIMBS - 1)*RV32_CELL_BITS) which implies mem_ptr <= - // 2^pointer_max_bits Similarly for rem_words 
<= 2^pointer_max_bits + // Preventing rem_words overflow: rem_words < 2^MAX_HINT_BUFFER_WORDS_BITS + // These constraints only work for MAX_HINT_BUFFER_WORDS_BITS in [16, 23] + debug_assert!( + (16..=23).contains(&MAX_HINT_BUFFER_WORDS_BITS), + "MAX_HINT_BUFFER_WORDS_BITS must be in [16, 23] for these constraints to work" + ); + // For MAX_HINT_BUFFER_WORDS_BITS = 18, this requires: + // - limbs[3] = 0 (since 2^18 < 2^24) + // - limbs[2] < 4 (since 2^18 = 4 * 2^16) + builder.assert_zero(local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 1]); + + // Preventing mem_ptr overflow: mem_ptr < 2^pointer_max_bits + // (rem_words overflow is handled below with the stricter MAX_HINT_BUFFER_WORDS_BITS bound) self.bitwise_operation_lookup_bus .send_range( local_cols.mem_ptr_limbs[RV32_REGISTER_NUM_LIMBS - 1] * AB::F::from_canonical_usize( 1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits), ), - local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 1] + local_cols.rem_words_limbs[RV32_REGISTER_NUM_LIMBS - 2] * AB::F::from_canonical_usize( - 1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits), + 1 << ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS + - MAX_HINT_BUFFER_WORDS_BITS), ), ) .eval(builder, is_start.clone()); @@ -409,6 +420,15 @@ where read_rv32_register(state.memory.data(), a) }; + // Bounds check: num_words must not exceed MAX_HINT_BUFFER_WORDS + if num_words > MAX_HINT_BUFFER_WORDS as u32 { + return Err(ExecutionError::HintBufferTooLarge { + pc: *state.pc, + num_words, + max_hint_buffer_words: MAX_HINT_BUFFER_WORDS as u32, + }); + } + let record = state.ctx.alloc(MultiRowLayout::new(Rv32HintStoreMetadata { num_words: num_words as usize, })); @@ -508,6 +528,10 @@ impl TraceFiller for Rv32HintStoreFiller { let msl_lshift: u32 = (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits) as u32; + // Scale factors for rem_words range check (using MAX_HINT_BUFFER_WORDS_BITS) + let rem_words_limb2_lshift: u32 = + 
((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS - MAX_HINT_BUFFER_WORDS_BITS) as u32; + chunks .par_iter_mut() .zip(sizes.par_iter()) @@ -526,9 +550,17 @@ impl TraceFiller for Rv32HintStoreFiller { }), ) }; + // Range check for mem_ptr (using pointer_max_bits) + // (num_words overflow check is handled below with the stricter + // MAX_HINT_BUFFER_WORDS_BITS bound) + // Range check for num_words (using MAX_HINT_BUFFER_WORDS_BITS) + debug_assert!( + num_words <= MAX_HINT_BUFFER_WORDS as u32, + "num_words must be <= MAX_HINT_BUFFER_WORDS" + ); self.bitwise_lookup_chip.request_range( (record.inner.mem_ptr >> msl_rshift) << msl_lshift, - (num_words >> msl_rshift) << msl_lshift, + ((num_words >> 16) & 0xFF) << rem_words_limb2_lshift, ); let mut timestamp = record.inner.timestamp + num_words * 3; diff --git a/extensions/rv32im/circuit/src/hintstore/tests.rs b/extensions/rv32im/circuit/src/hintstore/tests.rs index e79066aae6..61019a1104 100644 --- a/extensions/rv32im/circuit/src/hintstore/tests.rs +++ b/extensions/rv32im/circuit/src/hintstore/tests.rs @@ -19,7 +19,10 @@ use openvm_instructions::{ riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS}, LocalOpcode, }; -use openvm_rv32im_transpiler::Rv32HintStoreOpcode::{self, *}; +use openvm_rv32im_transpiler::{ + Rv32HintStoreOpcode::{self, *}, + MAX_HINT_BUFFER_WORDS, +}; use openvm_stark_backend::{ p3_field::FieldAlgebra, p3_matrix::{ @@ -194,6 +197,94 @@ fn rand_hintstore_test() { // part of the trace and check that the chip throws the expected error. 
////////////////////////////////////////////////////////////////////////////////////// +#[test] +#[should_panic(expected = "HintBufferTooLarge")] +fn test_hint_buffer_exceeds_max_words() { + let mut rng = create_seeded_rng(); + let mut tester = VmChipTestBuilder::default(); + + let (mut harness, _bitwise) = create_harness::>(&mut tester); + + let num_words = (MAX_HINT_BUFFER_WORDS + 1) as u32; + + let a = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write( + RV32_REGISTER_AS as usize, + a, + num_words.to_le_bytes().map(F::from_canonical_u8), + ); + + let mem_ptr = gen_pointer(&mut rng, 4) as u32; + let b = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write(1, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8)); + + for _ in 0..num_words { + let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8); + tester.streams_mut().hint_stream.extend(data); + } + + tester.execute( + &mut harness.executor, + &mut harness.arena, + &Instruction::from_usize( + HINT_BUFFER.global_opcode(), + [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize], + ), + ); +} + +#[test] +fn test_hint_buffer_rem_words_range_check() { + let mut rng = create_seeded_rng(); + let mut tester = VmChipTestBuilder::default(); + + let (mut harness, bitwise) = create_harness(&mut tester); + + // Build a small, valid buffer instruction with 1 word so trace has 1 row. 
+ let num_words: u32 = 1; + let a = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write( + RV32_REGISTER_AS as usize, + a, + num_words.to_le_bytes().map(F::from_canonical_u8), + ); + + let mem_ptr = gen_pointer(&mut rng, 4) as u32; + let b = gen_pointer(&mut rng, RV32_REGISTER_NUM_LIMBS); + tester.write(1, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8)); + + for _ in 0..num_words { + let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8); + tester.streams_mut().hint_stream.extend(data); + } + + tester.execute( + &mut harness.executor, + &mut harness.arena, + &Instruction::from_usize( + HINT_BUFFER.global_opcode(), + [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize], + ), + ); + + let modify_trace = |trace: &mut DenseMatrix| { + let mut trace_row = trace.row_slice(0).to_vec(); + let cols: &mut Rv32HintStoreCols = trace_row.as_mut_slice().borrow_mut(); + // Force `rem_words` to overflow MAX_HINT_BUFFER_WORDS_BITS on the start row. + cols.rem_words_limbs = [F::ZERO, F::ZERO, F::ZERO, F::from_canonical_u8(1)]; + *trace = RowMajorMatrix::new(trace_row, trace.width()); + }; + + disable_debug_builder(); + let tester = tester + .build() + .load_and_prank_trace(harness, modify_trace) + .load_periphery(bitwise) + .finalize(); + + tester.simple_test_with_expected_error(get_verification_error(false)); +} + #[allow(clippy::too_many_arguments)] fn run_negative_hintstore_test( opcode: Rv32HintStoreOpcode, diff --git a/extensions/rv32im/guest/src/io.rs b/extensions/rv32im/guest/src/io.rs index 664b9b1117..535959d4cc 100644 --- a/extensions/rv32im/guest/src/io.rs +++ b/extensions/rv32im/guest/src/io.rs @@ -1,5 +1,5 @@ #![allow(unused_imports)] -use crate::{PhantomImm, PHANTOM_FUNCT3, SYSTEM_OPCODE}; +use crate::{PhantomImm, MAX_HINT_BUFFER_WORDS, PHANTOM_FUNCT3, SYSTEM_OPCODE}; /// Store the next 4 bytes from the hint stream to [[rd]_1]_2. #[macro_export] @@ -21,8 +21,8 @@ macro_rules! 
hint_buffer_u32 { ($x:expr, $len:expr) => { if $len != 0 { openvm_custom_insn::custom_insn_i!( - opcode = openvm_rv32im_guest::SYSTEM_OPCODE, - funct3 = openvm_rv32im_guest::HINT_FUNCT3, + opcode = $crate::SYSTEM_OPCODE, + funct3 = $crate::HINT_FUNCT3, rd = In $x, rs1 = In $len, imm = Const 1, @@ -31,6 +31,18 @@ macro_rules! hint_buffer_u32 { }; } +/// Reads `num_words` 4-byte words from the hint stream into memory starting at `ptr`, issuing one `hint_buffer_u32!` per chunk of at most `MAX_HINT_BUFFER_WORDS` words (none when `num_words` is 0). +/// NOTE(review): this is a safe fn taking a raw pointer; callers are expected to pass a `ptr` valid for writes of `4 * num_words` bytes. +#[inline(always)] +pub fn hint_buffer_chunked(mut ptr: *mut u8, mut num_words: usize) { + while num_words > 0 { + let chunk = core::cmp::min(num_words, MAX_HINT_BUFFER_WORDS); + hint_buffer_u32!(ptr, chunk); + ptr = ptr.wrapping_add(chunk * 4); + num_words -= chunk; + } +} + /// Reset the hint stream with the next hint. #[inline(always)] pub fn hint_input() { diff --git a/extensions/rv32im/guest/src/lib.rs b/extensions/rv32im/guest/src/lib.rs index 99f1a6f97f..cea29068e2 100644 --- a/extensions/rv32im/guest/src/lib.rs +++ b/extensions/rv32im/guest/src/lib.rs @@ -25,6 +25,16 @@ pub const REVEAL_FUNCT3: u8 = 0b010; pub const PHANTOM_FUNCT3: u8 = 0b011; pub const CSRRW_FUNCT3: u8 = 0b001; +/// Number of bits bounding the word count of a single HINT_BUFFER instruction. +/// IMPORTANT: Must be kept in sync with the MAX_HINT_BUFFER_WORDS_BITS constant for cuda in +/// `crates/circuits/primitives/cuda/include/primitives/constants.h` +// The hintstore AIR constraints are only valid for MAX_HINT_BUFFER_WORDS_BITS in the range +// [16, 23]. +pub const MAX_HINT_BUFFER_WORDS_BITS: usize = 18; +/// Maximum number of words that can be read in a single HINT_BUFFER instruction. 
+/// AIR constraint requires rem_words < 2^MAX_HINT_BUFFER_WORDS_BITS, so max is one less +pub const MAX_HINT_BUFFER_WORDS: usize = (1 << MAX_HINT_BUFFER_WORDS_BITS) - 1; // 262,143 words ≈ 1MB + /// imm options for system phantom instructions #[derive(Debug, Copy, Clone, PartialEq, Eq, FromRepr)] #[repr(u16)] diff --git a/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs b/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs new file mode 100644 index 0000000000..64472b0f25 --- /dev/null +++ b/extensions/rv32im/tests/programs/examples/hint_large_buffer.rs @@ -0,0 +1,25 @@ +#![cfg_attr(not(feature = "std"), no_main)] +#![cfg_attr(not(feature = "std"), no_std)] + +use openvm::io::read_vec; +use openvm_rv32im_guest::MAX_HINT_BUFFER_WORDS; + +openvm::entry!(main); + +pub fn main() { + let vec = read_vec(); + + // Create a hint buffer larger than MAX_HINT_BUFFER_WORDS, to test chunking + let expected_words = MAX_HINT_BUFFER_WORDS + 100; + let expected_len = expected_words * 4; + + if vec.len() != expected_len { + openvm::process::panic(); + } + + for (i, item) in vec.iter().enumerate() { + if *item != (i as u8) { + openvm::process::panic(); + } + } +} diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs index ff141398f5..c4302ae808 100644 --- a/extensions/rv32im/tests/src/lib.rs +++ b/extensions/rv32im/tests/src/lib.rs @@ -13,7 +13,7 @@ mod tests { }; use openvm_instructions::{exe::VmExe, instruction::Instruction, LocalOpcode, SystemOpcode}; use openvm_rv32im_circuit::{Rv32IBuilder, Rv32IConfig, Rv32ImBuilder, Rv32ImConfig}; - use openvm_rv32im_guest::hint_load_by_key_encode; + use openvm_rv32im_guest::{hint_load_by_key_encode, MAX_HINT_BUFFER_WORDS}; use openvm_rv32im_transpiler::{ DivRemOpcode, MulHOpcode, MulOpcode, Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension, @@ -169,6 +169,37 @@ mod tests { Ok(()) } + /// NOTE: This test is slow because it processes > 1MB of data. 
It is marked #[ignore] + /// and can be run with: cargo test -p openvm-rv32im-integration-tests test_hint_buffer_chunking + /// -- --ignored + #[test] + #[ignore = "slow test: processes >1MB of data"] + fn test_hint_buffer_chunking() -> Result<()> { + let config = test_rv32im_config(); + let elf = build_example_program_at_path(get_programs_dir!(), "hint_large_buffer", &config)?; + let exe = VmExe::from_elf( + elf, + Transpiler::::default() + .with_extension(Rv32ITranspilerExtension) + .with_extension(Rv32MTranspilerExtension) + .with_extension(Rv32IoTranspilerExtension), + )?; + + // Create input buffer larger than MAX_HINT_BUFFER_WORDS + // This will require chunking to succeed + let expected_words = MAX_HINT_BUFFER_WORDS + 100; + let expected_len = expected_words * 4; + + // Create data with a pattern that can be verified + let data: Vec = (0..expected_len) + .map(|i| F::from_canonical_u8((i % 256) as u8)) + .collect(); + + let input = vec![data]; + air_test_with_min_segments(Rv32ImBuilder, config, exe, input, 1); + Ok(()) + } + #[test] fn test_read() -> Result<()> { let config = test_rv32im_config(); diff --git a/extensions/rv32im/transpiler/src/lib.rs b/extensions/rv32im/transpiler/src/lib.rs index 03a354517e..218202c369 100644 --- a/extensions/rv32im/transpiler/src/lib.rs +++ b/extensions/rv32im/transpiler/src/lib.rs @@ -9,6 +9,7 @@ use openvm_rv32im_guest::{ NATIVE_STOREW_FUNCT3, NATIVE_STOREW_FUNCT7, PHANTOM_FUNCT3, REVEAL_FUNCT3, RV32M_FUNCT7, RV32_ALU_OPCODE, SYSTEM_OPCODE, TERMINATE_FUNCT3, }; +pub use openvm_rv32im_guest::{MAX_HINT_BUFFER_WORDS, MAX_HINT_BUFFER_WORDS_BITS}; use openvm_stark_backend::p3_field::PrimeField32; use openvm_transpiler::{ util::{nop, unimp}, diff --git a/guest-libs/pairing/src/bls12_381/pairing.rs b/guest-libs/pairing/src/bls12_381/pairing.rs index db13c785e1..8ed9df2f68 100644 --- a/guest-libs/pairing/src/bls12_381/pairing.rs +++ b/guest-libs/pairing/src/bls12_381/pairing.rs @@ -25,7 +25,7 @@ use { 
openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3}, openvm_platform::custom_insn_r, openvm_rv32im_guest, - openvm_rv32im_guest::hint_buffer_u32, + openvm_rv32im_guest::hint_buffer_chunked, }; use super::{Bls12_381, Fp, Fp12, Fp2}; @@ -280,7 +280,7 @@ impl PairingCheck for Bls12_381 { } #[cfg(target_os = "zkvm")] { - let hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); + let mut hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); // We do not rely on the slice P's memory layout since rust does not guarantee it across // compiler versions. let p_fat_ptr = (P.as_ptr() as u32, P.len() as u32); @@ -294,8 +294,8 @@ impl PairingCheck for Bls12_381 { rs1 = In &p_fat_ptr, rs2 = In &q_fat_ptr ); - let ptr = hint.as_ptr() as *const u8; - hint_buffer_u32!(ptr, (48 * 12 * 2) / 4); + let ptr = hint.as_mut_ptr() as *mut u8; + hint_buffer_chunked(ptr, (48 * 12 * 2) / 4); // word count: 48 * 12 * 2 bytes / 4 bytes per word hint.assume_init() } } diff --git a/guest-libs/pairing/src/bn254/pairing.rs b/guest-libs/pairing/src/bn254/pairing.rs index c0f1cc35f2..9fb160511b 100644 --- a/guest-libs/pairing/src/bn254/pairing.rs +++ b/guest-libs/pairing/src/bn254/pairing.rs @@ -21,7 +21,7 @@ use { core::mem::MaybeUninit, openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3}, openvm_platform::custom_insn_r, - openvm_rv32im_guest::hint_buffer_u32, + openvm_rv32im_guest::hint_buffer_chunked, }; use super::{Bn254, Fp, Fp12, Fp2}; @@ -314,7 +314,7 @@ impl PairingCheck for Bn254 { } #[cfg(target_os = "zkvm")] { - let hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); + let mut hint = MaybeUninit::<(Fp12, Fp12)>::uninit(); // We do not rely on the slice P's memory layout since rust does not guarantee it across // compiler versions. 
let p_fat_ptr = (P.as_ptr() as u32, P.len() as u32); @@ -328,8 +328,8 @@ impl PairingCheck for Bn254 { rs1 = In &p_fat_ptr, rs2 = In &q_fat_ptr ); - let ptr = hint.as_ptr() as *const u8; - hint_buffer_u32!(ptr, (32 * 12 * 2) / 4); + let ptr = hint.as_mut_ptr() as *mut u8; + hint_buffer_chunked(ptr, (32 * 12 * 2) / 4); // word count: 32 * 12 * 2 bytes / 4 bytes per word hint.assume_init() } }