From ac9ad136912b354a3c8842ffe3f5f1cf8d1feb98 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Fri, 6 Feb 2026 06:03:16 +0000 Subject: [PATCH 01/11] User-mode demand paging and fallible memory copy --- dev_tests/src/ratchet.rs | 2 +- litebox_common_linux/src/vmap.rs | 2 + litebox_platform_linux_userland/src/lib.rs | 19 ++ .../src/arch/x86/interrupts.S | 3 +- .../src/arch/x86/interrupts.rs | 111 ++++++- .../src/host/per_cpu_variables.rs | 40 +++ litebox_platform_lvbs/src/lib.rs | 270 ++++++++++++------ litebox_runner_lvbs/x86_64_vtl1.ld | 7 + litebox_shim_optee/src/lib.rs | 25 +- litebox_shim_optee/src/loader/elf.rs | 2 +- litebox_shim_optee/src/loader/ta_stack.rs | 5 +- litebox_shim_optee/src/ptr.rs | 187 +++++------- litebox_shim_optee/src/syscalls/ldelf.rs | 14 +- 13 files changed, 451 insertions(+), 236 deletions(-) diff --git a/dev_tests/src/ratchet.rs b/dev_tests/src/ratchet.rs index 4b0a21993..4dcc09856 100644 --- a/dev_tests/src/ratchet.rs +++ b/dev_tests/src/ratchet.rs @@ -72,7 +72,7 @@ fn ratchet_maybe_uninit() -> Result<()> { ("dev_tests/", 1), ("litebox/", 1), ("litebox_platform_linux_userland/", 3), - ("litebox_platform_lvbs/", 5), + ("litebox_platform_lvbs/", 6), ("litebox_shim_linux/", 5), ("litebox_shim_optee/", 1), ], diff --git a/litebox_common_linux/src/vmap.rs b/litebox_common_linux/src/vmap.rs index e2c6e3d65..325ce75b3 100644 --- a/litebox_common_linux/src/vmap.rs +++ b/litebox_common_linux/src/vmap.rs @@ -172,4 +172,6 @@ pub enum PhysPointerError { UnsupportedOperation, #[error("Unsupported permissions: {0:#x}")] UnsupportedPermissions(u8), + #[error("Memory copy failed")] + CopyFailed, } diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 038ebcf45..5d6ffc339 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -2228,6 +2228,25 @@ impl litebox::platform::CrngProvider for LinuxUserland { /// testing, or use a kernel module to provide this functionality (if needed). impl VmapManager for LinuxUserland {} +/// Dummy `VmemPageFaultHandler`. +/// +/// Page faults are handled transparently by the host Linux kernel. +/// Provided to satisfy trait bounds for `PageManager::handle_page_fault`. 
+impl litebox::mm::linux::VmemPageFaultHandler for LinuxUserland { + unsafe fn handle_page_fault( + &self, + _fault_addr: usize, + _flags: litebox::mm::linux::VmFlags, + _error_code: u64, + ) -> Result<(), litebox::mm::linux::PageFaultError> { + unreachable!("host kernel handles page faults for Linux userland") + } + + fn access_error(_error_code: u64, _flags: litebox::mm::linux::VmFlags) -> bool { + unreachable!("host kernel handles page faults for Linux userland") + } +} + #[cfg(test)] mod tests { use core::sync::atomic::AtomicU32; diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index 23dcc3e94..5f0969d30 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -143,8 +143,7 @@ isr_with_err_code isr_stack_segment_fault stack_segment_fault_handler_impl /* Vector 13: General Protection Fault (#GP) - Error code */ isr_with_err_code isr_general_protection_fault general_protection_fault_handler_impl -/* Vector 14: Page Fault (#PF) - Error code */ -isr_with_err_code isr_page_fault page_fault_handler_impl +/* Vector 14: Page Fault (#PF) - Custom stub defined in interrupts.rs */ /* Vector 16: x87 Floating-Point Exception (#MF) - No error code */ isr_no_err_code isr_x87_floating_point x87_floating_point_handler_impl diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 94b6c2b18..e074cafab 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -25,6 +25,85 @@ use x86_64::{VirtAddr, structures::idt::InterruptDescriptorTable}; // Include assembly ISR stubs core::arch::global_asm!(include_str!("interrupts.S")); +// Custom page fault ISR stub. +// +// This stub splits user-mode and kernel-mode page faults at the assembly level: +// +// - **User-mode faults**: jumps directly to exception_callback in run_thread_arch, +// which handles swapgs, saves exception info (CR2, error code), saves user +// registers and extended states, and calls the **shim's exception handler**. +// +// - **Kernel-mode faults**: standard push_regs/call/pop_regs/iretq flow into a +// minimal Rust handler that only does exception table fixup or panics. +// +// # User-mode ISR stack cleanup +// +// The user-mode path (`jmp exception_callback`) leaves the CPU-pushed iret frame +// and error code (48 bytes) on the ISR stack without popping them. This is safe +// because: +// +// 1. The page fault IDT entry does not use IST (IST index = 0). On a user→kernel +// privilege-level change, the CPU unconditionally loads RSP from TSS.RSP0 +// (Intel SDM Vol. 3A, §6.12.1 "Exception- or Interrupt-Handler Procedures"). +// +// 2. TSS.RSP0 always points to the top of the kernel stack. Each subsequent +// user→kernel transition (syscall, interrupt, or exception) causes the CPU +// to reload RSP from TSS.RSP0, overwriting any stale data from previous +// entries. +// +// Reference: Intel SDM Vol. 3A, §6.12.1 +// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html +core::arch::global_asm!( + ".global isr_page_fault", + "isr_page_fault:", + "cld", + // Check if fault came from user mode by testing CS RPL bits. 
+ // On ISR entry the CPU pushed: [rsp+40]=SS, [rsp+32]=RSP, [rsp+24]=RFLAGS, + // [rsp+16]=CS, [rsp+8]=RIP, [rsp+0]=error_code + "test qword ptr [rsp + 16], 0x3", + "jnz .Lpf_user_mode", + // --- Kernel-mode page fault: standard ISR flow --- + "push rdi", + "push rsi", + "push rdx", + "push rcx", + "push rax", + "push r8", + "push r9", + "push r10", + "push r11", + "push rbx", + "push rbp", + "push r12", + "push r13", + "push r14", + "push r15", + "mov rbp, rsp", + "and rsp, -16", + "mov rdi, rbp", + "call kernel_page_fault_handler_impl", + "mov rsp, rbp", + "pop r15", + "pop r14", + "pop r13", + "pop r12", + "pop rbp", + "pop rbx", + "pop r11", + "pop r10", + "pop r9", + "pop r8", + "pop rax", + "pop rcx", + "pop rdx", + "pop rsi", + "pop rdi", + "add rsp, 8", // skip error code + "iretq", + ".Lpf_user_mode:", + "jmp exception_callback", +); + // External symbols for assembly ISR stubs unsafe extern "C" { fn isr_divide_error(); @@ -218,18 +297,32 @@ extern "C" fn general_protection_fault_handler_impl(regs: &PtRegs) { ); } -/// Rust handler for page fault exception (vector 14). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode page fault handler. +/// Called from the `isr_page_fault` assembly stub only for kernel-mode faults. #[unsafe(no_mangle)] -extern "C" fn page_fault_handler_impl(regs: &PtRegs) { +extern "C" fn kernel_page_fault_handler_impl(regs: &mut PtRegs) { + use litebox::mm::exception_table::search_exception_tables; + use litebox::utils::TruncateExt as _; use x86_64::registers::control::Cr2; - todo!( - "EXCEPTION [{}]: PAGE FAULT\nAccessed Address: {:?}\nError Code: {:#x}\n{:#x?}", - mode_str(regs), - Cr2::read(), - regs.orig_rax, - regs + let fault_addr: usize = Cr2::read_raw().truncate(); + let error_code = regs.orig_rax; + + // Check the exception table for a recovery address. + // This handles fallible memory operations like memcpy_fallible that access + // user-space or VTL0 addresses which might be unmapped. + // + // TODO: Add kernel-mode demand paging for user-space addresses. Demand paging + // a shim using its exception handler is a chicken-and-egg problem. Some + // pre-population is unavoidable. + if let Some(fixup_addr) = search_exception_tables(regs.rip) { + regs.rip = fixup_addr; + return; + } + + panic!( + "EXCEPTION [KERNEL]: PAGE FAULT\nAccessed Address: {:#x}\nError Code: {:#x}\n{:#x?}", + fault_addr, error_code, regs ); } diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index 7a3fc311c..2e293d6db 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -231,6 +231,12 @@ pub struct PerCpuVariablesAsm { vtl1_kernel_xsaved: Cell, /// XSAVE/XRSTOR state tracking for VTL1 user (see `vtl1_kernel_xsaved` for state values and reset). 
vtl1_user_xsaved: Cell, + /// Exception info: exception vector number + exception_trapno: Cell, + /// Exception info: hardware error code + exception_error_code: Cell, + /// Exception info: faulting address (CR2) + exception_cr2: Cell, } impl PerCpuVariablesAsm { @@ -319,6 +325,37 @@ impl PerCpuVariablesAsm { pub const fn vtl1_user_xsaved_offset() -> usize { offset_of!(PerCpuVariablesAsm, vtl1_user_xsaved) } + pub const fn exception_trapno_offset() -> usize { + offset_of!(PerCpuVariablesAsm, exception_trapno) + } + pub const fn exception_error_code_offset() -> usize { + offset_of!(PerCpuVariablesAsm, exception_error_code) + } + pub const fn exception_cr2_offset() -> usize { + offset_of!(PerCpuVariablesAsm, exception_cr2) + } + pub fn set_exception_info( + &self, + exception: litebox::shim::Exception, + error_code: u32, + cr2: usize, + ) { + self.exception_trapno.set(exception.0); + self.exception_error_code.set(error_code); + self.exception_cr2.set(cr2); + } + pub fn get_exception(&self) -> litebox::shim::Exception { + litebox::shim::Exception(self.exception_trapno.get()) + } + pub fn get_exception_error_code(&self) -> u32 { + self.exception_error_code.get() + } + pub fn get_exception_cr2(&self) -> usize { + self.exception_cr2.get() + } + pub fn get_user_context_top_addr(&self) -> usize { + self.user_context_top_addr.get() + } /// Reset VTL1 xsaved flags to 0 at each VTL1 entry (OP-TEE SMC call). /// This ensures: /// - XRSTOR is skipped until XSAVE populates valid data (no spurious restores on fresh entry) @@ -366,6 +403,9 @@ impl RefCellWrapper { vtl1_xsave_mask_hi: Cell::new(0), vtl1_kernel_xsaved: Cell::new(0), vtl1_user_xsaved: Cell::new(0), + exception_trapno: Cell::new(0), + exception_error_code: Cell::new(0), + exception_cr2: Cell::new(0), }, inner: RefCell::new(value), } diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 4ba966eb5..c7b266850 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -522,42 +522,57 @@ impl LinuxKernel { } } + /// Map a VTL0 physical range and return a guard that unmaps on drop. + fn map_vtl0_guard( + &self, + phys_addr: x86_64::PhysAddr, + size: u64, + flags: PageTableFlags, + ) -> Option> { + let (page_addr, length) = self + .map_vtl0_phys_range(phys_addr, phys_addr + size, flags) + .ok()?; + let page_offset: usize = (phys_addr - phys_addr.align_down(Size4KiB::SIZE)).truncate(); + Some(Vtl0MappedGuard { + owner: self, + page_addr, + length, + ptr: page_addr.wrapping_add(page_offset), + }) + } + /// This function copies data from VTL0 physical memory to the VTL1 kernel through `Box`. /// Use this function instead of map/unmap functions to avoid potential TOCTTOU. 
/// Better to replace this function with `::from_bytes()` or similar + /// /// # Safety /// /// The caller must ensure that the `phys_addr` is a valid VTL0 physical address - /// # Panics - /// - /// Panics if `phys_addr` is invalid or not properly aligned for `T` pub unsafe fn copy_from_vtl0_phys( &self, phys_addr: x86_64::PhysAddr, ) -> Option> { use alloc::boxed::Box; - if let Ok((page_addr, length)) = self.map_vtl0_phys_range( + let guard = self.map_vtl0_guard( phys_addr, - phys_addr + core::mem::size_of::() as u64, + core::mem::size_of::() as u64, PageTableFlags::PRESENT, - ) { - let page_offset: usize = (phys_addr - phys_addr.align_down(Size4KiB::SIZE)).truncate(); - let src_ptr = page_addr.wrapping_add(page_offset).cast::(); - assert!(src_ptr.is_aligned(), "src_ptr is not properly aligned"); - - // Safety: src_ptr points to valid VTL0 memory that was just mapped - let boxed = Box::::new(unsafe { core::ptr::read_volatile(src_ptr) }); - - assert!( - self.unmap_vtl0_pages(page_addr, length).is_ok(), - "Failed to unmap VTL0 pages" - ); + )?; + + let mut value = core::mem::MaybeUninit::::uninit(); + let result = unsafe { + litebox::mm::exception_table::memcpy_fallible( + value.as_mut_ptr().cast(), + guard.ptr, + core::mem::size_of::(), + ) + }; - Some(boxed) - } else { - None - } + // Safety: the value was fully initialized on success. + result + .ok() + .map(|()| Box::new(unsafe { value.assume_init() })) } /// This function copies data from the VTL1 kernel to VTL0 physical memory. @@ -565,34 +580,28 @@ impl LinuxKernel { /// # Safety /// /// The caller must ensure that the `phys_addr` is a valid VTL0 physical address - /// # Panics - /// - /// Panics if phys_addr is invalid or not properly aligned for `T` pub unsafe fn copy_to_vtl0_phys( &self, phys_addr: x86_64::PhysAddr, value: &T, ) -> bool { - if let Ok((page_addr, length)) = self.map_vtl0_phys_range( + let Some(guard) = self.map_vtl0_guard( phys_addr, - phys_addr + core::mem::size_of::() as u64, + core::mem::size_of::() as u64, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ) { - let page_offset: usize = (phys_addr - phys_addr.align_down(Size4KiB::SIZE)).truncate(); - let dst_ptr = page_addr.wrapping_add(page_offset).cast::(); - assert!(dst_ptr.is_aligned(), "dst_ptr is not properly aligned"); - - // Safety: dst_ptr points to valid VTL0 memory that was just mapped - unsafe { core::ptr::write_volatile(dst_ptr, *value) }; + ) else { + return false; + }; + let dst_ptr = guard.ptr; - assert!( - self.unmap_vtl0_pages(page_addr, length).is_ok(), - "Failed to unmap VTL0 pages" - ); - true - } else { - false + unsafe { + litebox::mm::exception_table::memcpy_fallible( + dst_ptr, + core::ptr::from_ref::(value).cast::(), + core::mem::size_of::(), + ) } + .is_ok() } /// This function copies a slice from the VTL1 kernel to VTL0 physical memory. @@ -601,40 +610,28 @@ impl LinuxKernel { /// # Safety /// /// The caller must ensure that the `phys_addr` is a valid VTL0 physical address. 
- /// - /// # Panics - /// - /// Panics if phys_addr is invalid or not properly aligned for `T` pub unsafe fn copy_slice_to_vtl0_phys( &self, phys_addr: x86_64::PhysAddr, value: &[T], ) -> bool { - if let Ok((page_addr, length)) = self.map_vtl0_phys_range( + let Some(guard) = self.map_vtl0_guard( phys_addr, - phys_addr + core::mem::size_of_val(value) as u64, + core::mem::size_of_val(value) as u64, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ) { - let page_offset: usize = (phys_addr - phys_addr.align_down(Size4KiB::SIZE)).truncate(); - let dst_ptr = page_addr.wrapping_add(page_offset).cast::(); - assert!(dst_ptr.is_aligned(), "dst_ptr is not properly aligned"); - - // Safety: dst_ptr points to mapped VTL0 memory with enough space for value.len() - // elements. We use copy_nonoverlapping instead of creating a slice reference - // because VTL0 memory is external (similar to MMIO/DMA) and may be concurrently - // modified, which would violate Rust's aliasing model for references. - unsafe { - core::ptr::copy_nonoverlapping(value.as_ptr(), dst_ptr, value.len()); - } + ) else { + return false; + }; + let dst_ptr = guard.ptr; - assert!( - self.unmap_vtl0_pages(page_addr, length).is_ok(), - "Failed to unmap VTL0 pages" - ); - true - } else { - false + unsafe { + litebox::mm::exception_table::memcpy_fallible( + dst_ptr, + value.as_ptr().cast::(), + core::mem::size_of_val(value), + ) } + .is_ok() } /// This function copies a slice from VTL0 physical memory to the VTL1 kernel. @@ -643,39 +640,28 @@ impl LinuxKernel { /// # Safety /// /// The caller must ensure that the `phys_addr` is a valid VTL0 physical address. - /// - /// # Panics - /// - /// Panics if phys_addr is invalid or not properly aligned for `T` pub unsafe fn copy_slice_from_vtl0_phys( &self, phys_addr: x86_64::PhysAddr, buf: &mut [T], ) -> bool { - if let Ok((page_addr, length)) = self.map_vtl0_phys_range( + let Some(guard) = self.map_vtl0_guard( phys_addr, - phys_addr + core::mem::size_of_val(buf) as u64, + core::mem::size_of_val(buf) as u64, PageTableFlags::PRESENT, - ) { - let page_offset: usize = (phys_addr - phys_addr.align_down(Size4KiB::SIZE)).truncate(); - let src_ptr = page_addr.wrapping_add(page_offset).cast::(); - assert!(src_ptr.is_aligned(), "src_ptr is not properly aligned"); - - // Safety: see copy_slice_to_vtl0_phys for why we use copy_nonoverlapping - // instead of creating a slice reference to VTL0 memory. - unsafe { - core::ptr::copy_nonoverlapping(src_ptr, buf.as_mut_ptr(), buf.len()); - } - - assert!( - self.unmap_vtl0_pages(page_addr, length).is_ok(), - "Failed to unmap VTL0 pages" - ); + ) else { + return false; + }; + let src_ptr = guard.ptr; - return true; + unsafe { + litebox::mm::exception_table::memcpy_fallible( + buf.as_mut_ptr().cast::(), + src_ptr, + core::mem::size_of_val(buf), + ) } - - false + .is_ok() } /// Create a new task page table for VTL1 user space and returns its ID. @@ -759,6 +745,25 @@ impl LinuxKernel { } } +/// RAII guard that unmaps VTL0 physical pages when dropped. +struct Vtl0MappedGuard<'a, Host: HostInterface> { + owner: &'a LinuxKernel, + page_addr: *mut u8, + length: usize, + ptr: *mut u8, +} + +impl Drop for Vtl0MappedGuard<'_, Host> { + fn drop(&mut self) { + assert!( + self.owner + .unmap_vtl0_pages(self.page_addr, self.length) + .is_ok(), + "Failed to unmap VTL0 pages" + ); + } +} + impl RawMutexProvider for LinuxKernel { type RawMutex = RawMutex; } @@ -1521,6 +1526,51 @@ macro_rules! 
SAVE_SYSCALL_USER_CONTEXT_ASM { }; } +/// Save user context after a page fault ISR into the user context area. +/// +/// Similar to `SAVE_SYSCALL_USER_CONTEXT_ASM` but it preserves all GPRs. +/// The iret frame (SS, RSP, RFLAGS, CS, RIP) and error code are on +/// the ISR stack. This macro saves them via a saved ISR stack pointer. +/// +/// Prerequisites: +/// - `rsp` points to the top of the user context area (push target) +/// - `rax` points to the ISR stack: `[rax]`=error_code, `[rax+8]`=RIP, +/// `[rax+16]`=CS, `[rax+24]`=RFLAGS, `[rax+32]`=RSP, `[rax+40]`=SS +/// - All GPRs except `rax` contain user-mode values +/// - User `rax` has been saved to per-CPU scratch +/// - `swapgs` has already been executed (GS = kernel) +/// +/// Clobbers: rax +#[cfg(target_arch = "x86_64")] +macro_rules! SAVE_PF_USER_CONTEXT_ASM { + () => { + " + push [rax + 40] // pt_regs->ss + push [rax + 32] // pt_regs->rsp + push [rax + 24] // pt_regs->eflags + push [rax + 16] // pt_regs->cs + push [rax + 8] // pt_regs->rip + push [rax] // pt_regs->orig_rax (error code) + push rdi // pt_regs->rdi + push rsi // pt_regs->rsi + push rdx // pt_regs->rdx + push rcx // pt_regs->rcx + mov rax, gs:[{scratch_off}] + push rax // pt_regs->rax + push r8 // pt_regs->r8 + push r9 // pt_regs->r9 + push r10 // pt_regs->r10 + push r11 // pt_regs->r11 + push rbx // pt_regs->rbx + push rbp // pt_regs->rbp + push r12 // pt_regs->r12 + push r13 // pt_regs->r13 + push r14 // pt_regs->r14 + push r15 // pt_regs->r15 + " + }; +} + /// Restore user context from the memory area pointed by the current `rsp`. /// /// This macro uses the `pop` instructions (i.e., from low addresses up to high ones) such that @@ -1595,12 +1645,29 @@ unsafe extern "C" fn run_thread_arch( "mov rdi, [rsp]", // pass `thread_ctx` "call {syscall_handler}", "jmp done", - // Exception and interrupt callback placeholders - // IDT handler functions will jump to these labels to - // handle user-mode exceptions/interrupts. - // Note that these two callbacks are not yet implemented and no code path jumps to them. + // Exception callback: entered from isr_page_fault for user-mode page faults. 
+ // At this point: + // - rsp = ISR stack (error_code at top, iret frame above) + // - All GPRs contain user-mode values + // - Interrupts are disabled (IDT gate clears IF) + // - GS = user (swapgs has NOT happened yet) ".globl exception_callback", "exception_callback:", + "swapgs", + "mov gs:[{scratch_off}], rax", // Save `rax` to per-CPU scratch + "mov rax, cr2", + "mov gs:[{exception_cr2_off}], rax", // Save `CR2` (faulting address) + "mov byte ptr gs:[{exception_trapno_off}], 14", // Exception: page fault (14) + "mov eax, [rsp]", + "mov gs:[{exception_error_code_off}], eax", // error code (32-bit) from ISR stack + "mov rax, rsp", // store ISR `rsp` in `rax` + "mov rsp, gs:[{user_context_top_off}]", // `rsp` points to the top address of user context area + SAVE_PF_USER_CONTEXT_ASM!(), + XSAVE_VTL1_ASM!({vtl1_user_xsave_area_off}, {vtl1_xsave_mask_lo_off}, {vtl1_xsave_mask_hi_off}, {vtl1_user_xsaved_off}), + "mov rbp, gs:[{cur_kernel_bp_off}]", + "mov rsp, gs:[{cur_kernel_sp_off}]", + "mov rdi, [rsp]", // pass `thread_ctx` + "call {exception_handler}", "jmp done", ".globl interrupt_callback", "interrupt_callback:", @@ -1621,9 +1688,14 @@ unsafe extern "C" fn run_thread_arch( vtl1_kernel_xsaved_off = const { PerCpuVariablesAsm::vtl1_kernel_xsaved_offset() }, vtl1_user_xsaved_off = const { PerCpuVariablesAsm::vtl1_user_xsaved_offset() }, USER_CONTEXT_SIZE = const core::mem::size_of::(), + scratch_off = const { PerCpuVariablesAsm::scratch_offset() }, + exception_trapno_off = const { PerCpuVariablesAsm::exception_trapno_offset() }, + exception_error_code_off = const { PerCpuVariablesAsm::exception_error_code_offset() }, + exception_cr2_off = const { PerCpuVariablesAsm::exception_cr2_offset() }, init_handler = sym init_handler, reenter_handler = sym reenter_handler, syscall_handler = sym syscall_handler, + exception_handler = sym exception_handler, ); } @@ -1631,6 +1703,18 @@ unsafe extern "C" fn syscall_handler(thread_ctx: &mut ThreadContext) { thread_ctx.call_shim(|shim, ctx| shim.syscall(ctx)); } +/// Handles user-mode exceptions by reading exception info from per-CPU variables +/// and routing to the shim's exception handler. +unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext) { + use crate::host::per_cpu_variables::with_per_cpu_variables_asm; + let info = with_per_cpu_variables_asm(|pcv| litebox::shim::ExceptionInfo { + exception: pcv.get_exception(), + error_code: pcv.get_exception_error_code(), + cr2: pcv.get_exception_cr2(), + }); + thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)); +} + /// Calls `f` in order to call into a shim entrypoint. impl ThreadContext<'_> { fn call_shim( diff --git a/litebox_runner_lvbs/x86_64_vtl1.ld b/litebox_runner_lvbs/x86_64_vtl1.ld index a7060f942..1123244a2 100644 --- a/litebox_runner_lvbs/x86_64_vtl1.ld +++ b/litebox_runner_lvbs/x86_64_vtl1.ld @@ -23,6 +23,13 @@ SECTIONS _data_start = .; *(.rodata .rodata.*) *(.data .data.*) + + /* Exception table for fallible memory operations (memcpy_fallible, etc.) */ + . = ALIGN(4); + __start_ex_table = .; + KEEP(*(ex_table)) + __stop_ex_table = .; + _data_end = .; . 
= ALIGN(0x1000); diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 453dd0595..611a352f5 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -71,7 +71,18 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { _ctx: &mut Self::ExecutionContext, info: &litebox::shim::ExceptionInfo, ) -> ContinueOperation { - todo!("Handle exception in OP-TEE shim: {:?}", info,); + if info.exception == litebox::shim::Exception::PAGE_FAULT { + match unsafe { + self.task + .global + .pm + .handle_page_fault(info.cr2, info.error_code.into()) + } { + Ok(()) => return ContinueOperation::ResumeGuest, + Err(_) => return ContinueOperation::ExitThread, + } + } + todo!("Handle exception in OP-TEE shim: {:?}", info); } fn interrupt(&self, _ctx: &mut Self::ExecutionContext) -> ContinueOperation { @@ -742,7 +753,7 @@ impl Task { 0, tls_size, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, - MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_POPULATE, + MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS, -1, 0, )?; @@ -953,16 +964,16 @@ impl TeeObjMap { match user_attrs[0].attribute_id { TeeAttributeType::SecretValue => { let key_addr: usize = user_attrs[0].a.truncate(); - let key_len = usize::try_from(user_attrs[0].b).unwrap(); + let key_len: usize = user_attrs[0].b.truncate(); // TODO: revisit buffer size limits based on OP-TEE spec and deployment constraints if key_len > MAX_KERNEL_BUF_SIZE { return Err(TeeResult::BadParameters); } let key_ptr = UserConstPtr::::from_usize(key_addr); - let key_slice = key_ptr - .to_owned_slice(key_len) - .ok_or(TeeResult::BadParameters)?; - tee_obj.set_key(&key_slice); + let Some(key_box) = key_ptr.to_owned_slice(key_len) else { + return Err(TeeResult::BadParameters); + }; + tee_obj.set_key(&key_box); } _ => todo!( "handle attribute ID: {}", diff --git a/litebox_shim_optee/src/loader/elf.rs b/litebox_shim_optee/src/loader/elf.rs index c6b43c05e..b1b13715d 100644 --- a/litebox_shim_optee/src/loader/elf.rs +++ b/litebox_shim_optee/src/loader/elf.rs @@ -81,7 +81,7 @@ impl litebox_common_linux::loader::MapMemory for ElfFileInMemory<'_> { super::DEFAULT_LOW_ADDR, mapping_len, ProtFlags::PROT_NONE, - MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_POPULATE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE, -1, 0, )? diff --git a/litebox_shim_optee/src/loader/ta_stack.rs b/litebox_shim_optee/src/loader/ta_stack.rs index 4cf687c9c..6cdeab2db 100644 --- a/litebox_shim_optee/src/loader/ta_stack.rs +++ b/litebox_shim_optee/src/loader/ta_stack.rs @@ -289,8 +289,9 @@ pub(crate) fn allocate_stack(task: &crate::Task, stack_base: Option) -> O .create_stack_pages( None, length, - // Use POPULATE_PAGES_IMMEDIATELY since some platforms (e.g., LVBS) - // do not support demand paging yet. + // Pre-populate because the shim writes to the stack from kernel mode + // (e.g., push_bytes via memcpy_fallible) before the TA runs, and + // kernel-mode demand paging is not yet supported. CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY, ) .ok()? 
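NOTE: The fallible copies used throughout this patch rely on the `ex_table` section added to the
linker script (`__start_ex_table`/`__stop_ex_table`) together with the `search_exception_tables`
fixup in the kernel page-fault handler. The sketch below shows one way such a table can be laid
out and searched; it assumes a simple (fault address, fixup address) entry format and a linear
exact-match scan. The names `ExTableEntry` and `search_exception_tables_sketch`, the field names,
and the entry layout are illustrative only and are not the actual `litebox::mm::exception_table`
implementation.

    #[repr(C)]
    struct ExTableEntry {
        fault_addr: usize, // address of the instruction allowed to fault
        fixup_addr: usize, // address execution resumes at if it does fault
    }

    // Symbols the linker script places around the `ex_table` section.
    unsafe extern "C" {
        static __start_ex_table: ExTableEntry;
        static __stop_ex_table: ExTableEntry;
    }

    /// Illustrative lookup: return the fixup address for a faulting RIP, if the fault
    /// happened inside a fallible memory operation such as `memcpy_fallible`.
    /// A real table might store ranges or keep entries sorted for binary search.
    fn search_exception_tables_sketch(fault_rip: usize) -> Option<usize> {
        let (start, stop) =
            unsafe { (&raw const __start_ex_table, &raw const __stop_ex_table) };
        let len = unsafe { stop.offset_from(start) } as usize;
        let entries = unsafe { core::slice::from_raw_parts(start, len) };
        entries
            .iter()
            .find(|e| e.fault_addr == fault_rip)
            .map(|e| e.fixup_addr)
    }

On a hit, the kernel page-fault path (see `page_fault_handler_impl` / `kernel_page_fault_handler_impl`
above) rewrites `regs.rip` to the fixup address, so the interrupted copy returns an error to its
caller instead of panicking.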
diff --git a/litebox_shim_optee/src/ptr.rs b/litebox_shim_optee/src/ptr.rs index b6b720203..5a4c08fa8 100644 --- a/litebox_shim_optee/src/ptr.rs +++ b/litebox_shim_optee/src/ptr.rs @@ -183,38 +183,25 @@ impl PhysMutPtr { if count >= self.count { return Err(PhysPointerError::IndexOutOfBounds(count, self.count)); } - let src = match unsafe { - self.map_and_get_ptr( + let guard = unsafe { + self.map_and_get_ptr_guard( count, core::mem::size_of::(), PhysPageMapPermissions::READ, - ) - } { - Ok(ptr) => ptr, - Err(e) => { - let _ = unsafe { self.unmap() }; - return Err(e); - } - }; - let val = { - let mut buffer = core::mem::MaybeUninit::::uninit(); - if (src as usize).is_multiple_of(core::mem::align_of::()) { - unsafe { - core::ptr::copy_nonoverlapping(src, buffer.as_mut_ptr(), 1); - } - } else { - unsafe { - core::ptr::copy_nonoverlapping( - src.cast::(), - buffer.as_mut_ptr().cast::(), - core::mem::size_of::(), - ); - } - } - unsafe { buffer.assume_init() } + )? }; - let _ = unsafe { self.unmap() }; - Ok(alloc::boxed::Box::new(val)) + let src = guard.ptr_at(count); + let mut buffer = core::mem::MaybeUninit::::uninit(); + unsafe { + litebox::mm::exception_table::memcpy_fallible( + buffer.as_mut_ptr().cast::(), + src.cast::(), + core::mem::size_of::(), + ) + .map_err(|_| PhysPointerError::CopyFailed)?; + } + // Safety: memcpy_fallible fully initialized the buffer on success. + Ok(alloc::boxed::Box::new(unsafe { buffer.assume_init() })) } /// Read a slice of values at the given offset from the physical pointer. @@ -235,33 +222,22 @@ impl PhysMutPtr { { return Err(PhysPointerError::IndexOutOfBounds(count, self.count)); } - let src = match unsafe { - self.map_and_get_ptr( + let guard = unsafe { + self.map_and_get_ptr_guard( count, core::mem::size_of_val(values), PhysPageMapPermissions::READ, - ) - } { - Ok(ptr) => ptr, - Err(e) => { - let _ = unsafe { self.unmap() }; - return Err(e); - } + )? }; - if (src as usize).is_multiple_of(core::mem::align_of::()) { - unsafe { - core::ptr::copy_nonoverlapping(src, values.as_mut_ptr(), values.len()); - } - } else { - unsafe { - core::ptr::copy_nonoverlapping( - src.cast::(), - values.as_mut_ptr().cast::(), - core::mem::size_of_val(values), - ); - } + let src = guard.ptr_at(count); + unsafe { + litebox::mm::exception_table::memcpy_fallible( + values.as_mut_ptr().cast::(), + src.cast::(), + core::mem::size_of_val(values), + ) + .map_err(|_| PhysPointerError::CopyFailed)?; } - let _ = unsafe { self.unmap() }; Ok(()) } @@ -280,25 +256,22 @@ impl PhysMutPtr { if count >= self.count { return Err(PhysPointerError::IndexOutOfBounds(count, self.count)); } - let dst = match unsafe { - self.map_and_get_ptr( + let guard = unsafe { + self.map_and_get_ptr_guard( count, core::mem::size_of::(), PhysPageMapPermissions::READ | PhysPageMapPermissions::WRITE, - ) - } { - Ok(ptr) => ptr, - Err(e) => { - let _ = unsafe { self.unmap() }; - return Err(e); - } + )? 
}; - if (dst as usize).is_multiple_of(core::mem::align_of::()) { - unsafe { core::ptr::write(dst, value) }; - } else { - unsafe { core::ptr::write_unaligned(dst, value) }; + let dst = guard.ptr_at(count); + unsafe { + litebox::mm::exception_table::memcpy_fallible( + dst.cast::(), + core::ptr::from_ref(&value).cast::(), + core::mem::size_of::(), + ) + .map_err(|_| PhysPointerError::CopyFailed)?; } - let _ = unsafe { self.unmap() }; Ok(()) } @@ -320,38 +293,27 @@ impl PhysMutPtr { { return Err(PhysPointerError::IndexOutOfBounds(count, self.count)); } - let dst = match unsafe { - self.map_and_get_ptr( + let guard = unsafe { + self.map_and_get_ptr_guard( count, core::mem::size_of_val(values), PhysPageMapPermissions::READ | PhysPageMapPermissions::WRITE, - ) - } { - Ok(ptr) => ptr, - Err(e) => { - let _ = unsafe { self.unmap() }; - return Err(e); - } + )? }; - if (dst as usize).is_multiple_of(core::mem::align_of::()) { - unsafe { - core::ptr::copy_nonoverlapping(values.as_ptr(), dst, values.len()); - } - } else { - unsafe { - core::ptr::copy_nonoverlapping( - values.as_ptr().cast::(), - dst.cast::(), - core::mem::size_of_val(values), - ); - } + let dst = guard.ptr_at(count); + unsafe { + litebox::mm::exception_table::memcpy_fallible( + dst.cast::(), + values.as_ptr().cast::(), + core::mem::size_of_val(values), + ) + .map_err(|_| PhysPointerError::CopyFailed)?; } - let _ = unsafe { self.unmap() }; Ok(()) } - /// This is a helper function to map physical pages and get a pointer to the requested - /// data element at a given index. + /// This function maps physical pages for the requested data element at a given + /// index and returns a guard that unmaps on drop. /// /// It bridges element-level access (used by `read_at_offset`, `write_at_offset`, etc.) /// with page-level mapping. It determines which physical pages contain the requested @@ -364,13 +326,14 @@ impl PhysMutPtr { /// /// # Safety /// - /// Same safety requirements as `map_range`. - unsafe fn map_and_get_ptr( + /// Same as [`Self::map_range`]. The returned guard borrows `self` mutably, ensuring + /// the mapping is released when the guard goes out of scope. + unsafe fn map_and_get_ptr_guard( &mut self, count: usize, size: usize, perms: PhysPageMapPermissions, - ) -> Result<*mut T, PhysPointerError> { + ) -> Result, PhysPointerError> { let skip = self .offset .checked_add( @@ -388,7 +351,9 @@ impl PhysMutPtr { .map_info .as_ref() .ok_or(PhysPointerError::NoMappingInfo)?; - Ok(map_info.base.wrapping_add(skip % ALIGN).cast::()) + let base = map_info.base.wrapping_add(skip % ALIGN).cast::(); + let _ = map_info; + Ok(MappedGuard { owner: self, base }) } /// Map the physical pages from `start` to `end` indexes. @@ -443,22 +408,25 @@ impl PhysMutPtr { } } -/// Ensures physical pages are unmapped when `PhysMutPtr` goes out of scope. +/// RAII guard that unmaps physical pages when dropped. /// -/// This type is designed for single-use access: create, read/write once, then drop. -/// The `Drop` implementation guarantees that mapped pages are always released, -/// preventing resource leaks and adhering to the "minimize persistent mapping" -/// security principle. Errors during unmapping are silently ignored since we -/// cannot propagate errors from `drop`. -impl Drop for PhysMutPtr { +/// Created by [`PhysMutPtr::map_guard`]. Holds a mutable borrow on the parent +/// `PhysMutPtr` and provides the mapped base pointer for the duration of the mapping. 
+struct MappedGuard<'a, T: Clone, const ALIGN: usize> { + owner: &'a mut PhysMutPtr, + base: *mut T, +} + +impl MappedGuard<'_, T, ALIGN> { + /// Returns the mapped base pointer offset by `count` elements. + fn ptr_at(&self, count: usize) -> *mut T { + self.base.wrapping_add(count) + } +} + +impl Drop for MappedGuard<'_, T, ALIGN> { fn drop(&mut self) { - // SAFETY: The platform is expected to handle unmapping safely, including - // the case where pages were never mapped (returns Unmapped error, ignored). - let result = unsafe { self.unmap() }; - debug_assert!( - result.is_ok() || matches!(result, Err(PhysPointerError::Unmapped(_))), - "unexpected error during unmap in drop: {result:?}", - ); + let _ = unsafe { self.owner.unmap() }; } } @@ -545,13 +513,6 @@ impl PhysConstPtr { } } -/// See [`Drop`] implementation for [`PhysMutPtr`] for details. -impl Drop for PhysConstPtr { - fn drop(&mut self) { - let _ = unsafe { self.inner.unmap() }; - } -} - impl core::fmt::Debug for PhysConstPtr { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("PhysConstPtr") diff --git a/litebox_shim_optee/src/syscalls/ldelf.rs b/litebox_shim_optee/src/syscalls/ldelf.rs index 8b5ac945a..69d5203da 100644 --- a/litebox_shim_optee/src/syscalls/ldelf.rs +++ b/litebox_shim_optee/src/syscalls/ldelf.rs @@ -57,13 +57,12 @@ impl Task { if addr.checked_add(total_size).is_none() { return Err(TeeResult::BadParameters); } - // `sys_map_zi` always creates read/writeable mapping - // Use MAP_POPULATE to ensure pages are allocated immediately (required for platforms - // that don't support demand paging, e.g., LVBS). + // `sys_map_zi` always creates read/writeable mapping. // // We map with PROT_READ_WRITE first, then mprotect padding regions to PROT_NONE. - // This is because our mmap with MAP_POPULATE and PROT_NONE create pages without - // USER_ACCESSIBLE bit, making them inaccessible even to mprotect. + // Pre-populate because ldelf (user mode) accesses these pages immediately after + // mapping, and kernel-mode demand paging is not yet supported for the + // exception_callback path during early TA loading. let mut flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_POPULATE; if addr != 0 { flags |= MapFlags::MAP_FIXED; @@ -187,11 +186,10 @@ impl Task { if addr.checked_add(total_size).is_none() { return Err(TeeResult::BadParameters); } - // Use MAP_POPULATE to ensure pages are allocated immediately (required for platforms - // that don't support demand paging, e.g., LVBS). - // // We map with PROT_READ_WRITE first, then mprotect padding regions to PROT_NONE as // explained in `sys_map_zi`. + // Pre-populate because `read_ta_bin` writes to these pages from kernel mode + // via memcpy_fallible, and kernel-mode demand paging is not yet supported. 
let mut flags_internal = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_POPULATE; if addr != 0 { From 0a0f90dc299ee064bdf663e978f3137266306db1 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Sun, 8 Feb 2026 17:16:21 +0000 Subject: [PATCH 02/11] tss.rsp0 --- litebox_platform_lvbs/src/arch/x86/gdt.rs | 11 +++++-- .../src/arch/x86/interrupts.rs | 32 +++++++++---------- .../src/host/per_cpu_variables.rs | 25 ++++++++++++++- 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/gdt.rs b/litebox_platform_lvbs/src/arch/x86/gdt.rs index dd405b97f..7b5ba1498 100644 --- a/litebox_platform_lvbs/src/arch/x86/gdt.rs +++ b/litebox_platform_lvbs/src/arch/x86/gdt.rs @@ -82,10 +82,17 @@ impl Default for GdtWrapper { } fn setup_gdt_tss() { - let stack_top = with_per_cpu_variables_asm(PerCpuVariablesAsm::get_interrupt_stack_ptr); + let interrupt_stack_top = + with_per_cpu_variables_asm(PerCpuVariablesAsm::get_interrupt_stack_ptr); + let page_fault_stack_top = + with_per_cpu_variables_asm(PerCpuVariablesAsm::get_page_fault_stack_ptr); let mut tss = Box::new(AlignedTss(TaskStateSegment::new())); - tss.0.interrupt_stack_table[0] = VirtAddr::new(stack_top as u64); + // IST[0] (hardware IST1): dedicated stack for double faults + tss.0.interrupt_stack_table[0] = VirtAddr::new(interrupt_stack_top as u64); + // RSP0: stack loaded by the CPU on any user->kernel privilege-level change when + // the IDT entry's IST index is 0. + tss.0.privilege_stack_table[0] = VirtAddr::new(page_fault_stack_top as u64); // `tss_segment()` requires `&'static TaskStateSegment`. Leaking `tss` is fine because // it will be used until the LVBS kernel resets. let tss = Box::leak(tss); diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index e074cafab..2209f3ebc 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -29,30 +29,30 @@ core::arch::global_asm!(include_str!("interrupts.S")); // // This stub splits user-mode and kernel-mode page faults at the assembly level: // -// - **User-mode faults**: jumps directly to exception_callback in run_thread_arch, -// which handles swapgs, saves exception info (CR2, error code), saves user -// registers and extended states, and calls the **shim's exception handler**. +// - **User-mode faults**: jump to exception_callback in run_thread_arch which +// swaps GS, saves exception info and CPU registers, and calls the shim's +// exception handler. // // - **Kernel-mode faults**: standard push_regs/call/pop_regs/iretq flow into a -// minimal Rust handler that only does exception table fixup or panics. +// minimal handler that does exception table fixup or panics. // -// # User-mode ISR stack cleanup +// # Stacks for page fault handling // -// The user-mode path (`jmp exception_callback`) leaves the CPU-pushed iret frame -// and error code (48 bytes) on the ISR stack without popping them. This is safe -// because: +// We do not use an IST entry for page faults (check the below idt setup). This +// results in two different stacks for user-mode vs kernel-mode page faults: // -// 1. The page fault IDT entry does not use IST (IST index = 0). On a user→kernel -// privilege-level change, the CPU unconditionally loads RSP from TSS.RSP0 -// (Intel SDM Vol. 3A, §6.12.1 "Exception- or Interrupt-Handler Procedures"). +// - **User-mode faults**: the CPU automatically switches to the RSP0 stack from +// TSS on the transition. 
We set a dedicated per-CPU stack for it. Since it is +// always reloaded, don't need to wipe out stale data from previous faults +// (i.e., iret frame and error code). // -// 2. TSS.RSP0 always points to the top of the kernel stack. Each subsequent -// user→kernel transition (syscall, interrupt, or exception) causes the CPU -// to reload RSP from TSS.RSP0, overwriting any stale data from previous -// entries. +// - **Kernel-mode faults**: the CPU does not switch stacks. The ISR stub pushes +// registers onto the current stack before calling the page-fault handler. +// This implies that the kernel-mode code should ensure there is enough stack +// space before performing any operations that might fault. Otherwise, the +// fault handler might overwrite existing data or cause a double fault. // // Reference: Intel SDM Vol. 3A, §6.12.1 -// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html core::arch::global_asm!( ".global isr_page_fault", "isr_page_fault:", diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index 2e293d6db..e64c81b01 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -22,6 +22,7 @@ use litebox_common_linux::{rdgsbase, wrgsbase}; use x86_64::VirtAddr; pub const INTERRUPT_STACK_SIZE: usize = 2 * PAGE_SIZE; +pub const PAGE_FAULT_STACK_SIZE: usize = PAGE_SIZE; pub const KERNEL_STACK_SIZE: usize = 10 * PAGE_SIZE; /// Per-CPU VTL1 kernel variables @@ -32,6 +33,7 @@ pub struct PerCpuVariables { hv_simp_page: [u8; PAGE_SIZE], interrupt_stack: [u8; INTERRUPT_STACK_SIZE], _guard_page_0: [u8; PAGE_SIZE], + page_fault_stack: [u8; PAGE_FAULT_STACK_SIZE], kernel_stack: [u8; KERNEL_STACK_SIZE], _guard_page_1: [u8; PAGE_SIZE], hvcall_input: [u8; PAGE_SIZE], @@ -56,6 +58,10 @@ impl PerCpuVariables { &raw const self.interrupt_stack as u64 + (self.interrupt_stack.len() - 1) as u64 } + pub(crate) fn page_fault_stack_top(&self) -> u64 { + &raw const self.page_fault_stack as u64 + (self.page_fault_stack.len() - 1) as u64 + } + pub fn hv_vp_assist_page_as_ptr(&self) -> *const HvVpAssistPage { (&raw const self.hv_vp_assist_page).cast::() } @@ -148,6 +154,7 @@ static mut BSP_VARIABLES: PerCpuVariables = PerCpuVariables { hv_simp_page: [0u8; PAGE_SIZE], interrupt_stack: [0u8; INTERRUPT_STACK_SIZE], _guard_page_0: [0u8; PAGE_SIZE], + page_fault_stack: [0u8; PAGE_FAULT_STACK_SIZE], kernel_stack: [0u8; KERNEL_STACK_SIZE], _guard_page_1: [0u8; PAGE_SIZE], hvcall_input: [0u8; PAGE_SIZE], @@ -195,8 +202,10 @@ static mut BSP_VARIABLES: PerCpuVariables = PerCpuVariables { pub struct PerCpuVariablesAsm { /// Initial kernel stack pointer to reset the kernel stack on VTL switch kernel_stack_ptr: Cell, - /// Initial interrupt stack pointer for x86 IST + /// Initial interrupt stack pointer for x86 IST (double fault) interrupt_stack_ptr: Cell, + /// Page fault IST stack pointer + page_fault_stack_ptr: Cell, /// Return address for call-based VTL switching vtl_return_addr: Cell, /// Scratch pad @@ -249,6 +258,12 @@ impl PerCpuVariablesAsm { pub fn get_interrupt_stack_ptr(&self) -> usize { self.interrupt_stack_ptr.get() } + pub fn set_page_fault_stack_ptr(&self, sp: usize) { + self.page_fault_stack_ptr.set(sp); + } + pub fn get_page_fault_stack_ptr(&self) -> usize { + self.page_fault_stack_ptr.get() + } pub fn set_vtl_return_addr(&self, addr: usize) { self.vtl_return_addr.set(addr); } @@ -280,6 +295,9 @@ impl PerCpuVariablesAsm { pub const fn 
interrupt_stack_ptr_offset() -> usize { offset_of!(PerCpuVariablesAsm, interrupt_stack_ptr) } + pub const fn page_fault_stack_ptr_offset() -> usize { + offset_of!(PerCpuVariablesAsm, page_fault_stack_ptr) + } pub const fn vtl_return_addr_offset() -> usize { offset_of!(PerCpuVariablesAsm, vtl_return_addr) } @@ -388,6 +406,7 @@ impl RefCellWrapper { pcv_asm: PerCpuVariablesAsm { kernel_stack_ptr: Cell::new(0), interrupt_stack_ptr: Cell::new(0), + page_fault_stack_ptr: Cell::new(0), vtl_return_addr: Cell::new(0), scratch: Cell::new(0), vtl0_state_top_addr: Cell::new(0), @@ -593,12 +612,16 @@ pub fn init_per_cpu_variables() { & !(STACK_ALIGNMENT - 1); let interrupt_sp = TruncateExt::::truncate(per_cpu_variables.interrupt_stack_top()) & !(STACK_ALIGNMENT - 1); + let page_fault_sp = + TruncateExt::::truncate(per_cpu_variables.page_fault_stack_top()) + & !(STACK_ALIGNMENT - 1); let vtl0_state_top_addr = TruncateExt::::truncate(&raw const per_cpu_variables.vtl0_state as u64) + core::mem::size_of::(); with_per_cpu_variables_asm(|pcv_asm| { pcv_asm.set_kernel_stack_ptr(kernel_sp); pcv_asm.set_interrupt_stack_ptr(interrupt_sp); + pcv_asm.set_page_fault_stack_ptr(page_fault_sp); pcv_asm.set_vtl0_state_top_addr(vtl0_state_top_addr); }); }); From f087d78f2ab11fbda00c16b7f6a629c109fa21d9 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Sun, 8 Feb 2026 20:09:08 +0000 Subject: [PATCH 03/11] pass all user-mode exceptions --- litebox_platform_lvbs/src/arch/x86/gdt.rs | 18 +- .../src/arch/x86/interrupts.S | 72 ++++-- .../src/arch/x86/interrupts.rs | 234 ++++-------------- .../src/host/per_cpu_variables.rs | 68 ++--- litebox_platform_lvbs/src/lib.rs | 36 +-- litebox_shim_optee/src/lib.rs | 17 +- litebox_shim_optee/src/ptr.rs | 20 +- 7 files changed, 193 insertions(+), 272 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/gdt.rs b/litebox_platform_lvbs/src/arch/x86/gdt.rs index 7b5ba1498..e86fcbe9f 100644 --- a/litebox_platform_lvbs/src/arch/x86/gdt.rs +++ b/litebox_platform_lvbs/src/arch/x86/gdt.rs @@ -82,17 +82,17 @@ impl Default for GdtWrapper { } fn setup_gdt_tss() { - let interrupt_stack_top = - with_per_cpu_variables_asm(PerCpuVariablesAsm::get_interrupt_stack_ptr); - let page_fault_stack_top = - with_per_cpu_variables_asm(PerCpuVariablesAsm::get_page_fault_stack_ptr); + let double_fault_stack_top = + with_per_cpu_variables_asm(PerCpuVariablesAsm::get_double_fault_stack_ptr); + let exception_stack_top = + with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_stack_ptr); let mut tss = Box::new(AlignedTss(TaskStateSegment::new())); - // IST[0] (hardware IST1): dedicated stack for double faults - tss.0.interrupt_stack_table[0] = VirtAddr::new(interrupt_stack_top as u64); - // RSP0: stack loaded by the CPU on any user->kernel privilege-level change when - // the IDT entry's IST index is 0. - tss.0.privilege_stack_table[0] = VirtAddr::new(page_fault_stack_top as u64); + // TSS.IST1: dedicated stack for double faults + tss.0.interrupt_stack_table[0] = VirtAddr::new(double_fault_stack_top as u64); + // TSS.RSP0: stack loaded by the CPU on Ring 3 -> Ring 0 transition when the IDT + // entry's IST index is 0. In our setup, all exceptions except for double faults. + tss.0.privilege_stack_table[0] = VirtAddr::new(exception_stack_top as u64); // `tss_segment()` requires `&'static TaskStateSegment`. Leaking `tss` is fine because // it will be used until the LVBS kernel resets. 
let tss = Box::leak(tss); diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index 5f0969d30..23121a206 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -9,6 +9,31 @@ * 2. Call the appropriate Rust handler * 3. Restore registers and return via iretq * + * Each stub checks the saved CS RPL bits to determine whether the exception + * came from user mode (ring 3) or kernel mode (ring 0): + * + * - User-mode exceptions: push the vector number and jump to + * exception_callback (run_thread_arch), which swaps GS, saves the full + * CPU context, and routes to the shim's exception handler. + * + * - Kernel-mode exceptions: standard push_regs/call/pop_regs/iretq flow + * into a per-vector Rust handler. + * + * Stacks (Reference: Intel SDM Vol. 3A, §6.12.1): + * + * Unless an IST entry is configured for the vector (i.e., #DF in our case), + * the CPU selects the stack based on the privilege transition: + * + * - User-mode (CPL change): the CPU loads RSP from TSS.RSP0. We set a + * dedicated per-CPU stack for this (see gdt.rs). Since RSP0 is always + * reloaded from the TSS, stale data from previous exceptions does not + * need to be cleaned up. + * + * - Kernel-mode (no CPL change): the CPU continues on the current stack. + * The ISR stub pushes registers onto it. Kernel code must ensure enough + * stack space before performing operations that might fault; otherwise + * the fault handler may overwrite live data or trigger a double fault. + * * The x86_64 interrupt frame pushed by CPU: * [rsp+40] SS * [rsp+32] RSP @@ -74,13 +99,16 @@ /* * ISR stub for interrupts WITHOUT an error code. - * The CPU does not push an error code, so we push a dummy 0. + * Kernel-mode: standard push_regs/call/pop_regs/iretq flow. + * User-mode: push vector number, then jump to exception_callback. */ -.macro isr_no_err_code name:req handler:req +.macro isr_no_err_code name:req handler:req vector:req .global \name \name: cld push 0 /* Push dummy error code */ + test qword ptr [rsp + 16], 0x3 /* Check CS RPL bits */ + jnz .Luser_\name push_regs mov rbp, rsp /* Save stack pointer */ and rsp, -16 /* Align stack to 16 bytes for call */ @@ -90,16 +118,20 @@ pop_regs add rsp, 8 /* Skip error code */ iretq +.Luser_\name: + push \vector /* Pass vector number to exception_callback */ + jmp exception_callback .endm /* * ISR stub for interrupts WITH an error code. - * The CPU pushes the error code automatically. 
*/ -.macro isr_with_err_code name:req handler:req +.macro isr_with_err_code name:req handler:req vector:req .global \name \name: cld + test qword ptr [rsp + 16], 0x3 /* Check CS RPL bits */ + jnz .Luser_\name push_regs mov rbp, rsp /* Save stack pointer */ and rsp, -16 /* Align stack to 16 bytes for call */ @@ -109,50 +141,54 @@ pop_regs add rsp, 8 /* Skip error code */ iretq +.Luser_\name: + push \vector /* Pass vector number to exception_callback */ + jmp exception_callback .endm /* Exception handlers (vectors 0-31) */ /* Vector 0: Divide Error (#DE) - No error code */ -isr_no_err_code isr_divide_error divide_error_handler_impl +isr_no_err_code isr_divide_error divide_error_handler_impl 0 /* Vector 1: Debug (#DB) - No error code */ -isr_no_err_code isr_debug debug_handler_impl +isr_no_err_code isr_debug debug_handler_impl 1 /* Vector 3: Breakpoint (#BP) - No error code */ -isr_no_err_code isr_breakpoint breakpoint_handler_impl +isr_no_err_code isr_breakpoint breakpoint_handler_impl 3 /* Vector 4: Overflow (#OF) - No error code */ -isr_no_err_code isr_overflow overflow_handler_impl +isr_no_err_code isr_overflow overflow_handler_impl 4 /* Vector 5: Bound Range Exceeded (#BR) - No error code */ -isr_no_err_code isr_bound_range_exceeded bound_range_exceeded_handler_impl +isr_no_err_code isr_bound_range_exceeded bound_range_exceeded_handler_impl 5 /* Vector 6: Invalid Opcode (#UD) - No error code */ -isr_no_err_code isr_invalid_opcode invalid_opcode_handler_impl +isr_no_err_code isr_invalid_opcode invalid_opcode_handler_impl 6 /* Vector 7: Device Not Available (#NM) - No error code */ -isr_no_err_code isr_device_not_available device_not_available_handler_impl +isr_no_err_code isr_device_not_available device_not_available_handler_impl 7 /* Vector 8: Double Fault (#DF) - Error code (always 0) */ -isr_with_err_code isr_double_fault double_fault_handler_impl +isr_with_err_code isr_double_fault double_fault_handler_impl 8 /* Vector 12: Stack-Segment Fault (#SS) - Error code */ -isr_with_err_code isr_stack_segment_fault stack_segment_fault_handler_impl +isr_with_err_code isr_stack_segment_fault stack_segment_fault_handler_impl 12 /* Vector 13: General Protection Fault (#GP) - Error code */ -isr_with_err_code isr_general_protection_fault general_protection_fault_handler_impl +isr_with_err_code isr_general_protection_fault general_protection_fault_handler_impl 13 -/* Vector 14: Page Fault (#PF) - Custom stub defined in interrupts.rs */ +/* Vector 14: Page Fault (#PF) - Error code */ +isr_with_err_code isr_page_fault page_fault_handler_impl 14 /* Vector 16: x87 Floating-Point Exception (#MF) - No error code */ -isr_no_err_code isr_x87_floating_point x87_floating_point_handler_impl +isr_no_err_code isr_x87_floating_point x87_floating_point_handler_impl 16 /* Vector 17: Alignment Check (#AC) - Error code */ -isr_with_err_code isr_alignment_check alignment_check_handler_impl +isr_with_err_code isr_alignment_check alignment_check_handler_impl 17 /* Vector 19: SIMD Floating-Point Exception (#XM) - No error code */ -isr_no_err_code isr_simd_floating_point simd_floating_point_handler_impl +isr_no_err_code isr_simd_floating_point simd_floating_point_handler_impl 19 /* * Hypervisor synthetic interrupt handler (vector 0xf3) diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 2209f3ebc..8ad4affe8 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -25,85 +25,6 @@ use 
x86_64::{VirtAddr, structures::idt::InterruptDescriptorTable}; // Include assembly ISR stubs core::arch::global_asm!(include_str!("interrupts.S")); -// Custom page fault ISR stub. -// -// This stub splits user-mode and kernel-mode page faults at the assembly level: -// -// - **User-mode faults**: jump to exception_callback in run_thread_arch which -// swaps GS, saves exception info and CPU registers, and calls the shim's -// exception handler. -// -// - **Kernel-mode faults**: standard push_regs/call/pop_regs/iretq flow into a -// minimal handler that does exception table fixup or panics. -// -// # Stacks for page fault handling -// -// We do not use an IST entry for page faults (check the below idt setup). This -// results in two different stacks for user-mode vs kernel-mode page faults: -// -// - **User-mode faults**: the CPU automatically switches to the RSP0 stack from -// TSS on the transition. We set a dedicated per-CPU stack for it. Since it is -// always reloaded, don't need to wipe out stale data from previous faults -// (i.e., iret frame and error code). -// -// - **Kernel-mode faults**: the CPU does not switch stacks. The ISR stub pushes -// registers onto the current stack before calling the page-fault handler. -// This implies that the kernel-mode code should ensure there is enough stack -// space before performing any operations that might fault. Otherwise, the -// fault handler might overwrite existing data or cause a double fault. -// -// Reference: Intel SDM Vol. 3A, §6.12.1 -core::arch::global_asm!( - ".global isr_page_fault", - "isr_page_fault:", - "cld", - // Check if fault came from user mode by testing CS RPL bits. - // On ISR entry the CPU pushed: [rsp+40]=SS, [rsp+32]=RSP, [rsp+24]=RFLAGS, - // [rsp+16]=CS, [rsp+8]=RIP, [rsp+0]=error_code - "test qword ptr [rsp + 16], 0x3", - "jnz .Lpf_user_mode", - // --- Kernel-mode page fault: standard ISR flow --- - "push rdi", - "push rsi", - "push rdx", - "push rcx", - "push rax", - "push r8", - "push r9", - "push r10", - "push r11", - "push rbx", - "push rbp", - "push r12", - "push r13", - "push r14", - "push r15", - "mov rbp, rsp", - "and rsp, -16", - "mov rdi, rbp", - "call kernel_page_fault_handler_impl", - "mov rsp, rbp", - "pop r15", - "pop r14", - "pop r13", - "pop r12", - "pop rbp", - "pop rbx", - "pop r11", - "pop r10", - "pop r9", - "pop r8", - "pop rax", - "pop rcx", - "pop rdx", - "pop rsi", - "pop rdi", - "add rsp, 8", // skip error code - "iretq", - ".Lpf_user_mode:", - "jmp exception_callback", -); - // External symbols for assembly ISR stubs unsafe extern "C" { fn isr_divide_error(); @@ -176,188 +97,129 @@ pub fn init_idt() { idt().load(); } -// TODO: carefully handle exceptions/interrupts. If an exception or interrupt is due to userspace code, -// we should destroy the corresponding user context rather than halt the entire kernel. - -/// User-mode CS selector has RPL=3 (bits 0-1 set) -const USER_MODE_RPL_MASK: usize = 0x3; - -/// Check if the exception occurred in user mode by examining the saved CS register. -#[inline] -fn is_user_mode(regs: &PtRegs) -> bool { - (regs.cs & USER_MODE_RPL_MASK) == USER_MODE_RPL_MASK -} - -/// Get a string indicating the execution context (kernel or user mode). -#[inline] -fn mode_str(regs: &PtRegs) -> &'static str { - if is_user_mode(regs) { "USER" } else { "KERNEL" } -} +// TODO: Let's consider whether we can recover some of the below exceptions instead of panicking. -/// Rust handler for divide error exception (vector 0). 
-/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for divide error exception (vector 0). #[unsafe(no_mangle)] extern "C" fn divide_error_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: DIVIDE BY ZERO\n{:#x?}", - mode_str(regs), - regs - ); + panic!("EXCEPTION: DIVIDE BY ZERO\n{:#x?}", regs); } -/// Rust handler for debug exception (vector 1). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for debug exception (vector 1). #[unsafe(no_mangle)] extern "C" fn debug_handler_impl(regs: &PtRegs) { - todo!("EXCEPTION [{}]: DEBUG\n{:#x?}", mode_str(regs), regs); + panic!("EXCEPTION: DEBUG\n{:#x?}", regs); } -/// Rust handler for breakpoint exception (vector 3). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for breakpoint exception (vector 3). #[unsafe(no_mangle)] extern "C" fn breakpoint_handler_impl(regs: &PtRegs) { - todo!("EXCEPTION [{}]: BREAKPOINT\n{:#x?}", mode_str(regs), regs); + panic!("EXCEPTION: BREAKPOINT\n{:#x?}", regs); } -/// Rust handler for overflow exception (vector 4). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for overflow exception (vector 4). #[unsafe(no_mangle)] extern "C" fn overflow_handler_impl(regs: &PtRegs) { - todo!("EXCEPTION [{}]: OVERFLOW\n{:#x?}", mode_str(regs), regs); + panic!("EXCEPTION: OVERFLOW\n{:#x?}", regs); } -/// Rust handler for bound range exceeded exception (vector 5). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for bound range exceeded exception (vector 5). #[unsafe(no_mangle)] extern "C" fn bound_range_exceeded_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: BOUND RANGE EXCEEDED\n{:#x?}", - mode_str(regs), - regs - ); + panic!("EXCEPTION: BOUND RANGE EXCEEDED\n{:#x?}", regs); } -/// Rust handler for invalid opcode exception (vector 6). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for invalid opcode exception (vector 6). #[unsafe(no_mangle)] extern "C" fn invalid_opcode_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: INVALID OPCODE at RIP {:#x}\n{:#x?}", - mode_str(regs), - regs.rip, - regs + panic!( + "EXCEPTION: INVALID OPCODE at RIP {:#x}\n{:#x?}", + regs.rip, regs ); } -/// Rust handler for device not available exception (vector 7). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for device not available exception (vector 7). #[unsafe(no_mangle)] extern "C" fn device_not_available_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: DEVICE NOT AVAILABLE (FPU/SSE)\n{:#x?}", - mode_str(regs), - regs - ); + panic!("EXCEPTION: DEVICE NOT AVAILABLE (FPU/SSE)\n{:#x?}", regs); } -/// Rust handler for double fault exception (vector 8). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for double fault exception (vector 8). #[unsafe(no_mangle)] extern "C" fn double_fault_handler_impl(regs: &PtRegs) { - // Double faults are always fatal - no recovery possible panic!( - "EXCEPTION [{}]: DOUBLE FAULT (Error Code: {:#x})\n{:#x?}", - mode_str(regs), - regs.orig_rax, - regs + "EXCEPTION: DOUBLE FAULT (Error Code: {:#x})\n{:#x?}", + regs.orig_rax, regs ); } -/// Rust handler for stack-segment fault exception (vector 12). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for stack-segment fault exception (vector 12). 
#[unsafe(no_mangle)] extern "C" fn stack_segment_fault_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: STACK-SEGMENT FAULT (Error Code: {:#x})\n{:#x?}", - mode_str(regs), - regs.orig_rax, - regs + panic!( + "EXCEPTION: STACK-SEGMENT FAULT (Error Code: {:#x})\n{:#x?}", + regs.orig_rax, regs ); } -/// Rust handler for general protection fault exception (vector 13). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for general protection fault exception (vector 13). #[unsafe(no_mangle)] extern "C" fn general_protection_fault_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: GENERAL PROTECTION FAULT (Error Code: {:#x})\n{:#x?}", - mode_str(regs), - regs.orig_rax, - regs + panic!( + "EXCEPTION: GENERAL PROTECTION FAULT (Error Code: {:#x})\n{:#x?}", + regs.orig_rax, regs ); } -/// Kernel-mode page fault handler. -/// Called from the `isr_page_fault` assembly stub only for kernel-mode faults. +/// Kernel-mode page fault handler (vector 14). #[unsafe(no_mangle)] -extern "C" fn kernel_page_fault_handler_impl(regs: &mut PtRegs) { +extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) { use litebox::mm::exception_table::search_exception_tables; use litebox::utils::TruncateExt as _; use x86_64::registers::control::Cr2; - let fault_addr: usize = Cr2::read_raw().truncate(); - let error_code = regs.orig_rax; - // Check the exception table for a recovery address. // This handles fallible memory operations like memcpy_fallible that access // user-space or VTL0 addresses which might be unmapped. - // - // TODO: Add kernel-mode demand paging for user-space addresses. Demand paging - // a shim using its exception handler is a chicken-and-egg problem. Some - // pre-population is unavoidable. if let Some(fixup_addr) = search_exception_tables(regs.rip) { regs.rip = fixup_addr; return; } + // TODO: Add kernel-mode demand paging for user-space addresses. We cannot + // rely on the exception_callback logic which aims to return to user-space + // faulting instructions. Here, we need to return to kernel-space faulting + // instruction after handling the page fault. We still require the shim's + // support to handle this page fault along with the `VmArea` information. + + // Kernel-mode page fault at kernel-space addresses + let fault_addr: usize = Cr2::read_raw().truncate(); + let error_code = regs.orig_rax; panic!( - "EXCEPTION [KERNEL]: PAGE FAULT\nAccessed Address: {:#x}\nError Code: {:#x}\n{:#x?}", + "EXCEPTION: PAGE FAULT\nAccessed Address: {:#x}\nError Code: {:#x}\n{:#x?}", fault_addr, error_code, regs ); } -/// Rust handler for x87 floating-point exception (vector 16). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for x87 floating-point exception (vector 16). #[unsafe(no_mangle)] extern "C" fn x87_floating_point_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: x87 FLOATING-POINT ERROR\n{:#x?}", - mode_str(regs), - regs - ); + panic!("EXCEPTION: x87 FLOATING-POINT ERROR\n{:#x?}", regs); } -/// Rust handler for alignment check exception (vector 17). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for alignment check exception (vector 17). 
#[unsafe(no_mangle)] extern "C" fn alignment_check_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: ALIGNMENT CHECK (Error Code: {:#x})\n{:#x?}", - mode_str(regs), - regs.orig_rax, - regs + panic!( + "EXCEPTION: ALIGNMENT CHECK (Error Code: {:#x})\n{:#x?}", + regs.orig_rax, regs ); } -/// Rust handler for SIMD floating-point exception (vector 19). -/// Called from assembly stub with pointer to saved register state. +/// Kernel-mode handler for SIMD floating-point exception (vector 19). #[unsafe(no_mangle)] extern "C" fn simd_floating_point_handler_impl(regs: &PtRegs) { - todo!( - "EXCEPTION [{}]: SIMD FLOATING-POINT ERROR\n{:#x?}", - mode_str(regs), - regs - ); + panic!("EXCEPTION: SIMD FLOATING-POINT ERROR\n{:#x?}", regs); } // Note: isr_hyperv_sint is defined in interrupts.S as a minimal stub that only diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index e64c81b01..3732cfed7 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -21,8 +21,8 @@ use litebox::utils::TruncateExt; use litebox_common_linux::{rdgsbase, wrgsbase}; use x86_64::VirtAddr; -pub const INTERRUPT_STACK_SIZE: usize = 2 * PAGE_SIZE; -pub const PAGE_FAULT_STACK_SIZE: usize = PAGE_SIZE; +pub const DOUBLE_FAULT_STACK_SIZE: usize = 2 * PAGE_SIZE; +pub const EXCEPTION_STACK_SIZE: usize = PAGE_SIZE; pub const KERNEL_STACK_SIZE: usize = 10 * PAGE_SIZE; /// Per-CPU VTL1 kernel variables @@ -31,9 +31,9 @@ pub const KERNEL_STACK_SIZE: usize = 10 * PAGE_SIZE; pub struct PerCpuVariables { hv_vp_assist_page: [u8; PAGE_SIZE], hv_simp_page: [u8; PAGE_SIZE], - interrupt_stack: [u8; INTERRUPT_STACK_SIZE], + double_fault_stack: [u8; DOUBLE_FAULT_STACK_SIZE], _guard_page_0: [u8; PAGE_SIZE], - page_fault_stack: [u8; PAGE_FAULT_STACK_SIZE], + exception_stack: [u8; EXCEPTION_STACK_SIZE], kernel_stack: [u8; KERNEL_STACK_SIZE], _guard_page_1: [u8; PAGE_SIZE], hvcall_input: [u8; PAGE_SIZE], @@ -54,12 +54,12 @@ impl PerCpuVariables { &raw const self.kernel_stack as u64 + (self.kernel_stack.len() - 1) as u64 } - pub(crate) fn interrupt_stack_top(&self) -> u64 { - &raw const self.interrupt_stack as u64 + (self.interrupt_stack.len() - 1) as u64 + pub(crate) fn double_fault_stack_top(&self) -> u64 { + &raw const self.double_fault_stack as u64 + (self.double_fault_stack.len() - 1) as u64 } - pub(crate) fn page_fault_stack_top(&self) -> u64 { - &raw const self.page_fault_stack as u64 + (self.page_fault_stack.len() - 1) as u64 + pub(crate) fn exception_stack_top(&self) -> u64 { + &raw const self.exception_stack as u64 + (self.exception_stack.len() - 1) as u64 } pub fn hv_vp_assist_page_as_ptr(&self) -> *const HvVpAssistPage { @@ -152,9 +152,9 @@ impl PerCpuVariables { static mut BSP_VARIABLES: PerCpuVariables = PerCpuVariables { hv_vp_assist_page: [0u8; PAGE_SIZE], hv_simp_page: [0u8; PAGE_SIZE], - interrupt_stack: [0u8; INTERRUPT_STACK_SIZE], + double_fault_stack: [0u8; DOUBLE_FAULT_STACK_SIZE], _guard_page_0: [0u8; PAGE_SIZE], - page_fault_stack: [0u8; PAGE_FAULT_STACK_SIZE], + exception_stack: [0u8; EXCEPTION_STACK_SIZE], kernel_stack: [0u8; KERNEL_STACK_SIZE], _guard_page_1: [0u8; PAGE_SIZE], hvcall_input: [0u8; PAGE_SIZE], @@ -202,10 +202,10 @@ static mut BSP_VARIABLES: PerCpuVariables = PerCpuVariables { pub struct PerCpuVariablesAsm { /// Initial kernel stack pointer to reset the kernel stack on VTL switch kernel_stack_ptr: Cell, - /// Initial interrupt stack pointer for x86 IST (double fault) - 
interrupt_stack_ptr: Cell, - /// Page fault IST stack pointer - page_fault_stack_ptr: Cell, + /// Double fault stack pointer (TSS.IST1) + double_fault_stack_ptr: Cell, + /// Exception stack pointer (TSS.RSP0) + exception_stack_ptr: Cell, /// Return address for call-based VTL switching vtl_return_addr: Cell, /// Scratch pad @@ -252,17 +252,17 @@ impl PerCpuVariablesAsm { pub fn set_kernel_stack_ptr(&self, sp: usize) { self.kernel_stack_ptr.set(sp); } - pub fn set_interrupt_stack_ptr(&self, sp: usize) { - self.interrupt_stack_ptr.set(sp); + pub fn set_double_fault_stack_ptr(&self, sp: usize) { + self.double_fault_stack_ptr.set(sp); } - pub fn get_interrupt_stack_ptr(&self) -> usize { - self.interrupt_stack_ptr.get() + pub fn get_double_fault_stack_ptr(&self) -> usize { + self.double_fault_stack_ptr.get() } - pub fn set_page_fault_stack_ptr(&self, sp: usize) { - self.page_fault_stack_ptr.set(sp); + pub fn set_exception_stack_ptr(&self, sp: usize) { + self.exception_stack_ptr.set(sp); } - pub fn get_page_fault_stack_ptr(&self) -> usize { - self.page_fault_stack_ptr.get() + pub fn get_exception_stack_ptr(&self) -> usize { + self.exception_stack_ptr.get() } pub fn set_vtl_return_addr(&self, addr: usize) { self.vtl_return_addr.set(addr); @@ -292,11 +292,11 @@ impl PerCpuVariablesAsm { pub const fn kernel_stack_ptr_offset() -> usize { offset_of!(PerCpuVariablesAsm, kernel_stack_ptr) } - pub const fn interrupt_stack_ptr_offset() -> usize { - offset_of!(PerCpuVariablesAsm, interrupt_stack_ptr) + pub const fn double_fault_stack_ptr_offset() -> usize { + offset_of!(PerCpuVariablesAsm, double_fault_stack_ptr) } - pub const fn page_fault_stack_ptr_offset() -> usize { - offset_of!(PerCpuVariablesAsm, page_fault_stack_ptr) + pub const fn exception_stack_ptr_offset() -> usize { + offset_of!(PerCpuVariablesAsm, exception_stack_ptr) } pub const fn vtl_return_addr_offset() -> usize { offset_of!(PerCpuVariablesAsm, vtl_return_addr) @@ -405,8 +405,8 @@ impl RefCellWrapper { Self { pcv_asm: PerCpuVariablesAsm { kernel_stack_ptr: Cell::new(0), - interrupt_stack_ptr: Cell::new(0), - page_fault_stack_ptr: Cell::new(0), + double_fault_stack_ptr: Cell::new(0), + exception_stack_ptr: Cell::new(0), vtl_return_addr: Cell::new(0), scratch: Cell::new(0), vtl0_state_top_addr: Cell::new(0), @@ -610,18 +610,18 @@ pub fn init_per_cpu_variables() { with_per_cpu_variables_mut(|per_cpu_variables| { let kernel_sp = TruncateExt::::truncate(per_cpu_variables.kernel_stack_top()) & !(STACK_ALIGNMENT - 1); - let interrupt_sp = TruncateExt::::truncate(per_cpu_variables.interrupt_stack_top()) - & !(STACK_ALIGNMENT - 1); - let page_fault_sp = - TruncateExt::::truncate(per_cpu_variables.page_fault_stack_top()) + let double_fault_sp = + TruncateExt::::truncate(per_cpu_variables.double_fault_stack_top()) & !(STACK_ALIGNMENT - 1); + let exception_sp = TruncateExt::::truncate(per_cpu_variables.exception_stack_top()) + & !(STACK_ALIGNMENT - 1); let vtl0_state_top_addr = TruncateExt::::truncate(&raw const per_cpu_variables.vtl0_state as u64) + core::mem::size_of::(); with_per_cpu_variables_asm(|pcv_asm| { pcv_asm.set_kernel_stack_ptr(kernel_sp); - pcv_asm.set_interrupt_stack_ptr(interrupt_sp); - pcv_asm.set_page_fault_stack_ptr(page_fault_sp); + pcv_asm.set_double_fault_stack_ptr(double_fault_sp); + pcv_asm.set_exception_stack_ptr(exception_sp); pcv_asm.set_vtl0_state_top_addr(vtl0_state_top_addr); }); }); diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index c7b266850..81f2e78c8 100644 --- 
a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1526,16 +1526,17 @@ macro_rules! SAVE_SYSCALL_USER_CONTEXT_ASM { }; } -/// Save user context after a page fault ISR into the user context area. +/// Save user context after an ISR exception into the user context area. /// /// Similar to `SAVE_SYSCALL_USER_CONTEXT_ASM` but it preserves all GPRs. -/// The iret frame (SS, RSP, RFLAGS, CS, RIP) and error code are on -/// the ISR stack. This macro saves them via a saved ISR stack pointer. +/// The ISR stub pushes the vector number on top of the CPU-pushed error code +/// and iret frame. This macro copies them via a saved ISR stack pointer. /// /// Prerequisites: /// - `rsp` points to the top of the user context area (push target) -/// - `rax` points to the ISR stack: `[rax]`=error_code, `[rax+8]`=RIP, -/// `[rax+16]`=CS, `[rax+24]`=RFLAGS, `[rax+32]`=RSP, `[rax+40]`=SS +/// - `rax` points to the ISR stack: `[rax]`=vector, `[rax+8]`=error_code, +/// `[rax+16]`=RIP, `[rax+24]`=CS, `[rax+32]`=RFLAGS, `[rax+40]`=RSP, +/// `[rax+48]`=SS /// - All GPRs except `rax` contain user-mode values /// - User `rax` has been saved to per-CPU scratch /// - `swapgs` has already been executed (GS = kernel) @@ -1545,12 +1546,12 @@ macro_rules! SAVE_SYSCALL_USER_CONTEXT_ASM { macro_rules! SAVE_PF_USER_CONTEXT_ASM { () => { " - push [rax + 40] // pt_regs->ss - push [rax + 32] // pt_regs->rsp - push [rax + 24] // pt_regs->eflags - push [rax + 16] // pt_regs->cs - push [rax + 8] // pt_regs->rip - push [rax] // pt_regs->orig_rax (error code) + push [rax + 48] // pt_regs->ss + push [rax + 40] // pt_regs->rsp + push [rax + 32] // pt_regs->eflags + push [rax + 24] // pt_regs->cs + push [rax + 16] // pt_regs->rip + push [rax + 8] // pt_regs->orig_rax (error code) push rdi // pt_regs->rdi push rsi // pt_regs->rsi push rdx // pt_regs->rdx @@ -1645,9 +1646,9 @@ unsafe extern "C" fn run_thread_arch( "mov rdi, [rsp]", // pass `thread_ctx` "call {syscall_handler}", "jmp done", - // Exception callback: entered from isr_page_fault for user-mode page faults. + // Exception callback: entered from ISR stubs for user-mode exceptions. 
// At this point: - // - rsp = ISR stack (error_code at top, iret frame above) + // - rsp points to ISR stack: [rsp]=vector, [rsp+8]=error_code, then iret frame // - All GPRs contain user-mode values // - Interrupts are disabled (IDT gate clears IF) // - GS = user (swapgs has NOT happened yet) @@ -1656,10 +1657,11 @@ unsafe extern "C" fn run_thread_arch( "swapgs", "mov gs:[{scratch_off}], rax", // Save `rax` to per-CPU scratch "mov rax, cr2", - "mov gs:[{exception_cr2_off}], rax", // Save `CR2` (faulting address) - "mov byte ptr gs:[{exception_trapno_off}], 14", // Exception: page fault (14) - "mov eax, [rsp]", - "mov gs:[{exception_error_code_off}], eax", // error code (32-bit) from ISR stack + "mov gs:[{exception_cr2_off}], rax", // Save `CR2` (only meaningful for #PF) + "mov al, [rsp]", + "mov gs:[{exception_trapno_off}], al", // vector number from ISR stack + "mov eax, [rsp + 8]", + "mov gs:[{exception_error_code_off}], eax", // error code from ISR stack "mov rax, rsp", // store ISR `rsp` in `rax` "mov rsp, gs:[{user_context_top_off}]", // `rsp` points to the top address of user context area SAVE_PF_USER_CONTEXT_ASM!(), diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 611a352f5..0ef292230 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -68,21 +68,24 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { fn exception( &self, - _ctx: &mut Self::ExecutionContext, + ctx: &mut Self::ExecutionContext, info: &litebox::shim::ExceptionInfo, ) -> ContinueOperation { - if info.exception == litebox::shim::Exception::PAGE_FAULT { - match unsafe { + if info.exception == litebox::shim::Exception::PAGE_FAULT + && unsafe { self.task .global .pm .handle_page_fault(info.cr2, info.error_code.into()) - } { - Ok(()) => return ContinueOperation::ResumeGuest, - Err(_) => return ContinueOperation::ExitThread, } + .is_ok() + { + return ContinueOperation::ResumeGuest; } - todo!("Handle exception in OP-TEE shim: {:?}", info); + // Note: OP-TEE OS doesn't have a concept of signal handling. It kills + // the TA on CPU exceptions except for pageable page faults. + ctx.rax = (TeeResult::TargetDead as u32) as usize; + ContinueOperation::ExitThread } fn interrupt(&self, _ctx: &mut Self::ExecutionContext) -> ContinueOperation { diff --git a/litebox_shim_optee/src/ptr.rs b/litebox_shim_optee/src/ptr.rs index 5a4c08fa8..9418ca6be 100644 --- a/litebox_shim_optee/src/ptr.rs +++ b/litebox_shim_optee/src/ptr.rs @@ -426,7 +426,25 @@ impl MappedGuard<'_, T, ALIGN> { impl Drop for MappedGuard<'_, T, ALIGN> { fn drop(&mut self) { - let _ = unsafe { self.owner.unmap() }; + // SAFETY: The platform is expected to handle unmapping safely, including + // the case where pages were never mapped (returns Unmapped error, ignored). + let result = unsafe { self.owner.unmap() }; + debug_assert!( + result.is_ok() || matches!(result, Err(PhysPointerError::Unmapped(_))), + "unexpected error during unmap in drop: {result:?}", + ); + } +} + +impl Drop for PhysMutPtr { + fn drop(&mut self) { + // SAFETY: The platform is expected to handle unmapping safely, including + // the case where pages were never mapped (returns Unmapped error, ignored). 
+ let result = unsafe { self.unmap() }; + debug_assert!( + result.is_ok() || matches!(result, Err(PhysPointerError::Unmapped(_))), + "unexpected error during unmap in drop: {result:?}", + ); } } From e54669c17b5017029c46a05014c76fadb8ba6abe Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Mon, 9 Feb 2026 02:22:43 +0000 Subject: [PATCH 04/11] use zerocopy for safety --- dev_tests/src/ratchet.rs | 1 - litebox_platform_lvbs/Cargo.toml | 2 +- .../src/arch/x86/interrupts.S | 20 ++----- litebox_platform_lvbs/src/host/linux.rs | 32 ++++++----- litebox_platform_lvbs/src/lib.rs | 53 +++++++++++++++--- litebox_platform_lvbs/src/mshv/heki.rs | 55 ++++++------------- .../src/mshv/mem_integrity.rs | 18 ++---- litebox_platform_lvbs/src/mshv/vsm.rs | 30 +++------- litebox_shim_optee/src/ptr.rs | 2 +- 9 files changed, 101 insertions(+), 112 deletions(-) diff --git a/dev_tests/src/ratchet.rs b/dev_tests/src/ratchet.rs index 4dcc09856..f962f5d69 100644 --- a/dev_tests/src/ratchet.rs +++ b/dev_tests/src/ratchet.rs @@ -72,7 +72,6 @@ fn ratchet_maybe_uninit() -> Result<()> { ("dev_tests/", 1), ("litebox/", 1), ("litebox_platform_linux_userland/", 3), - ("litebox_platform_lvbs/", 6), ("litebox_shim_linux/", 5), ("litebox_shim_optee/", 1), ], diff --git a/litebox_platform_lvbs/Cargo.toml b/litebox_platform_lvbs/Cargo.toml index 236d25b5b..190f28b13 100644 --- a/litebox_platform_lvbs/Cargo.toml +++ b/litebox_platform_lvbs/Cargo.toml @@ -37,7 +37,7 @@ object = { version = "0.36.7", default-features = false, features = ["pe"] } digest = { version = "0.10.7", default-features = false } aligned-vec = { version = "0.6.4", default-features = false } raw-cpuid = "11.6.0" -zerocopy = { version = "0.8", default-features = false } +zerocopy = { version = "0.8", default-features = false, features = ["derive"] } [target.'cfg(target_arch = "x86_64")'.dependencies] x86_64 = { version = "0.15.2", default-features = false, features = ["instructions"] } diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index 23121a206..5f227ead2 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -4,11 +4,6 @@ /* * Interrupt Service Routine (ISR) stubs for x86_64 * - * This file provides assembly stubs for interrupt handlers that: - * 1. Save all general-purpose registers in PtRegs layout - * 2. Call the appropriate Rust handler - * 3. Restore registers and return via iretq - * * Each stub checks the saved CS RPL bits to determine whether the exception * came from user mode (ring 3) or kernel mode (ring 0): * @@ -25,9 +20,8 @@ * the CPU selects the stack based on the privilege transition: * * - User-mode (CPL change): the CPU loads RSP from TSS.RSP0. We set a - * dedicated per-CPU stack for this (see gdt.rs). Since RSP0 is always - * reloaded from the TSS, stale data from previous exceptions does not - * need to be cleaned up. + * dedicated per-CPU stack for this (gdt.rs). Since RSP0 is always + * reloaded from the TSS, we do not wipe stale data from old exceptions. * * - Kernel-mode (no CPL change): the CPU continues on the current stack. * The ISR stub pushes registers onto it. Kernel code must ensure enough @@ -97,11 +91,7 @@ pop rdi .endm -/* - * ISR stub for interrupts WITHOUT an error code. - * Kernel-mode: standard push_regs/call/pop_regs/iretq flow. - * User-mode: push vector number, then jump to exception_callback. - */ +/* ISR stub for interrupts WITHOUT an error code. 
*/ .macro isr_no_err_code name:req handler:req vector:req .global \name \name: @@ -123,9 +113,7 @@ jmp exception_callback .endm -/* - * ISR stub for interrupts WITH an error code. - */ +/* ISR stub for interrupts WITH an error code. */ .macro isr_with_err_code name:req handler:req vector:req .global \name \name: diff --git a/litebox_platform_lvbs/src/host/linux.rs b/litebox_platform_lvbs/src/host/linux.rs index e08fd73f5..44dcb6cd7 100644 --- a/litebox_platform_lvbs/src/host/linux.rs +++ b/litebox_platform_lvbs/src/host/linux.rs @@ -4,6 +4,7 @@ //! Linux Structs use crate::arch::MAX_CORES; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; /// Context saved when entering the kernel /// @@ -60,7 +61,7 @@ pub struct Timespec { const BITS_PER_LONG: usize = 64; #[repr(C)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, FromBytes, Immutable, KnownLayout)] pub struct CpuMask { bits: [u64; MAX_CORES.div_ceil(BITS_PER_LONG)], } @@ -103,7 +104,7 @@ pub enum PkeyIdType { /// `module_signature` from [Linux](https://elixir.bootlin.com/linux/v6.6.85/source/include/linux/module_signature.h#L33) #[repr(C)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, FromBytes, Immutable, KnownLayout)] pub struct ModuleSignature { pub algo: u8, pub hash: u8, @@ -133,9 +134,10 @@ impl ModuleSignature { /// `kexec_segment` from [Linux](https://elixir.bootlin.com/linux/v6.6.85/source/include/linux/kexec.h#L82) #[repr(C)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] pub struct KexecSegment { - pub buf: *const core::ffi::c_void, + /// Pointer to buffer (stored as u64 since we don't dereference it) + pub buf: u64, pub bufsz: u64, pub mem: u64, pub memsz: u64, @@ -146,16 +148,17 @@ pub struct KexecSegment { /// we need for our use case, such as `nr_segments` and `segment`, and /// are not affected by the kernel build configurations like `CONFIG_KEXEC_FILE` and `CONFIG_IMA_KEXEC`. #[repr(C)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] pub struct Kimage { head: u64, - entry: *const u64, - last_entry: *const u64, + /// Pointer fields stored as u64 since we don't dereference them + entry: u64, + last_entry: u64, start: u64, - control_code_page: *const core::ffi::c_void, // struct page* - swap_page: *const core::ffi::c_void, // struct page* - vmcoreinfo_page: *const core::ffi::c_void, // struct page* - vmcoreinfo_data_copy: *const core::ffi::c_void, + control_code_page: u64, // struct page* + swap_page: u64, // struct page* + vmcoreinfo_page: u64, // struct page* + vmcoreinfo_data_copy: u64, pub nr_segments: u64, pub segment: [KexecSegment; KEXEC_SEGMENT_MAX], // we do not need the rest of the fields for now @@ -163,9 +166,10 @@ pub struct Kimage { pub const KEXEC_SEGMENT_MAX: usize = 16; /// `list_head` from [Linux](https://elixir.bootlin.com/linux/v6.6.85/source/include/linux/types.h#L190) -#[derive(Clone, Copy, Debug)] +/// Pointer fields stored as u64 since we don't dereference them. 
+#[derive(Clone, Copy, Debug, FromBytes, IntoBytes, Immutable, KnownLayout)] #[repr(C)] pub struct ListHead { - pub next: *mut ListHead, - pub prev: *mut ListHead, + pub next: u64, + pub prev: u64, } diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 81f2e78c8..a9288e674 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -50,6 +50,31 @@ pub mod mshv; pub mod syscall_entry; +/// Allocate a zeroed `Box` directly on the heap, avoiding stack intermediaries +/// for large types (e.g., 4096-byte `HekiPage`). +/// +/// This is safe because `T: FromBytes` guarantees that all-zero bytes are a valid `T`. +/// +/// # Panics +/// +/// Panics if `T` is a zero-sized type, since `alloc_zeroed` with a zero-sized +/// layout is undefined behavior. +fn box_new_zeroed() -> alloc::boxed::Box { + assert!( + core::mem::size_of::() > 0, + "box_new_zeroed does not support zero-sized types" + ); + let layout = core::alloc::Layout::new::(); + // Safety: layout has a non-zero size and correct alignment for T. + let ptr = unsafe { alloc::alloc::alloc_zeroed(layout) }.cast::(); + if ptr.is_null() { + alloc::alloc::handle_alloc_error(layout); + } + // Safety: ptr is a valid, zeroed, properly aligned heap allocation for T. + // T: FromBytes guarantees all-zero is a valid bit pattern. + unsafe { alloc::boxed::Box::from_raw(ptr) } +} + static CPU_MHZ: AtomicU64 = AtomicU64::new(0); /// Special page table ID for the base (kernel-only) page table. @@ -543,16 +568,17 @@ impl LinuxKernel { /// This function copies data from VTL0 physical memory to the VTL1 kernel through `Box`. /// Use this function instead of map/unmap functions to avoid potential TOCTTOU. - /// Better to replace this function with `::from_bytes()` or similar /// /// # Safety /// /// The caller must ensure that the `phys_addr` is a valid VTL0 physical address - pub unsafe fn copy_from_vtl0_phys( + pub unsafe fn copy_from_vtl0_phys( &self, phys_addr: x86_64::PhysAddr, ) -> Option> { - use alloc::boxed::Box; + if core::mem::size_of::() == 0 { + return Some(alloc::boxed::Box::new(T::new_zeroed())); + } let guard = self.map_vtl0_guard( phys_addr, @@ -560,19 +586,16 @@ impl LinuxKernel { PageTableFlags::PRESENT, )?; - let mut value = core::mem::MaybeUninit::::uninit(); + let mut boxed = box_new_zeroed::(); let result = unsafe { litebox::mm::exception_table::memcpy_fallible( - value.as_mut_ptr().cast(), + core::ptr::from_mut::(boxed.as_mut()).cast(), guard.ptr, core::mem::size_of::(), ) }; - // Safety: the value was fully initialized on success. - result - .ok() - .map(|()| Box::new(unsafe { value.assume_init() })) + result.ok().map(|()| boxed) } /// This function copies data from the VTL1 kernel to VTL0 physical memory. 
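As a point of reference, the zeroed-Box-plus-fallible-copy pattern used by `box_new_zeroed` and `copy_from_vtl0_phys` above can be sketched standalone. The `Page4K` type and `alloc_zeroed_page` helper below are illustrative only, not litebox API; they show why `T: FromBytes` makes handing an all-zero heap allocation to `Box` sound, with no page-sized stack intermediary:

use std::alloc::{Layout, alloc_zeroed, handle_alloc_error};
use zerocopy::FromBytes; // zerocopy 0.8 with the "derive" feature

/// Illustrative page-sized type (stands in for e.g. `HekiPage`).
#[derive(FromBytes)]
#[repr(C, align(4096))]
struct Page4K {
    bytes: [u8; 4096],
}

/// Allocate a zeroed `Page4K` directly on the heap. `FromBytes` guarantees
/// the all-zero bit pattern is a valid `Page4K`, so no further
/// initialization is needed before use.
fn alloc_zeroed_page() -> Box<Page4K> {
    let layout = Layout::new::<Page4K>();
    // SAFETY: the layout has non-zero size and the correct alignment.
    let ptr = unsafe { alloc_zeroed(layout) }.cast::<Page4K>();
    if ptr.is_null() {
        handle_alloc_error(layout);
    }
    // SAFETY: `ptr` is a valid, zeroed, properly aligned allocation for `Page4K`.
    unsafe { Box::from_raw(ptr) }
}

fn main() {
    let page = alloc_zeroed_page();
    assert!(page.bytes.iter().all(|&b| b == 0));
}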
@@ -585,6 +608,10 @@ impl LinuxKernel { phys_addr: x86_64::PhysAddr, value: &T, ) -> bool { + if core::mem::size_of::() == 0 { + return true; + } + let Some(guard) = self.map_vtl0_guard( phys_addr, core::mem::size_of::() as u64, @@ -615,6 +642,10 @@ impl LinuxKernel { phys_addr: x86_64::PhysAddr, value: &[T], ) -> bool { + if core::mem::size_of_val(value) == 0 { + return true; + } + let Some(guard) = self.map_vtl0_guard( phys_addr, core::mem::size_of_val(value) as u64, @@ -645,6 +676,10 @@ impl LinuxKernel { phys_addr: x86_64::PhysAddr, buf: &mut [T], ) -> bool { + if core::mem::size_of_val(buf) == 0 { + return true; + } + let Some(guard) = self.map_vtl0_guard( phys_addr, core::mem::size_of_val(buf) as u64, diff --git a/litebox_platform_lvbs/src/mshv/heki.rs b/litebox_platform_lvbs/src/mshv/heki.rs index 5314992c8..9b6d3f7e7 100644 --- a/litebox_platform_lvbs/src/mshv/heki.rs +++ b/litebox_platform_lvbs/src/mshv/heki.rs @@ -12,6 +12,7 @@ use x86_64::{ PhysAddr, VirtAddr, structures::paging::{PageSize, Size4KiB}, }; +use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout}; bitflags::bitflags! { #[derive(Clone, Copy, Debug, PartialEq)] @@ -106,7 +107,7 @@ pub(crate) fn mod_mem_type_to_mem_attr(mod_mem_type: ModMemType) -> MemAttr { /// `HekiRange` is a generic container for various types of memory ranges. /// It has an `attributes` field which can be interpreted differently based on the context like /// `MemAttr`, `KdataType`, `ModMemType`, or `KexecType`. -#[derive(Default, Clone, Copy)] +#[derive(Default, Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] #[repr(C, packed)] pub struct HekiRange { pub va: u64, @@ -194,11 +195,12 @@ impl core::fmt::Debug for HekiRange { pub const HEKI_MAX_RANGES: usize = ((PAGE_SIZE as u32 - u64::BITS * 3 / 8) / core::mem::size_of::() as u32) as usize; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, FromBytes, Immutable, KnownLayout)] #[repr(align(4096))] #[repr(C)] pub struct HekiPage { - pub next: *mut HekiPage, + /// Pointer to next page (stored as u64 since we don't dereference it) + pub next: u64, pub next_pa: u64, pub nranges: u64, pub ranges: [HekiRange; HEKI_MAX_RANGES], @@ -207,10 +209,8 @@ pub struct HekiPage { impl HekiPage { pub fn new() -> Self { - HekiPage { - next: core::ptr::null_mut(), - ..Default::default() - } + // Safety: all fields are valid when zeroed (u64 zeros, array of zeroed HekiRange) + Self::new_zeroed() } pub fn is_valid(&self) -> bool { @@ -234,7 +234,7 @@ impl HekiPage { impl Default for HekiPage { fn default() -> Self { - Self::new() + Self::new_zeroed() } } @@ -247,30 +247,20 @@ impl<'a> IntoIterator for &'a HekiPage { } } -#[derive(Default, Clone, Copy, Debug)] +#[derive(Default, Clone, Copy, Debug, FromBytes, IntoBytes, Immutable, KnownLayout)] #[repr(C)] pub struct HekiPatch { pub pa: [u64; 2], pub size: u8, pub code: [u8; POKE_MAX_OPCODE_SIZE], + _padding: [u8; 2], } pub const POKE_MAX_OPCODE_SIZE: usize = 5; impl HekiPatch { /// Creates a new `HekiPatch` with a given buffer. Returns `None` if any field is invalid. 
pub fn try_from_bytes(bytes: &[u8]) -> Option { - if bytes.len() != core::mem::size_of::() { - return None; - } - let mut patch = core::mem::MaybeUninit::::uninit(); - let patch = unsafe { - core::ptr::copy_nonoverlapping( - bytes.as_ptr().cast::(), - patch.as_mut_ptr().cast::(), - core::mem::size_of::(), - ); - patch.assume_init() - }; + let patch = Self::read_from_bytes(bytes).ok()?; if patch.is_valid() { Some(patch) } else { None } } @@ -312,12 +302,14 @@ pub enum HekiPatchType { Unknown = 0xffff_ffff, } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, FromBytes, Immutable, KnownLayout)] #[repr(C)] pub struct HekiPatchInfo { - pub typ_: HekiPatchType, + /// Patch type stored as u32 for zerocopy compatibility (see `HekiPatchType`) + pub typ_: u32, list: ListHead, - mod_: *const core::ffi::c_void, // *const `struct module` + /// *const `struct module` (stored as u64 since we don't dereference it) + mod_: u64, pub patch_index: u64, pub max_patch_count: u64, // pub patch: [HekiPatch; *] @@ -326,23 +318,12 @@ pub struct HekiPatchInfo { impl HekiPatchInfo { /// Creates a new `HekiPatchInfo` with a given buffer. Returns `None` if any field is invalid. pub fn try_from_bytes(bytes: &[u8]) -> Option { - if bytes.len() != core::mem::size_of::() { - return None; - } - let mut info = core::mem::MaybeUninit::::uninit(); - let info = unsafe { - core::ptr::copy_nonoverlapping( - bytes.as_ptr().cast::(), - info.as_mut_ptr().cast::(), - core::mem::size_of::(), - ); - info.assume_init() - }; + let info = Self::read_from_bytes(bytes).ok()?; if info.is_valid() { Some(info) } else { None } } pub fn is_valid(&self) -> bool { - !(self.typ_ != HekiPatchType::JumpLabel + !(self.typ_ != HekiPatchType::JumpLabel as u32 || self.patch_index == 0 || self.patch_index > self.max_patch_count) } diff --git a/litebox_platform_lvbs/src/mshv/mem_integrity.rs b/litebox_platform_lvbs/src/mshv/mem_integrity.rs index 4f60649e6..a66182f4f 100644 --- a/litebox_platform_lvbs/src/mshv/mem_integrity.rs +++ b/litebox_platform_lvbs/src/mshv/mem_integrity.rs @@ -37,6 +37,7 @@ use x509_cert::{ Certificate, der::{Decode, Encode, oid::ObjectIdentifier}, }; +use zerocopy::FromBytes; /// This function validates the memory content of a loaded kernel module against the original ELF file. 
/// In particular, it checks whether the non-relocatable/patchable bytes of certain sections @@ -447,18 +448,11 @@ fn extract_module_data_and_signature( }) .ok_or(VerificationError::SignatureNotFound)?; - let mut module_signature = core::mem::MaybeUninit::::uninit(); - unsafe { - core::ptr::copy_nonoverlapping( - signed_module - .as_ptr() - .add(module_signature_offset) - .cast::(), - module_signature.as_mut_ptr().cast::(), - core::mem::size_of::(), - ); - } - let module_signature = unsafe { module_signature.assume_init() }; + let module_signature = ModuleSignature::read_from_bytes( + &signed_module[module_signature_offset + ..module_signature_offset + core::mem::size_of::()], + ) + .map_err(|_| VerificationError::InvalidSignature)?; if !module_signature.is_valid() { return Err(VerificationError::InvalidSignature); } diff --git a/litebox_platform_lvbs/src/mshv/vsm.rs b/litebox_platform_lvbs/src/mshv/vsm.rs index 51ee92afa..c338e8b1b 100644 --- a/litebox_platform_lvbs/src/mshv/vsm.rs +++ b/litebox_platform_lvbs/src/mshv/vsm.rs @@ -56,8 +56,9 @@ use x86_64::{ structures::paging::{PageSize, PhysFrame, Size4KiB, frame::PhysFrameRange}, }; use x509_cert::{Certificate, der::Decode}; +use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout}; -#[derive(Copy, Clone)] +#[derive(Copy, Clone, FromBytes, Immutable, KnownLayout)] #[repr(align(4096))] struct AlignedPage([u8; PAGE_SIZE]); @@ -753,20 +754,13 @@ pub fn mshv_vsm_kexec_validate(pa: u64, nranges: u64, crash: u64) -> Result::uninit(); - let kimage_slice: &mut [u8] = unsafe { - core::slice::from_raw_parts_mut( - kimage.as_mut_ptr().cast::(), - core::mem::size_of::(), - ) - }; - kimage_slice.copy_from_slice(&kexec_image[..core::mem::size_of::()]); - let kimage = unsafe { kimage.assume_init() }; + let kimage = Kimage::read_from_bytes(&kexec_image[..core::mem::size_of::()]) + .map_err(|_| VsmError::KexecImageSegmentsInvalid)?; if kimage.nr_segments > KEXEC_SEGMENT_MAX as u64 { return Err(VsmError::KexecImageSegmentsInvalid); } for i in 0..usize::try_from(kimage.nr_segments).unwrap_or(0) { - let va = kimage.segment[i].buf as u64; + let va = kimage.segment[i].buf; let pa = kimage.segment[i].mem; if let Some(epa) = pa.checked_add(kimage.segment[i].memsz) { kexec_memory_metadata.insert_memory_range(KexecMemoryRange::new(va, pa, epa)); @@ -850,25 +844,19 @@ fn copy_heki_patch_from_vtl0(patch_pa_0: u64, patch_pa_1: u64) -> Result::uninit(); - let heki_patch_slice: &mut [u8] = unsafe { - core::slice::from_raw_parts_mut( - heki_patch.as_mut_ptr().cast::(), - core::mem::size_of::(), - ) - }; + let mut heki_patch = HekiPatch::new_zeroed(); + let heki_patch_bytes = heki_patch.as_mut_bytes(); unsafe { if !crate::platform_low().copy_slice_from_vtl0_phys( patch_pa_0, - heki_patch_slice.get_unchecked_mut(..bytes_in_first_page), + heki_patch_bytes.get_unchecked_mut(..bytes_in_first_page), ) || !crate::platform_low().copy_slice_from_vtl0_phys( patch_pa_1, - heki_patch_slice.get_unchecked_mut(bytes_in_first_page..), + heki_patch_bytes.get_unchecked_mut(bytes_in_first_page..), ) { return Err(VsmError::Vtl0CopyFailed); } } - let heki_patch = unsafe { heki_patch.assume_init() }; if heki_patch.is_valid() { Ok(heki_patch) } else { diff --git a/litebox_shim_optee/src/ptr.rs b/litebox_shim_optee/src/ptr.rs index 9418ca6be..e883b81d2 100644 --- a/litebox_shim_optee/src/ptr.rs +++ b/litebox_shim_optee/src/ptr.rs @@ -410,7 +410,7 @@ impl PhysMutPtr { /// RAII guard that unmaps physical pages when dropped. /// -/// Created by [`PhysMutPtr::map_guard`]. 
Holds a mutable borrow on the parent +/// Created by `map_and_get_ptr_guard`. Holds a mutable borrow on the parent /// `PhysMutPtr` and provides the mapped base pointer for the duration of the mapping. struct MappedGuard<'a, T: Clone, const ALIGN: usize> { owner: &'a mut PhysMutPtr, From e04ceeab06d2491909a40ad8ecbd9e0229ec3d7b Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Mon, 9 Feb 2026 19:54:58 +0000 Subject: [PATCH 05/11] kernel-mode user-space demand paging --- litebox/src/shim.rs | 8 ++ .../src/host/snp/snp_impl.rs | 12 ++ litebox_platform_linux_userland/src/lib.rs | 7 ++ .../src/arch/x86/interrupts.S | 48 +++++++- .../src/arch/x86/interrupts.rs | 49 +++++--- .../src/host/per_cpu_variables.rs | 8 ++ litebox_platform_lvbs/src/lib.rs | 107 ++++++++++++++++-- litebox_runner_lvbs/src/lib.rs | 8 +- litebox_shim_linux/src/lib.rs | 22 ++++ litebox_shim_linux/src/syscalls/signal/mod.rs | 1 + litebox_shim_optee/src/lib.rs | 28 +++-- 11 files changed, 260 insertions(+), 38 deletions(-) diff --git a/litebox/src/shim.rs b/litebox/src/shim.rs index b80aad8a1..4145a6199 100644 --- a/litebox/src/shim.rs +++ b/litebox/src/shim.rs @@ -96,6 +96,11 @@ pub enum ContinueOperation { ResumeGuest, /// Exit the current thread. ExitThread, + /// The exception was handled in kernel mode; resume kernel execution. + ExceptionHandled, + /// The exception was not handled. The platform should apply an + /// exception-table fixup if one exists. + ExceptionFixup, } /// Information about a hardware exception. @@ -109,6 +114,9 @@ pub struct ExceptionInfo { /// The value of the CR2 register at the time of the exception, if /// applicable (e.g., for page faults). pub cr2: usize, + /// Whether the exception occurred in kernel mode (e.g., a demand page + /// fault during a kernel-mode access to a user-space address). + pub kernel_mode: bool, } /// An x86 exception type. 
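Taken together, the `kernel_mode` flag and the two new `ContinueOperation` variants give a shim a single decision table for page faults. The sketch below condenses that dispatch; the enum and struct are trimmed mirrors of the litebox types, and `try_demand_page` stands in for `PageManager::handle_page_fault`:

// Trimmed mirrors of the litebox types, for illustration only.
enum ContinueOperation {
    ResumeGuest,
    ExitThread,
    ExceptionHandled,
    ExceptionFixup,
}

struct ExceptionInfo {
    is_page_fault: bool,
    cr2: usize,
    error_code: u32,
    kernel_mode: bool,
}

fn dispatch(
    info: &ExceptionInfo,
    try_demand_page: impl Fn(usize, u32) -> bool,
) -> ContinueOperation {
    if !info.is_page_fault {
        // Non-#PF exceptions are shim policy (signal delivery for Linux,
        // TA kill for OP-TEE); simplified to ExitThread here.
        return ContinueOperation::ExitThread;
    }
    let resolved = try_demand_page(info.cr2, info.error_code);
    match (info.kernel_mode, resolved) {
        // Kernel touched an unmapped user address and the page is now
        // mapped: iretq back to the faulting kernel instruction.
        (true, true) => ContinueOperation::ExceptionHandled,
        // Kernel-mode fault that demand paging could not satisfy: let the
        // platform apply an exception-table fixup (memcpy_fallible paths).
        (true, false) => ContinueOperation::ExceptionFixup,
        // Ordinary user-mode demand paging: re-run the faulting instruction.
        (false, true) => ContinueOperation::ResumeGuest,
        // Unresolvable user-mode fault: again shim policy; the Linux shim
        // delivers a signal, the OP-TEE shim kills the TA.
        (false, false) => ContinueOperation::ExitThread,
    }
}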
diff --git a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs index c6662aaa9..1bb0ea651 100644 --- a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs +++ b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs @@ -216,6 +216,12 @@ pub fn init_thread( match tls.shim.get().unwrap().init(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), + litebox::shim::ContinueOperation::ExceptionHandled => { + panic!("ExceptionHandled not expected in SNP init") + } + litebox::shim::ContinueOperation::ExceptionFixup => { + panic!("ExceptionFixup not expected in SNP init") + } } } @@ -238,6 +244,12 @@ pub fn handle_syscall(pt_regs: &mut litebox_common_linux::PtRegs) { match tls.shim.get().unwrap().syscall(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), + litebox::shim::ContinueOperation::ExceptionHandled => { + panic!("ExceptionHandled not expected in SNP syscall") + } + litebox::shim::ContinueOperation::ExceptionFixup => { + panic!("ExceptionFixup not expected in SNP syscall") + } } } diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 5d6ffc339..a1567a2a7 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -1596,6 +1596,7 @@ extern "C-unwind" fn exception_handler( exception: litebox::shim::Exception(trapno.try_into().unwrap()), error_code: error.try_into().unwrap(), cr2, + kernel_mode: false, }; thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)); } @@ -1632,6 +1633,12 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} + ContinueOperation::ExceptionHandled => { + panic!("ExceptionHandled not expected in linux_userland") + } + ContinueOperation::ExceptionFixup => { + panic!("ExceptionFixup not expected in linux_userland") + } } } } diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index 5f227ead2..3b2a33aca 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -166,8 +166,52 @@ isr_with_err_code isr_stack_segment_fault stack_segment_fault_handler_impl 12 /* Vector 13: General Protection Fault (#GP) - Error code */ isr_with_err_code isr_general_protection_fault general_protection_fault_handler_impl 13 -/* Vector 14: Page Fault (#PF) - Error code */ -isr_with_err_code isr_page_fault page_fault_handler_impl 14 +/* Vector 14: Page Fault (#PF) - Error code + * + * Custom ISR stub to support kernel-mode demand paging for user-space addresses. + * + * For kernel-mode page faults, page_fault_handler_impl returns: + * 0 = handled (exception table fixup applied) + * 1 = demand paging needed for a user-space address + * (panics for unrecoverable faults — never returns) + * + * When demand paging is needed, the stub restores GPRs and jumps to + * kernel_exception_callback (in run_thread_arch), which calls the shim's + * exception handler and then iretq back to the faulting kernel instruction. 
+ */ +.global isr_page_fault +isr_page_fault: + cld + test qword ptr [rsp + 16], 0x3 /* Check CS RPL bits */ + jnz .Luser_isr_page_fault + + /* --- Kernel-mode page fault --- */ + push_regs + mov rbp, rsp /* Save stack pointer */ + and rsp, -16 /* Align stack to 16 bytes for call */ + mov rdi, rbp /* Pass pointer to saved registers as first arg */ + call page_fault_handler_impl + test eax, eax + jnz .Lkernel_demand_page + + /* Handled (exception table fixup) — return to fixed-up instruction */ + mov rsp, rbp + pop_regs + add rsp, 8 /* Skip error code */ + iretq + +.Lkernel_demand_page: + /* Demand paging needed. Exception info already stored in per-CPU vars by + * page_fault_handler_impl. GPRs are already saved on the stack by push_regs + * (same layout as SAVE_CPU_CONTEXT_ASM). Jump directly to + * kernel_exception_regs_saved, skipping the redundant pop_regs/push cycle. + */ + mov rsp, rbp + jmp kernel_exception_regs_saved + +.Luser_isr_page_fault: + push 14 /* Pass vector number to exception_callback */ + jmp exception_callback /* Vector 16: x87 Floating-Point Exception (#MF) - No error code */ isr_no_err_code isr_x87_floating_point x87_floating_point_handler_impl 16 diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 8ad4affe8..38a2811b2 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -171,30 +171,51 @@ extern "C" fn general_protection_fault_handler_impl(regs: &PtRegs) { ); } +/// Fault was handled via exception table fixup. The ISR stub should +/// `pop_regs + iretq` to resume at the fixup address. +const PF_HANDLED: usize = 0; + +/// Demand paging is needed for a user-space address. Exception info has been +/// stored in per-CPU variables. +const PF_DEMAND_PAGE: usize = 1; + /// Kernel-mode page fault handler (vector 14). +/// +/// Returns [`PF_HANDLED`] or [`PF_DEMAND_PAGE`] to the ISR stub. +/// For unrecoverable faults, this function panics and never returns. #[unsafe(no_mangle)] -extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) { +extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> usize { + use crate::host::per_cpu_variables::with_per_cpu_variables_asm; + use crate::{USER_ADDR_MAX, USER_ADDR_MIN}; use litebox::mm::exception_table::search_exception_tables; use litebox::utils::TruncateExt as _; use x86_64::registers::control::Cr2; - // Check the exception table for a recovery address. - // This handles fallible memory operations like memcpy_fallible that access - // user-space or VTL0 addresses which might be unmapped. + let fault_addr: usize = Cr2::read_raw().truncate(); + let error_code = regs.orig_rax; + + // Kernel-mode page fault at a user-space address: route to the shim's + // exception handler for demand paging (and exception table fixup on failure). + if (USER_ADDR_MIN..USER_ADDR_MAX).contains(&fault_addr) { + with_per_cpu_variables_asm(|pcv| { + pcv.set_exception_info( + litebox::shim::Exception::PAGE_FAULT, + error_code.truncate(), + fault_addr, + regs.rip, + ); + }); + return PF_DEMAND_PAGE; + } + + // Handle fallible memory operations like memcpy_fallible that access + // non-user-space addresses (e.g., VTL0 addresses) which might be unmapped. if let Some(fixup_addr) = search_exception_tables(regs.rip) { regs.rip = fixup_addr; - return; + return PF_HANDLED; } - // TODO: Add kernel-mode demand paging for user-space addresses. 
We cannot - // rely on the exception_callback logic which aims to return to user-space - // faulting instructions. Here, we need to return to kernel-space faulting - // instruction after handling the page fault. We still require the shim's - // support to handle this page fault along with the `VmArea` information. - - // Kernel-mode page fault at kernel-space addresses - let fault_addr: usize = Cr2::read_raw().truncate(); - let error_code = regs.orig_rax; + // Kernel-mode page fault at kernel-space addresses — unrecoverable panic!( "EXCEPTION: PAGE FAULT\nAccessed Address: {:#x}\nError Code: {:#x}\n{:#x?}", fault_addr, error_code, regs diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index 3732cfed7..d88c04a34 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -246,6 +246,8 @@ pub struct PerCpuVariablesAsm { exception_error_code: Cell, /// Exception info: faulting address (CR2) exception_cr2: Cell, + /// Exception info: faulting kernel RIP (for exception table fixup) + exception_rip: Cell, } impl PerCpuVariablesAsm { @@ -357,10 +359,12 @@ impl PerCpuVariablesAsm { exception: litebox::shim::Exception, error_code: u32, cr2: usize, + rip: usize, ) { self.exception_trapno.set(exception.0); self.exception_error_code.set(error_code); self.exception_cr2.set(cr2); + self.exception_rip.set(rip); } pub fn get_exception(&self) -> litebox::shim::Exception { litebox::shim::Exception(self.exception_trapno.get()) @@ -371,6 +375,9 @@ impl PerCpuVariablesAsm { pub fn get_exception_cr2(&self) -> usize { self.exception_cr2.get() } + pub fn get_exception_rip(&self) -> usize { + self.exception_rip.get() + } pub fn get_user_context_top_addr(&self) -> usize { self.user_context_top_addr.get() } @@ -425,6 +432,7 @@ impl RefCellWrapper { exception_trapno: Cell::new(0), exception_error_code: Cell::new(0), exception_cr2: Cell::new(0), + exception_rip: Cell::new(0), }, inner: RefCell::new(value), } diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index a9288e674..b222a93ee 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1607,14 +1607,33 @@ macro_rules! SAVE_PF_USER_CONTEXT_ASM { }; } -/// Restore user context from the memory area pointed by the current `rsp`. -/// -/// This macro uses the `pop` instructions (i.e., from low addresses up to high ones) such that -/// it requires the start address of the memory area (not the top one). -/// -/// Prerequisite: The memory area has `PtRegs` structure containing user context. +/// Save all general-purpose registers onto the stack. #[cfg(target_arch = "x86_64")] -macro_rules! RESTORE_USER_CONTEXT_ASM { +macro_rules! SAVE_CPU_CONTEXT_ASM { + () => { + " + push rdi + push rsi + push rdx + push rcx + push rax + push r8 + push r9 + push r10 + push r11 + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + " + }; +} + +/// Restore all general-purpose registers and skip `orig_rax` from the stack. +#[cfg(target_arch = "x86_64")] +macro_rules! RESTORE_CPU_CONTEXT_ASM { () => { " pop r15 @@ -1704,8 +1723,40 @@ unsafe extern "C" fn run_thread_arch( "mov rbp, gs:[{cur_kernel_bp_off}]", "mov rsp, gs:[{cur_kernel_sp_off}]", "mov rdi, [rsp]", // pass `thread_ctx` + "xor esi, esi", // kernel_mode = false "call {exception_handler}", "jmp done", + // Kernel-mode page fault callback: demand paging for user-space addresses. 
+ // At this point: + // - rsp points to ISR stack with GPRs already saved + // - Exception info already stored in per-CPU variables + // - GS = kernel (no swapgs needed) + // - User extended states already saved by the enclosing syscall/exception path + // + // Two entry points: + // kernel_exception_callback - saves GPRs first (for callers without prior push_regs) + // kernel_exception_regs_saved - GPRs already on stack (from ISR stub's push_regs) + ".globl kernel_exception_callback", + "kernel_exception_callback:", + SAVE_CPU_CONTEXT_ASM!(), + ".globl kernel_exception_regs_saved", + "kernel_exception_regs_saved:", + "mov rbp, rsp", + "and rsp, -16", + "mov rdi, gs:[{cur_kernel_sp_off}]", + "mov rdi, [rdi]", // thread_ctx + "mov esi, 1", // kernel_mode = true + "call {exception_handler}", + // If demand paging failed, rax contains the exception table fixup + // address. Patch the saved RIP on the ISR stack so iretq resumes + // at the fixup instead of re-faulting. + "test rax, rax", + "jz 5f", + "mov [rbp + 128], rax", // patch saved RIP (15 GPRs + error_code = 128) + "5:", + "mov rsp, rbp", + RESTORE_CPU_CONTEXT_ASM!(), + "iretq", ".globl interrupt_callback", "interrupt_callback:", "jmp done", @@ -1740,16 +1791,42 @@ unsafe extern "C" fn syscall_handler(thread_ctx: &mut ThreadContext) { thread_ctx.call_shim(|shim, ctx| shim.syscall(ctx)); } -/// Handles user-mode exceptions by reading exception info from per-CPU variables +/// Handles exceptions by reading exception info from per-CPU variables /// and routing to the shim's exception handler. -unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext) { - use crate::host::per_cpu_variables::with_per_cpu_variables_asm; +/// +/// Returns 0 for normal flow (user-mode or successful demand paging), or +/// a fixup address when kernel-mode demand paging fails and an exception +/// table entry exists. +unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext, kernel_mode: bool) -> usize { + use crate::host::per_cpu_variables::{PerCpuVariablesAsm, with_per_cpu_variables_asm}; let info = with_per_cpu_variables_asm(|pcv| litebox::shim::ExceptionInfo { exception: pcv.get_exception(), error_code: pcv.get_exception_error_code(), cr2: pcv.get_exception_cr2(), + kernel_mode, }); - thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)); + + if kernel_mode { + // Call the shim directly instead of using `call_shim` because: + // - `ExceptionFixup` requires post-processing (exception table lookup) + // - Must return a fixup address to the asm caller (not resume user mode) + let op = thread_ctx.shim.exception(thread_ctx.ctx, &info); + match op { + ContinueOperation::ExceptionHandled => 0, + ContinueOperation::ExceptionFixup => { + let faulting_rip = + with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_rip); + litebox::mm::exception_table::search_exception_tables(faulting_rip) + .expect("kernel-mode page fault with no exception table fixup") + } + ContinueOperation::ExitThread | ContinueOperation::ResumeGuest => { + panic!("unexpected {op:?} for kernel-mode exception") + } + } + } else { + thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)); + 0 + } } /// Calls `f` in order to call into a shim entrypoint. 
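The `[rbp + 128]` fixup above works because the saved-register layout matches Linux's `pt_regs`: fifteen pushed GPRs (120 bytes) followed by `orig_rax` (the error code) put the saved RIP at byte offset 128. A compile-time check of that arithmetic against a mirrored layout (illustrative only; the real `PtRegs` lives in litebox_common_linux):

use core::mem::offset_of;

// Field order mirrors the push sequence in SAVE_PF_USER_CONTEXT_ASM /
// push_regs: the last register pushed (r15) ends up at the lowest address.
#[repr(C)]
struct PtRegsMirror {
    r15: u64, r14: u64, r13: u64, r12: u64, rbp: u64, rbx: u64,
    r11: u64, r10: u64, r9: u64, r8: u64, rax: u64, rcx: u64,
    rdx: u64, rsi: u64, rdi: u64,
    orig_rax: u64, // error code for exceptions that push one
    rip: u64, cs: u64, eflags: u64, rsp: u64, ss: u64,
}

// 15 GPRs * 8 bytes = 120, plus 8 bytes of orig_rax = 128.
const _: () = assert!(offset_of!(PtRegsMirror, orig_rax) == 120);
const _: () = assert!(offset_of!(PtRegsMirror, rip) == 128);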
@@ -1765,6 +1842,12 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_user(self.ctx) }, ContinueOperation::ExitThread => {} + ContinueOperation::ExceptionHandled => { + panic!("ExceptionHandled not expected in user-mode call_shim path") + } + ContinueOperation::ExceptionFixup => { + panic!("ExceptionFixup not expected in user-mode call_shim path") + } } } } @@ -1802,7 +1885,7 @@ unsafe extern "C" fn switch_to_user(_ctx: &litebox_common_linux::PtRegs) -> ! { XRSTOR_VTL1_ASM!({vtl1_user_xsave_area_off}, {vtl1_xsave_mask_lo_off}, {vtl1_xsave_mask_hi_off}, {vtl1_user_xsaved_off}), // Restore user context from ctx. "mov rsp, rdi", - RESTORE_USER_CONTEXT_ASM!(), + RESTORE_CPU_CONTEXT_ASM!(), // clear the GS base register (as the `KernelGsBase` MSR contains 0) // while writing the current GS base value to `KernelGsBase`. "swapgs", diff --git a/litebox_runner_lvbs/src/lib.rs b/litebox_runner_lvbs/src/lib.rs index 53157c692..53991e56c 100644 --- a/litebox_runner_lvbs/src/lib.rs +++ b/litebox_runner_lvbs/src/lib.rs @@ -1061,8 +1061,12 @@ fn write_msg_args_to_normal_world( } // use include_bytes! to include ldelf and (KMPP) TA binaries -const LDELF_BINARY: &[u8] = &[0u8; 0]; -const TA_BINARY: &[u8] = &[0u8; 0]; +// const LDELF_BINARY: &[u8] = &[0u8; 0]; +// const TA_BINARY: &[u8] = &[0u8; 0]; +const LDELF_BINARY: &[u8] = + include_bytes!("../../litebox_runner_optee_on_linux_userland/tests/ldelf.elf"); +const TA_BINARY: &[u8] = + include_bytes!("../../litebox_runner_optee_on_linux_userland/tests/kmpp-ta.elf"); #[panic_handler] fn panic(info: &PanicInfo) -> ! { diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index f4dccc729..819c834f5 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -94,6 +94,28 @@ impl litebox::shim::EnterShim for LinuxShimEntrypoints { ctx: &mut Self::ExecutionContext, info: &litebox::shim::ExceptionInfo, ) -> ContinueOperation { + if info.exception == litebox::shim::Exception::PAGE_FAULT { + let result = unsafe { + self.task + .global + .pm + .handle_page_fault(info.cr2, info.error_code.into()) + }; + return if info.kernel_mode { + if result.is_ok() { + ContinueOperation::ExceptionHandled + } else { + ContinueOperation::ExceptionFixup + } + } else if result.is_ok() { + ContinueOperation::ResumeGuest + } else { + // User-mode page fault that couldn't be resolved; + // fall through to signal delivery below. 
+ return self + .enter_shim(false, ctx, |task, _ctx| task.handle_exception_request(info)); + }; + } self.enter_shim(false, ctx, |task, _ctx| task.handle_exception_request(info)) } diff --git a/litebox_shim_linux/src/syscalls/signal/mod.rs b/litebox_shim_linux/src/syscalls/signal/mod.rs index a48b98dfa..9c21bb1c9 100644 --- a/litebox_shim_linux/src/syscalls/signal/mod.rs +++ b/litebox_shim_linux/src/syscalls/signal/mod.rs @@ -63,6 +63,7 @@ impl SignalState { exception: litebox::shim::Exception(0), error_code: 0, cr2: 0, + kernel_mode: false, }), } } diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 0ef292230..0b3e5f3b9 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -71,19 +71,31 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { ctx: &mut Self::ExecutionContext, info: &litebox::shim::ExceptionInfo, ) -> ContinueOperation { - if info.exception == litebox::shim::Exception::PAGE_FAULT - && unsafe { + if info.exception == litebox::shim::Exception::PAGE_FAULT { + let result = unsafe { self.task .global .pm .handle_page_fault(info.cr2, info.error_code.into()) - } - .is_ok() - { - return ContinueOperation::ResumeGuest; + }; + return if info.kernel_mode { + if result.is_ok() { + ContinueOperation::ExceptionHandled + } else { + ContinueOperation::ExceptionFixup + } + } else if result.is_ok() { + ContinueOperation::ResumeGuest + } else { + // User-mode page fault that couldn't be resolved; + // fall through to kill the TA below. + return { + ctx.rax = (TeeResult::TargetDead as u32) as usize; + ContinueOperation::ExitThread + }; + }; } - // Note: OP-TEE OS doesn't have a concept of signal handling. It kills - // the TA on CPU exceptions except for pageable page faults. + // OP-TEE has no signal handling. Kill the TA on any non-PF exception. ctx.rax = (TeeResult::TargetDead as u32) as usize; ContinueOperation::ExitThread } From 4eecc49cef1c6131f2244a24ab5fb46067ddcc0b Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Mon, 9 Feb 2026 21:07:29 +0000 Subject: [PATCH 06/11] drop some MAP_POPULATE --- litebox_platform_windows_userland/src/lib.rs | 26 ++++++++++++++++++++ litebox_shim_linux/src/lib.rs | 24 ++++++------------ litebox_shim_optee/src/lib.rs | 19 ++++++-------- litebox_shim_optee/src/loader/elf.rs | 4 +++ litebox_shim_optee/src/loader/ta_stack.rs | 5 ++-- litebox_shim_optee/src/syscalls/ldelf.rs | 10 ++------ 6 files changed, 49 insertions(+), 39 deletions(-) diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 00a95b086..0d6adbabf 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -1713,6 +1713,7 @@ unsafe extern "C-unwind" fn exception_handler( exception, error_code, cr2, + kernel_mode: false, }; thread_ctx.call_shim(|shim, ctx, _interrupt| shim.exception(ctx, &info)); @@ -1753,6 +1754,12 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} + ContinueOperation::ExceptionHandled => { + panic!("ExceptionHandled not expected in windows_userland") + } + ContinueOperation::ExceptionFixup => { + panic!("ExceptionFixup not expected in windows_userland") + } } } } @@ -1795,6 +1802,25 @@ impl litebox::platform::CrngProvider for WindowsUserland { } } +/// Dummy `VmemPageFaultHandler`. +/// +/// Page faults are handled transparently by the host Windows kernel. 
+/// Provided to satisfy trait bounds for `PageManager::handle_page_fault`. +impl litebox::mm::linux::VmemPageFaultHandler for WindowsUserland { + unsafe fn handle_page_fault( + &self, + _fault_addr: usize, + _flags: litebox::mm::linux::VmFlags, + _error_code: u64, + ) -> Result<(), litebox::mm::linux::PageFaultError> { + unreachable!("host kernel handles page faults for Windows userland") + } + + fn access_error(_error_code: u64, _flags: litebox::mm::linux::VmFlags) -> bool { + unreachable!("host kernel handles page faults for Windows userland") + } +} + #[cfg(test)] mod tests { use core::sync::atomic::AtomicU32; diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index 819c834f5..027d09be2 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -94,27 +94,19 @@ impl litebox::shim::EnterShim for LinuxShimEntrypoints { ctx: &mut Self::ExecutionContext, info: &litebox::shim::ExceptionInfo, ) -> ContinueOperation { - if info.exception == litebox::shim::Exception::PAGE_FAULT { - let result = unsafe { + if info.kernel_mode && info.exception == litebox::shim::Exception::PAGE_FAULT { + if unsafe { self.task .global .pm .handle_page_fault(info.cr2, info.error_code.into()) - }; - return if info.kernel_mode { - if result.is_ok() { - ContinueOperation::ExceptionHandled - } else { - ContinueOperation::ExceptionFixup - } - } else if result.is_ok() { - ContinueOperation::ResumeGuest + } + .is_ok() + { + return ContinueOperation::ExceptionHandled; } else { - // User-mode page fault that couldn't be resolved; - // fall through to signal delivery below. - return self - .enter_shim(false, ctx, |task, _ctx| task.handle_exception_request(info)); - }; + return ContinueOperation::ResumeGuest; + } } self.enter_shim(false, ctx, |task, _ctx| task.handle_exception_request(info)) } diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 0b3e5f3b9..21042ac0e 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -78,22 +78,17 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { .pm .handle_page_fault(info.cr2, info.error_code.into()) }; - return if info.kernel_mode { - if result.is_ok() { + if info.kernel_mode { + return if result.is_ok() { ContinueOperation::ExceptionHandled } else { ContinueOperation::ExceptionFixup - } - } else if result.is_ok() { - ContinueOperation::ResumeGuest - } else { - // User-mode page fault that couldn't be resolved; - // fall through to kill the TA below. - return { - ctx.rax = (TeeResult::TargetDead as u32) as usize; - ContinueOperation::ExitThread }; - }; + } else if result.is_ok() { + return ContinueOperation::ResumeGuest; + } + // User-mode page fault that couldn't be resolved; + // fall through to kill the TA below. } // OP-TEE has no signal handling. Kill the TA on any non-PF exception. ctx.rax = (TeeResult::TargetDead as u32) as usize; diff --git a/litebox_shim_optee/src/loader/elf.rs b/litebox_shim_optee/src/loader/elf.rs index b1b13715d..47859eb09 100644 --- a/litebox_shim_optee/src/loader/elf.rs +++ b/litebox_shim_optee/src/loader/elf.rs @@ -120,6 +120,8 @@ impl litebox_common_linux::loader::MapMemory for ElfFileInMemory<'_> { MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED + // Pre-populate: ELF loading runs before run_thread_arch sets up + // the kernel-mode demand paging infrastructure. 
| MapFlags::MAP_POPULATE, -1, offset.truncate(), @@ -157,6 +159,8 @@ impl litebox_common_linux::loader::MapMemory for ElfFileInMemory<'_> { MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED + // Pre-populate: ELF loading runs before run_thread_arch sets up + // the kernel-mode demand paging infrastructure. | MapFlags::MAP_POPULATE, -1, 0, diff --git a/litebox_shim_optee/src/loader/ta_stack.rs b/litebox_shim_optee/src/loader/ta_stack.rs index 6cdeab2db..c1cf3d10d 100644 --- a/litebox_shim_optee/src/loader/ta_stack.rs +++ b/litebox_shim_optee/src/loader/ta_stack.rs @@ -289,9 +289,8 @@ pub(crate) fn allocate_stack(task: &crate::Task, stack_base: Option) -> O .create_stack_pages( None, length, - // Pre-populate because the shim writes to the stack from kernel mode - // (e.g., push_bytes via memcpy_fallible) before the TA runs, and - // kernel-mode demand paging is not yet supported. + // Pre-populate: stack initialization runs before run_thread_arch + // sets up the kernel-mode demand paging infrastructure. CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY, ) .ok()? diff --git a/litebox_shim_optee/src/syscalls/ldelf.rs b/litebox_shim_optee/src/syscalls/ldelf.rs index 69d5203da..72f6a5ab4 100644 --- a/litebox_shim_optee/src/syscalls/ldelf.rs +++ b/litebox_shim_optee/src/syscalls/ldelf.rs @@ -60,10 +60,7 @@ impl Task { // `sys_map_zi` always creates read/writeable mapping. // // We map with PROT_READ_WRITE first, then mprotect padding regions to PROT_NONE. - // Pre-populate because ldelf (user mode) accesses these pages immediately after - // mapping, and kernel-mode demand paging is not yet supported for the - // exception_callback path during early TA loading. - let mut flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_POPULATE; + let mut flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS; if addr != 0 { flags |= MapFlags::MAP_FIXED; } @@ -188,10 +185,7 @@ impl Task { } // We map with PROT_READ_WRITE first, then mprotect padding regions to PROT_NONE as // explained in `sys_map_zi`. - // Pre-populate because `read_ta_bin` writes to these pages from kernel mode - // via memcpy_fallible, and kernel-mode demand paging is not yet supported. - let mut flags_internal = - MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS | MapFlags::MAP_POPULATE; + let mut flags_internal = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS; if addr != 0 { flags_internal |= MapFlags::MAP_FIXED; } From bcc5be01c40e895130718abd9c78b23add7b10e9 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Tue, 10 Feb 2026 02:35:03 +0000 Subject: [PATCH 07/11] rename --- litebox/src/shim.rs | 4 ++-- litebox_platform_linux_kernel/src/host/snp/snp_impl.rs | 8 ++++---- litebox_platform_linux_userland/src/lib.rs | 4 ++-- litebox_platform_lvbs/src/lib.rs | 6 +++--- litebox_platform_windows_userland/src/lib.rs | 4 ++-- litebox_shim_linux/src/lib.rs | 2 +- litebox_shim_optee/src/lib.rs | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/litebox/src/shim.rs b/litebox/src/shim.rs index 4145a6199..81d041166 100644 --- a/litebox/src/shim.rs +++ b/litebox/src/shim.rs @@ -96,8 +96,8 @@ pub enum ContinueOperation { ResumeGuest, /// Exit the current thread. ExitThread, - /// The exception was handled in kernel mode; resume kernel execution. - ExceptionHandled, + /// The exception was triggered in kernel mode; resume kernel execution. + ResumeKernel, /// The exception was not handled. The platform should apply an /// exception-table fixup if one exists. 
ExceptionFixup, diff --git a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs index 1bb0ea651..6ec870290 100644 --- a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs +++ b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs @@ -216,8 +216,8 @@ pub fn init_thread( match tls.shim.get().unwrap().init(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), - litebox::shim::ContinueOperation::ExceptionHandled => { - panic!("ExceptionHandled not expected in SNP init") + litebox::shim::ContinueOperation::ResumeKernel => { + panic!("ResumeKernel not expected in SNP init") } litebox::shim::ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in SNP init") @@ -244,8 +244,8 @@ pub fn handle_syscall(pt_regs: &mut litebox_common_linux::PtRegs) { match tls.shim.get().unwrap().syscall(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), - litebox::shim::ContinueOperation::ExceptionHandled => { - panic!("ExceptionHandled not expected in SNP syscall") + litebox::shim::ContinueOperation::ResumeKernel => { + panic!("ResumeKernel not expected in SNP syscall") } litebox::shim::ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in SNP syscall") diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index a1567a2a7..4ac651222 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -1633,8 +1633,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ExceptionHandled => { - panic!("ExceptionHandled not expected in linux_userland") + ContinueOperation::ResumeKernel => { + panic!("ResumeKernel not expected in linux_userland") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in linux_userland") diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index b222a93ee..b830fcb39 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1812,7 +1812,7 @@ unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext, kernel_mo // - Must return a fixup address to the asm caller (not resume user mode) let op = thread_ctx.shim.exception(thread_ctx.ctx, &info); match op { - ContinueOperation::ExceptionHandled => 0, + ContinueOperation::ResumeKernel => 0, ContinueOperation::ExceptionFixup => { let faulting_rip = with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_rip); @@ -1842,8 +1842,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_user(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ExceptionHandled => { - panic!("ExceptionHandled not expected in user-mode call_shim path") + ContinueOperation::ResumeKernel => { + panic!("ResumeKernel not expected in user-mode call_shim path") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in user-mode call_shim path") diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 0d6adbabf..57787082b 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -1754,8 +1754,8 @@ impl ThreadContext<'_> { match op { 
ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ExceptionHandled => { - panic!("ExceptionHandled not expected in windows_userland") + ContinueOperation::ResumeKernel => { + panic!("ResumeKernel not expected in windows_userland") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in windows_userland") diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index 027d09be2..f5bc735ad 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -103,7 +103,7 @@ impl litebox::shim::EnterShim for LinuxShimEntrypoints { } .is_ok() { - return ContinueOperation::ExceptionHandled; + return ContinueOperation::ResumeKernel; } else { return ContinueOperation::ResumeGuest; } diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 21042ac0e..31050b3ea 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -80,7 +80,7 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { }; if info.kernel_mode { return if result.is_ok() { - ContinueOperation::ExceptionHandled + ContinueOperation::ResumeKernel } else { ContinueOperation::ExceptionFixup }; From 955353ea9b2416bb24a808476bf6a5bf61489758 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Tue, 10 Feb 2026 06:36:56 +0000 Subject: [PATCH 08/11] addressed feedbacks --- litebox/src/shim.rs | 2 +- .../src/arch/x86/interrupts.S | 2 +- .../src/arch/x86/interrupts.rs | 23 ++-- litebox_platform_lvbs/src/lib.rs | 126 +++++++----------- litebox_runner_lvbs/src/lib.rs | 38 ++++-- litebox_shim_optee/src/lib.rs | 8 +- litebox_shim_optee/src/ptr.rs | 38 +++--- litebox_shim_optee/src/session.rs | 51 ++++++- 8 files changed, 149 insertions(+), 139 deletions(-) diff --git a/litebox/src/shim.rs b/litebox/src/shim.rs index 81d041166..1ea0ee012 100644 --- a/litebox/src/shim.rs +++ b/litebox/src/shim.rs @@ -99,7 +99,7 @@ pub enum ContinueOperation { /// The exception was triggered in kernel mode; resume kernel execution. ResumeKernel, /// The exception was not handled. The platform should apply an - /// exception-table fixup if one exists. + /// exception-table fixup (e.g., advance the instruction pointer) if one exists. ExceptionFixup, } diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index 3b2a33aca..cb6ec569a 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -194,7 +194,7 @@ isr_page_fault: test eax, eax jnz .Lkernel_demand_page - /* Handled (exception table fixup) — return to fixed-up instruction */ + /* Handled (exception table fixup) — return to fixup address */ mov rsp, rbp pop_regs add rsp, 8 /* Skip error code */ diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 38a2811b2..031ef078c 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -171,20 +171,21 @@ extern "C" fn general_protection_fault_handler_impl(regs: &PtRegs) { ); } -/// Fault was handled via exception table fixup. The ISR stub should -/// `pop_regs + iretq` to resume at the fixup address. -const PF_HANDLED: usize = 0; - -/// Demand paging is needed for a user-space address. Exception info has been -/// stored in per-CPU variables. -const PF_DEMAND_PAGE: usize = 1; +/// Result from the kernel-mode page fault handler, consumed by the ISR stub. 
+#[repr(u8)] +enum PageFaultResult { + /// Fault was handled via exception table fixup. + Handled = 0, + /// Demand paging is needed for a user-space address. + DemandPage = 1, +} /// Kernel-mode page fault handler (vector 14). /// -/// Returns [`PF_HANDLED`] or [`PF_DEMAND_PAGE`] to the ISR stub. +/// Returns [`PageFaultResult`] to the ISR stub. /// For unrecoverable faults, this function panics and never returns. #[unsafe(no_mangle)] -extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> usize { +extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> PageFaultResult { use crate::host::per_cpu_variables::with_per_cpu_variables_asm; use crate::{USER_ADDR_MAX, USER_ADDR_MIN}; use litebox::mm::exception_table::search_exception_tables; @@ -205,14 +206,14 @@ extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> usize { regs.rip, ); }); - return PF_DEMAND_PAGE; + return PageFaultResult::DemandPage; } // Handle fallible memory operations like memcpy_fallible that access // non-user-space addresses (e.g., VTL0 addresses) which might be unmapped. if let Some(fixup_addr) = search_exception_tables(regs.rip) { regs.rip = fixup_addr; - return PF_HANDLED; + return PageFaultResult::Handled; } // Kernel-mode page fault at kernel-space addresses — unrecoverable diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index b830fcb39..98cd56ebe 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -9,7 +9,7 @@ use crate::{host::per_cpu_variables::PerCpuVariablesAsm, mshv::vsm::Vtl0KernelInfo}; use core::{ arch::asm, - sync::atomic::{AtomicU32, AtomicU64, AtomicUsize, Ordering}, + sync::atomic::{AtomicU32, AtomicU64}, }; use hashbrown::HashMap; use litebox::platform::{ @@ -78,6 +78,7 @@ fn box_new_zeroed() -> alloc::boxed::Box { static CPU_MHZ: AtomicU64 = AtomicU64::new(0); /// Special page table ID for the base (kernel-only) page table. +/// No real physical frame has address 0, so this is a safe sentinel. pub const BASE_PAGE_TABLE_ID: usize = 0; /// Maximum virtual address (exclusive) for user-space allocations. @@ -117,14 +118,8 @@ pub struct PageTableManager { base_page_table: mm::PageTable, /// Cached physical frame of the base page table (for fast CR3 comparison). base_page_table_frame: PhysFrame, - /// Task page tables indexed by their ID (starting from 1). - /// Each contains kernel mappings + task-specific user-space mappings. + /// Task page tables keyed by their P4 frame start address (the page table ID). task_page_tables: spin::Mutex>>>, - /// Reverse lookup: physical frame -> page table ID (for O(1) CR3 lookup). - /// Only contains task page tables (base page table is checked separately). - frame_to_id: spin::Mutex, usize>>, - /// Next available task page table ID. - next_task_pt_id: AtomicUsize, } impl PageTableManager { @@ -140,8 +135,6 @@ impl PageTableManager { base_page_table: base_pt, base_page_table_frame: base_frame, task_page_tables: spin::Mutex::new(HashMap::new()), - frame_to_id: spin::Mutex::new(HashMap::new()), - next_task_pt_id: AtomicUsize::new(1), } } @@ -164,25 +157,18 @@ impl PageTableManager { return &self.base_page_table; } - // Look up task page table by frame using the reverse lookup map - let task_pt_id = { - let frame_to_id = self.frame_to_id.lock(); - frame_to_id.get(&cr3_frame).copied() - }; - - if let Some(id) = task_pt_id { - let task_pts = self.task_page_tables.lock(); - if let Some(pt) = task_pts.get(&id) { - // SAFETY: Three invariants guarantee this reference remains valid: - // 1. 
The PageTable is Box-allocated, so HashMap rehashing does not - // move the PageTable itself (only the Box pointer moves). - // 2. This page table is the current CR3, so `delete_task_page_table` - // will refuse to remove it (returns EBUSY). - // 3. The PageTableManager is 'static, so neither it nor the HashMap - // will be deallocated. - let pt_ref: &mm::PageTable = pt; - return unsafe { &*core::ptr::from_ref(pt_ref) }; - } + let cr3_id: usize = cr3_frame.start_address().as_u64().truncate(); + let task_pts = self.task_page_tables.lock(); + if let Some(pt) = task_pts.get(&cr3_id) { + // SAFETY: Three invariants guarantee this reference remains valid: + // 1. The PageTable is Box-allocated, so HashMap rehashing does not + // move the PageTable itself (only the Box pointer moves). + // 2. This page table is the current CR3, so `delete_task_page_table` + // will refuse to remove it (returns EBUSY). + // 3. The PageTableManager is 'static, so neither it nor the HashMap + // will be deallocated. + let pt_ref: &mm::PageTable = pt; + return unsafe { &*core::ptr::from_ref(pt_ref) }; } // CR3 doesn't match any known page table - this shouldn't happen @@ -210,16 +196,8 @@ impl PageTableManager { return BASE_PAGE_TABLE_ID; } - let frame_to_id = self.frame_to_id.lock(); - if let Some(&id) = frame_to_id.get(&cr3_frame) { - return id; - } - - // CR3 doesn't match any known page table - this shouldn't happen - unreachable!( - "CR3 contains unknown page table: {:?}", - cr3_frame.start_address() - ); + // The task page table ID is the start address of the P4 frame. + cr3_frame.start_address().as_u64().truncate() } /// Returns `true` if the base page table is currently active. @@ -281,8 +259,8 @@ impl PageTableManager { /// /// # Returns /// - /// The ID of the newly created task page table, or `Err(Errno::ENOMEM)` if - /// allocation fails or the ID space is exhausted. + /// The ID of the newly created task page table (its P4 frame start address), + /// or `Err(Errno::ENOMEM)` if allocation fails. pub fn create_task_page_table( &self, vtl1_phys_frame_range: PhysFrameRange, @@ -298,21 +276,11 @@ impl PageTableManager { return Err(Errno::ENOMEM); } - let task_pt_id = self.next_task_pt_id.fetch_add(1, Ordering::Relaxed); - if task_pt_id == 0 { - // Wrapped around, which shouldn't happen in practice - return Err(Errno::ENOMEM); - } - let pt = alloc::boxed::Box::new(pt); - let phys_frame = pt.get_physical_frame(); + let task_pt_id: usize = pt.get_physical_frame().start_address().as_u64().truncate(); let mut task_pts = self.task_page_tables.lock(); task_pts.insert(task_pt_id, pt); - drop(task_pts); - - let mut frame_to_id = self.frame_to_id.lock(); - frame_to_id.insert(phys_frame, task_pt_id); Ok(task_pt_id) } @@ -344,20 +312,18 @@ impl PageTableManager { return Err(Errno::EINVAL); } - // Ensure we're not deleting the current page table (check CR3) - if self.current_page_table_id() == task_pt_id { + let mut task_pts = self.task_page_tables.lock(); + + // Check CR3 under the same lock to avoid TOCTOU with the removal below. 
+ let (cr3_frame, _) = x86_64::registers::control::Cr3::read(); + let cr3_id: usize = cr3_frame.start_address().as_u64().truncate(); + if cr3_id == task_pt_id { return Err(Errno::EBUSY); } - let mut task_pts = self.task_page_tables.lock(); if let Some(pt) = task_pts.remove(&task_pt_id) { - let phys_frame = pt.get_physical_frame(); drop(task_pts); - let mut frame_to_id = self.frame_to_id.lock(); - frame_to_id.remove(&phys_frame); - drop(frame_to_id); - // Safety: We're about to delete this page table, so it's safe to unmap all pages. unsafe { pt.cleanup_user_mappings(Self::USER_ADDR_MIN, Self::USER_ADDR_MAX); @@ -554,15 +520,16 @@ impl LinuxKernel { size: u64, flags: PageTableFlags, ) -> Option> { - let (page_addr, length) = self + let (page_addr, page_aligned_length) = self .map_vtl0_phys_range(phys_addr, phys_addr + size, flags) .ok()?; let page_offset: usize = (phys_addr - phys_addr.align_down(Size4KiB::SIZE)).truncate(); Some(Vtl0MappedGuard { owner: self, page_addr, - length, + page_aligned_length, ptr: page_addr.wrapping_add(page_offset), + size: size.truncate(), }) } @@ -580,7 +547,7 @@ impl LinuxKernel { return Some(alloc::boxed::Box::new(T::new_zeroed())); } - let guard = self.map_vtl0_guard( + let src_guard = self.map_vtl0_guard( phys_addr, core::mem::size_of::() as u64, PageTableFlags::PRESENT, @@ -590,8 +557,8 @@ impl LinuxKernel { let result = unsafe { litebox::mm::exception_table::memcpy_fallible( core::ptr::from_mut::(boxed.as_mut()).cast(), - guard.ptr, - core::mem::size_of::(), + src_guard.ptr, + src_guard.size, ) }; @@ -612,20 +579,19 @@ impl LinuxKernel { return true; } - let Some(guard) = self.map_vtl0_guard( + let Some(dst_guard) = self.map_vtl0_guard( phys_addr, core::mem::size_of::() as u64, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, ) else { return false; }; - let dst_ptr = guard.ptr; unsafe { litebox::mm::exception_table::memcpy_fallible( - dst_ptr, + dst_guard.ptr, core::ptr::from_ref::(value).cast::(), - core::mem::size_of::(), + dst_guard.size, ) } .is_ok() @@ -646,20 +612,19 @@ impl LinuxKernel { return true; } - let Some(guard) = self.map_vtl0_guard( + let Some(dst_guard) = self.map_vtl0_guard( phys_addr, core::mem::size_of_val(value) as u64, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, ) else { return false; }; - let dst_ptr = guard.ptr; unsafe { litebox::mm::exception_table::memcpy_fallible( - dst_ptr, + dst_guard.ptr, value.as_ptr().cast::(), - core::mem::size_of_val(value), + dst_guard.size, ) } .is_ok() @@ -680,20 +645,19 @@ impl LinuxKernel { return true; } - let Some(guard) = self.map_vtl0_guard( + let Some(src_guard) = self.map_vtl0_guard( phys_addr, core::mem::size_of_val(buf) as u64, PageTableFlags::PRESENT, ) else { return false; }; - let src_ptr = guard.ptr; unsafe { litebox::mm::exception_table::memcpy_fallible( buf.as_mut_ptr().cast::(), - src_ptr, - core::mem::size_of_val(buf), + src_guard.ptr, + src_guard.size, ) } .is_ok() @@ -784,15 +748,16 @@ impl LinuxKernel { struct Vtl0MappedGuard<'a, Host: HostInterface> { owner: &'a LinuxKernel, page_addr: *mut u8, - length: usize, + page_aligned_length: usize, ptr: *mut u8, + size: usize, } impl Drop for Vtl0MappedGuard<'_, Host> { fn drop(&mut self) { assert!( self.owner - .unmap_vtl0_pages(self.page_addr, self.length) + .unmap_vtl0_pages(self.page_addr, self.page_aligned_length) .is_ok(), "Failed to unmap VTL0 pages" ); @@ -1807,7 +1772,8 @@ unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext, kernel_mo }); if kernel_mode { - // Call the shim directly instead of using 
`call_shim` because: + // We don't use `thread_ctx.call_shim()` here because: + // - `call_shim()` switches back to user mode after the call // - `ExceptionFixup` requires post-processing (exception table lookup) // - Must return a fixup address to the asm caller (not resume user mode) let op = thread_ctx.shim.exception(thread_ctx.ctx, &info); diff --git a/litebox_runner_lvbs/src/lib.rs b/litebox_runner_lvbs/src/lib.rs index 53991e56c..1ca9e28fd 100644 --- a/litebox_runner_lvbs/src/lib.rs +++ b/litebox_runner_lvbs/src/lib.rs @@ -10,6 +10,7 @@ use alloc::sync::Arc; use core::{ops::Neg, panic::PanicInfo}; use litebox::{ mm::linux::PAGE_SIZE, + platform::RawConstPointer, utils::{ReinterpretSignedExt, TruncateExt}, }; use litebox_common_linux::errno::Errno; @@ -39,9 +40,9 @@ use litebox_shim_optee::msg_handler::{ decode_ta_request, handle_optee_msg_args, handle_optee_smc_args, update_optee_msg_args, }; use litebox_shim_optee::session::{ - MAX_TA_INSTANCES, SessionManager, TaInstance, allocate_session_id, + MAX_TA_INSTANCES, SessionIdGuard, SessionManager, TaInstance, allocate_session_id, }; -use litebox_shim_optee::{NormalWorldConstPtr, NormalWorldMutPtr}; +use litebox_shim_optee::{NormalWorldConstPtr, NormalWorldMutPtr, UserConstPtr}; use once_cell::race::OnceBox; use spin::mutex::SpinMutex; @@ -401,8 +402,13 @@ fn open_session_single_instance( .try_lock() .ok_or(OpteeSmcReturnCode::EThreadLimit)?; - // Allocate session ID BEFORE calling load_ta_context so TA gets correct ID - let runner_session_id = allocate_session_id().ok_or(OpteeSmcReturnCode::EBusy)?; + // Allocate session ID BEFORE calling load_ta_context so TA gets correct ID. + // Use SessionIdGuard to ensure the ID is recycled on any error path + // (before it is registered with the session manager). + let session_id_guard = + SessionIdGuard::new(allocate_session_id().ok_or(OpteeSmcReturnCode::EBusy)?); + // Safe to unwrap: guard was just created with Some(id). + let runner_session_id = session_id_guard.id().unwrap(); debug_serial_println!( "Reusing single-instance TA: uuid={:?}, task_pt_id={}, session_id={}", @@ -445,7 +451,9 @@ fn open_session_single_instance( .loaded_program .params_address .ok_or(OpteeSmcReturnCode::EBadAddr)?; - let ta_params = unsafe { *(params_address as *const UteeParams) }; + let ta_params = UserConstPtr::::from_usize(params_address) + .read_at_offset(0) + .ok_or(OpteeSmcReturnCode::EBadAddr)?; // Check the return code from the TA's OpenSession entry point let return_code: u32 = ctx.rax.truncate(); @@ -515,7 +523,9 @@ fn open_session_single_instance( return Ok(()); } - // Success: register session + // Success: register session and disarm the guard (ownership transfers to session map) + // Safe to unwrap: guard has not been disarmed yet. 
+ let runner_session_id = session_id_guard.disarm().unwrap(); session_manager().register_session(runner_session_id, instance_arc.clone(), ta_uuid, ta_flags); write_msg_args_to_normal_world( @@ -672,7 +682,9 @@ fn open_session_new_instance( let params_address = loaded_program .params_address .ok_or(OpteeSmcReturnCode::EBadAddr)?; - let ta_params = unsafe { *(params_address as *const UteeParams) }; + let ta_params = UserConstPtr::::from_usize(params_address) + .read_at_offset(0) + .ok_or(OpteeSmcReturnCode::EBadAddr)?; // Check the return code from the TA's OpenSession entry point let return_code: u32 = ctx.rax.truncate(); @@ -804,7 +816,9 @@ fn handle_invoke_command( .loaded_program .params_address .ok_or(OpteeSmcReturnCode::EBadAddr)?; - let ta_params = unsafe { *(params_address as *const UteeParams) }; + let ta_params = UserConstPtr::::from_usize(params_address) + .read_at_offset(0) + .ok_or(OpteeSmcReturnCode::EBadAddr)?; let return_code: u32 = ctx.rax.truncate(); let return_code = TeeResult::try_from(return_code).unwrap_or(TeeResult::GenericError); @@ -1061,12 +1075,8 @@ fn write_msg_args_to_normal_world( } // use include_bytes! to include ldelf and (KMPP) TA binaries -// const LDELF_BINARY: &[u8] = &[0u8; 0]; -// const TA_BINARY: &[u8] = &[0u8; 0]; -const LDELF_BINARY: &[u8] = - include_bytes!("../../litebox_runner_optee_on_linux_userland/tests/ldelf.elf"); -const TA_BINARY: &[u8] = - include_bytes!("../../litebox_runner_optee_on_linux_userland/tests/kmpp-ta.elf"); +const LDELF_BINARY: &[u8] = &[0u8; 0]; +const TA_BINARY: &[u8] = &[0u8; 0]; #[panic_handler] fn panic(info: &PanicInfo) -> ! { diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 31050b3ea..c98a8a1a3 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -39,7 +39,7 @@ pub mod ptr; // Re-export session management types for convenience pub use session::{ MAX_TA_INSTANCES, SessionEntry, SessionManager, SessionMap, SingleInstanceCache, TaInstance, - allocate_session_id, recycle_session_id, + allocate_session_id, }; const MAX_KERNEL_BUF_SIZE: usize = 0x80_000; @@ -1273,12 +1273,6 @@ struct Task { // TODO: OP-TEE supports global, persistent objects across sessions. Add these maps if needed. } -impl Drop for Task { - fn drop(&mut self) { - SessionIdPool::recycle(self.session_id); - } -} - struct ThreadState { init_state: Cell, /// Whether init has been called. This is used to ensure `handle_init_request` diff --git a/litebox_shim_optee/src/ptr.rs b/litebox_shim_optee/src/ptr.rs index e883b81d2..162f3980c 100644 --- a/litebox_shim_optee/src/ptr.rs +++ b/litebox_shim_optee/src/ptr.rs @@ -190,13 +190,12 @@ impl PhysMutPtr { PhysPageMapPermissions::READ, )? }; - let src = guard.ptr_at(count); let mut buffer = core::mem::MaybeUninit::::uninit(); unsafe { litebox::mm::exception_table::memcpy_fallible( buffer.as_mut_ptr().cast::(), - src.cast::(), - core::mem::size_of::(), + guard.ptr.cast::(), + guard.size, ) .map_err(|_| PhysPointerError::CopyFailed)?; } @@ -229,12 +228,11 @@ impl PhysMutPtr { PhysPageMapPermissions::READ, )? }; - let src = guard.ptr_at(count); unsafe { litebox::mm::exception_table::memcpy_fallible( values.as_mut_ptr().cast::(), - src.cast::(), - core::mem::size_of_val(values), + guard.ptr.cast::(), + guard.size, ) .map_err(|_| PhysPointerError::CopyFailed)?; } @@ -263,12 +261,11 @@ impl PhysMutPtr { PhysPageMapPermissions::READ | PhysPageMapPermissions::WRITE, )? 
}; - let dst = guard.ptr_at(count); unsafe { litebox::mm::exception_table::memcpy_fallible( - dst.cast::(), + guard.ptr.cast::(), core::ptr::from_ref(&value).cast::(), - core::mem::size_of::(), + guard.size, ) .map_err(|_| PhysPointerError::CopyFailed)?; } @@ -300,12 +297,11 @@ impl PhysMutPtr { PhysPageMapPermissions::READ | PhysPageMapPermissions::WRITE, )? }; - let dst = guard.ptr_at(count); unsafe { litebox::mm::exception_table::memcpy_fallible( - dst.cast::(), + guard.ptr.cast::(), values.as_ptr().cast::(), - core::mem::size_of_val(values), + guard.size, ) .map_err(|_| PhysPointerError::CopyFailed)?; } @@ -351,9 +347,13 @@ impl PhysMutPtr { .map_info .as_ref() .ok_or(PhysPointerError::NoMappingInfo)?; - let base = map_info.base.wrapping_add(skip % ALIGN).cast::(); + let ptr = map_info.base.wrapping_add(skip % ALIGN).cast::(); let _ = map_info; - Ok(MappedGuard { owner: self, base }) + Ok(MappedGuard { + owner: self, + ptr, + size, + }) } /// Map the physical pages from `start` to `end` indexes. @@ -414,14 +414,8 @@ impl PhysMutPtr { /// `PhysMutPtr` and provides the mapped base pointer for the duration of the mapping. struct MappedGuard<'a, T: Clone, const ALIGN: usize> { owner: &'a mut PhysMutPtr, - base: *mut T, -} - -impl MappedGuard<'_, T, ALIGN> { - /// Returns the mapped base pointer offset by `count` elements. - fn ptr_at(&self, count: usize) -> *mut T { - self.base.wrapping_add(count) - } + ptr: *mut T, + size: usize, } impl Drop for MappedGuard<'_, T, ALIGN> { diff --git a/litebox_shim_optee/src/session.rs b/litebox_shim_optee/src/session.rs index e6bc6eea0..86518a373 100644 --- a/litebox_shim_optee/src/session.rs +++ b/litebox_shim_optee/src/session.rs @@ -119,7 +119,7 @@ pub struct TaInstance { pub task_page_table_id: usize, } -// SAFETY: The shim is designed to be used in single-threaded contexts per-CPU. +// SAFETY: TaInstance is protected by SpinMutex and try_lock (`SessionEntry`) unsafe impl Send for TaInstance {} unsafe impl Sync for TaInstance {} @@ -274,6 +274,47 @@ pub fn recycle_session_id(session_id: u32) { SessionIdPool::recycle(session_id); } +/// RAII guard that recycles a session ID on drop unless disarmed. +/// +/// Session IDs are allocated before the TA is invoked and only registered on +/// success via [`SessionManager::register_session`]. This guard ensures it is +/// recycled on all error paths before this registration. +pub struct SessionIdGuard { + session_id: Option, +} + +impl SessionIdGuard { + /// Create a new guard that will recycle `session_id` on drop. + pub fn new(session_id: u32) -> Self { + Self { + session_id: Some(session_id), + } + } + + /// Return the guarded session ID, or `None` if already disarmed. + pub fn id(&self) -> Option { + self.session_id + } + + /// Disarm the guard so the session ID is **not** recycled on drop. + /// + /// Call this after the session ID has been successfully registered. + /// Once registered, [`SessionManager::unregister_session`] owns recycling. + /// + /// Returns `None` if the guard was already disarmed. + pub fn disarm(mut self) -> Option { + self.session_id.take() + } +} + +impl Drop for SessionIdGuard { + fn drop(&mut self) { + if let Some(id) = self.session_id { + recycle_session_id(id); + } + } +} + /// Session manager that coordinates session and instance lifecycle. /// /// This provides a unified interface for: @@ -338,9 +379,13 @@ impl SessionManager { .insert(session_id, instance, ta_uuid, ta_flags); } - /// Unregister a session and return its entry. 
+ /// Unregister a session, recycle its session ID, and return the entry. pub fn unregister_session(&self, session_id: u32) -> Option { - self.sessions.remove(session_id) + let entry = self.sessions.remove(session_id); + if entry.is_some() { + recycle_session_id(session_id); + } + entry } /// Remove a single-instance TA from the cache. From c461284198f67c725391de1aa09c56ae1619f794 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Wed, 11 Feb 2026 17:34:45 +0000 Subject: [PATCH 09/11] clarification --- litebox/src/shim.rs | 4 +- .../src/host/snp/snp_impl.rs | 8 ++-- litebox_platform_linux_userland/src/lib.rs | 4 +- .../src/arch/x86/interrupts.rs | 4 +- litebox_platform_lvbs/src/lib.rs | 39 +++++++++++++------ litebox_platform_windows_userland/src/lib.rs | 4 +- litebox_shim_linux/src/lib.rs | 2 +- litebox_shim_optee/src/lib.rs | 2 +- litebox_shim_optee/src/ptr.rs | 32 +++++++++------ 9 files changed, 61 insertions(+), 38 deletions(-) diff --git a/litebox/src/shim.rs b/litebox/src/shim.rs index 1ea0ee012..2f6bdcb60 100644 --- a/litebox/src/shim.rs +++ b/litebox/src/shim.rs @@ -96,8 +96,8 @@ pub enum ContinueOperation { ResumeGuest, /// Exit the current thread. ExitThread, - /// The exception was triggered in kernel mode; resume kernel execution. - ResumeKernel, + /// The exception was triggered in platform mode; resume platform execution. + ResumePlatform, /// The exception was not handled. The platform should apply an /// exception-table fixup (e.g., advance the instruction pointer) if one exists. ExceptionFixup, diff --git a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs index 6ec870290..d1ad36003 100644 --- a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs +++ b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs @@ -216,8 +216,8 @@ pub fn init_thread( match tls.shim.get().unwrap().init(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), - litebox::shim::ContinueOperation::ResumeKernel => { - panic!("ResumeKernel not expected in SNP init") + litebox::shim::ContinueOperation::ResumePlatform => { + panic!("ResumePlatform not expected in SNP init") } litebox::shim::ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in SNP init") @@ -244,8 +244,8 @@ pub fn handle_syscall(pt_regs: &mut litebox_common_linux::PtRegs) { match tls.shim.get().unwrap().syscall(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), - litebox::shim::ContinueOperation::ResumeKernel => { - panic!("ResumeKernel not expected in SNP syscall") + litebox::shim::ContinueOperation::ResumePlatform => { + panic!("ResumePlatform not expected in SNP syscall") } litebox::shim::ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in SNP syscall") diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 4ac651222..4921d8467 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -1633,8 +1633,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ResumeKernel => { - panic!("ResumeKernel not expected in linux_userland") + ContinueOperation::ResumePlatform => { + panic!("ResumePlatform not expected in linux_userland") } ContinueOperation::ExceptionFixup => { 
panic!("ExceptionFixup not expected in linux_userland") diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 031ef078c..0d0505dab 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -209,8 +209,8 @@ extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> PageFaultResult { return PageFaultResult::DemandPage; } - // Handle fallible memory operations like memcpy_fallible that access - // non-user-space addresses (e.g., VTL0 addresses) which might be unmapped. + // Safety net for fallible kernel memory operations (e.g., copying to/from + // VTL0 addresses) where the target address may be unmapped due to bugs. if let Some(fixup_addr) = search_exception_tables(regs.rip) { regs.rip = fixup_addr; return PageFaultResult::Handled; diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 98cd56ebe..586b8c40c 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -554,6 +554,14 @@ impl LinuxKernel { )?; let mut boxed = box_new_zeroed::(); + // Use memcpy_fallible instead of ptr::copy_nonoverlapping to handle + // the race where another core unmaps this page (via a shared page + // table) between map_vtl0_guard and the copy. The mapping is valid + // at this point, so a fault is not expected in the common case. + // TODO: Once VTL0 page-range locking is in place, this fallible copy + // may become unnecessary since the lock would prevent concurrent + // unmapping. It could still serve as a safety net against callers + // that forget to acquire the lock. let result = unsafe { litebox::mm::exception_table::memcpy_fallible( core::ptr::from_mut::(boxed.as_mut()).cast(), @@ -561,6 +569,7 @@ impl LinuxKernel { src_guard.size, ) }; + debug_assert!(result.is_ok(), "fault copying from VTL0 mapped page"); result.ok().map(|()| boxed) } @@ -587,14 +596,16 @@ impl LinuxKernel { return false; }; - unsafe { + // Fallible: another core may unmap this page concurrently. + let result = unsafe { litebox::mm::exception_table::memcpy_fallible( dst_guard.ptr, core::ptr::from_ref::(value).cast::(), dst_guard.size, ) - } - .is_ok() + }; + debug_assert!(result.is_ok(), "fault copying to VTL0 mapped page"); + result.is_ok() } /// This function copies a slice from the VTL1 kernel to VTL0 physical memory. @@ -620,14 +631,16 @@ impl LinuxKernel { return false; }; - unsafe { + // Fallible: another core may unmap this page concurrently. + let result = unsafe { litebox::mm::exception_table::memcpy_fallible( dst_guard.ptr, value.as_ptr().cast::(), dst_guard.size, ) - } - .is_ok() + }; + debug_assert!(result.is_ok(), "fault copying to VTL0 mapped page"); + result.is_ok() } /// This function copies a slice from VTL0 physical memory to the VTL1 kernel. @@ -653,14 +666,16 @@ impl LinuxKernel { return false; }; - unsafe { + // Fallible: another core may unmap this page concurrently. + let result = unsafe { litebox::mm::exception_table::memcpy_fallible( buf.as_mut_ptr().cast::(), src_guard.ptr, src_guard.size, ) - } - .is_ok() + }; + debug_assert!(result.is_ok(), "fault copying from VTL0 mapped page"); + result.is_ok() } /// Create a new task page table for VTL1 user space and returns its ID. 
@@ -1778,7 +1793,7 @@ unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext, kernel_mo // - Must return a fixup address to the asm caller (not resume user mode) let op = thread_ctx.shim.exception(thread_ctx.ctx, &info); match op { - ContinueOperation::ResumeKernel => 0, + ContinueOperation::ResumePlatform => 0, ContinueOperation::ExceptionFixup => { let faulting_rip = with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_rip); @@ -1808,8 +1823,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_user(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ResumeKernel => { - panic!("ResumeKernel not expected in user-mode call_shim path") + ContinueOperation::ResumePlatform => { + panic!("ResumePlatform not expected in user-mode call_shim path") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in user-mode call_shim path") diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 57787082b..8de5579b5 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -1754,8 +1754,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ResumeKernel => { - panic!("ResumeKernel not expected in windows_userland") + ContinueOperation::ResumePlatform => { + panic!("ResumePlatform not expected in windows_userland") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in windows_userland") diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index f5bc735ad..e86985892 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -103,7 +103,7 @@ impl litebox::shim::EnterShim for LinuxShimEntrypoints { } .is_ok() { - return ContinueOperation::ResumeKernel; + return ContinueOperation::ResumePlatform; } else { return ContinueOperation::ResumeGuest; } diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index c98a8a1a3..08abf77cf 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -80,7 +80,7 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { }; if info.kernel_mode { return if result.is_ok() { - ContinueOperation::ResumeKernel + ContinueOperation::ResumePlatform } else { ContinueOperation::ExceptionFixup }; diff --git a/litebox_shim_optee/src/ptr.rs b/litebox_shim_optee/src/ptr.rs index 162f3980c..a27105caa 100644 --- a/litebox_shim_optee/src/ptr.rs +++ b/litebox_shim_optee/src/ptr.rs @@ -191,14 +191,16 @@ impl PhysMutPtr { )? }; let mut buffer = core::mem::MaybeUninit::::uninit(); - unsafe { + // Fallible: another core may unmap this page concurrently. + let result = unsafe { litebox::mm::exception_table::memcpy_fallible( buffer.as_mut_ptr().cast::(), guard.ptr.cast::(), guard.size, ) - .map_err(|_| PhysPointerError::CopyFailed)?; - } + }; + debug_assert!(result.is_ok(), "fault reading from mapped physical page"); + result.map_err(|_| PhysPointerError::CopyFailed)?; // Safety: memcpy_fallible fully initialized the buffer on success. Ok(alloc::boxed::Box::new(unsafe { buffer.assume_init() })) } @@ -228,14 +230,16 @@ impl PhysMutPtr { PhysPageMapPermissions::READ, )? }; - unsafe { + // Fallible: another core may unmap this page concurrently. 
+ let result = unsafe { litebox::mm::exception_table::memcpy_fallible( values.as_mut_ptr().cast::(), guard.ptr.cast::(), guard.size, ) - .map_err(|_| PhysPointerError::CopyFailed)?; - } + }; + debug_assert!(result.is_ok(), "fault reading from mapped physical page"); + result.map_err(|_| PhysPointerError::CopyFailed)?; Ok(()) } @@ -261,14 +265,16 @@ impl PhysMutPtr { PhysPageMapPermissions::READ | PhysPageMapPermissions::WRITE, )? }; - unsafe { + // Fallible: another core may unmap this page concurrently. + let result = unsafe { litebox::mm::exception_table::memcpy_fallible( guard.ptr.cast::(), core::ptr::from_ref(&value).cast::(), guard.size, ) - .map_err(|_| PhysPointerError::CopyFailed)?; - } + }; + debug_assert!(result.is_ok(), "fault writing to mapped physical page"); + result.map_err(|_| PhysPointerError::CopyFailed)?; Ok(()) } @@ -297,14 +303,16 @@ impl PhysMutPtr { PhysPageMapPermissions::READ | PhysPageMapPermissions::WRITE, )? }; - unsafe { + // Fallible: another core may unmap this page concurrently. + let result = unsafe { litebox::mm::exception_table::memcpy_fallible( guard.ptr.cast::(), values.as_ptr().cast::(), guard.size, ) - .map_err(|_| PhysPointerError::CopyFailed)?; - } + }; + debug_assert!(result.is_ok(), "fault writing to mapped physical page"); + result.map_err(|_| PhysPointerError::CopyFailed)?; Ok(()) } From eb480150c7e51e41b8072dfe34cbc7d7d2ad4c59 Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Wed, 11 Feb 2026 22:57:51 +0000 Subject: [PATCH 10/11] addressed feedbacks --- litebox/src/shim.rs | 18 +++++-- .../src/host/snp/snp_impl.rs | 8 ++-- litebox_platform_linux_userland/src/lib.rs | 4 +- .../src/arch/x86/interrupts.S | 8 ++-- .../src/arch/x86/interrupts.rs | 12 +++-- litebox_platform_lvbs/src/lib.rs | 47 +++++++------------ litebox_platform_windows_userland/src/lib.rs | 4 +- litebox_shim_linux/src/lib.rs | 2 +- litebox_shim_optee/src/lib.rs | 2 +- 9 files changed, 51 insertions(+), 54 deletions(-) diff --git a/litebox/src/shim.rs b/litebox/src/shim.rs index 2f6bdcb60..468db9fce 100644 --- a/litebox/src/shim.rs +++ b/litebox/src/shim.rs @@ -90,16 +90,26 @@ pub trait EnterShim { } /// The operation to perform after returning from a shim handler +/// +/// - `ResumeGuest` and `ExitThread` cover the cases where the platform enters the shim +/// in response to events that occur during guest execution (e.g., a syscall). +/// - `ResumeKernelPlatform` and `ExceptionFixup` cover the cases where the **kernel platform** +/// enters the shim in response to events that occur during platform execution +/// (e.g., a user-space page fault triggered by a syscall handler). #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum ContinueOperation { /// Resume execution of the guest. ResumeGuest, /// Exit the current thread. ExitThread, - /// The exception was triggered in platform mode; resume platform execution. - ResumePlatform, - /// The exception was not handled. The platform should apply an - /// exception-table fixup (e.g., advance the instruction pointer) if one exists. + /// The shim successfully handled an exception which was triggered by + /// the kernel platform (e.g., a syscall handler's copy_from_user against + /// demand-pageable user memory); Resume the kernel platform's execution. + ResumeKernelPlatform, + /// The shim failed to handle the exception (e.g., invalid memory access). + /// The kernel platform will apply a fixup via + /// [`search_exception_tables`](crate::mm::exception_table::search_exception_tables) + /// if one exists. 
ExceptionFixup, } diff --git a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs index d1ad36003..78dfda3e8 100644 --- a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs +++ b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs @@ -216,8 +216,8 @@ pub fn init_thread( match tls.shim.get().unwrap().init(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), - litebox::shim::ContinueOperation::ResumePlatform => { - panic!("ResumePlatform not expected in SNP init") + litebox::shim::ContinueOperation::ResumeKernelPlatform => { + panic!("ResumeKernelPlatform not expected in SNP init") } litebox::shim::ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in SNP init") @@ -244,8 +244,8 @@ pub fn handle_syscall(pt_regs: &mut litebox_common_linux::PtRegs) { match tls.shim.get().unwrap().syscall(pt_regs) { litebox::shim::ContinueOperation::ResumeGuest => {} litebox::shim::ContinueOperation::ExitThread => exit_thread(), - litebox::shim::ContinueOperation::ResumePlatform => { - panic!("ResumePlatform not expected in SNP syscall") + litebox::shim::ContinueOperation::ResumeKernelPlatform => { + panic!("ResumeKernelPlatform not expected in SNP syscall") } litebox::shim::ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in SNP syscall") diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 4921d8467..2b2c2a3d3 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -1633,8 +1633,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ResumePlatform => { - panic!("ResumePlatform not expected in linux_userland") + ContinueOperation::ResumeKernelPlatform => { + panic!("ResumeKernelPlatform not expected in linux_userland") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in linux_userland") diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index cb6ec569a..fbc5d1013 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -168,14 +168,14 @@ isr_with_err_code isr_general_protection_fault general_protection_fault_handler_ /* Vector 14: Page Fault (#PF) - Error code * - * Custom ISR stub to support kernel-mode demand paging for user-space addresses. + * Custom ISR stub to support kernel-mode page faults at user-space addresses. * * For kernel-mode page faults, page_fault_handler_impl returns: * 0 = handled (exception table fixup applied) - * 1 = demand paging needed for a user-space address + * 1 = user-space address fault; route to shim for demand paging or fixup * (panics for unrecoverable faults — never returns) * - * When demand paging is needed, the stub restores GPRs and jumps to + * When routing to the shim is needed, the stub restores GPRs and jumps to * kernel_exception_callback (in run_thread_arch), which calls the shim's * exception handler and then iretq back to the faulting kernel instruction. */ @@ -201,7 +201,7 @@ isr_page_fault: iretq .Lkernel_demand_page: - /* Demand paging needed. Exception info already stored in per-CPU vars by + /* User-space address fault. Exception info already stored in per-CPU vars by * page_fault_handler_impl. 
GPRs are already saved on the stack by push_regs * (same layout as SAVE_CPU_CONTEXT_ASM). Jump directly to * kernel_exception_regs_saved, skipping the redundant pop_regs/push cycle. diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 0d0505dab..575ad2bbd 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -176,8 +176,9 @@ extern "C" fn general_protection_fault_handler_impl(regs: &PtRegs) { enum PageFaultResult { /// Fault was handled via exception table fixup. Handled = 0, - /// Demand paging is needed for a user-space address. - DemandPage = 1, + /// Fault at a user-space address; route to the shim's exception handler + /// for demand paging or exception-table fixup on invalid access. + RouteToShim = 1, } /// Kernel-mode page fault handler (vector 14). @@ -195,8 +196,9 @@ extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> PageFaultResult { let fault_addr: usize = Cr2::read_raw().truncate(); let error_code = regs.orig_rax; - // Kernel-mode page fault at a user-space address: route to the shim's - // exception handler for demand paging (and exception table fixup on failure). + // Kernel-mode page fault at a user-space address (e.g., a syscall handler has accessed + // not-yet mapped or invalid user memory): route to the shim's exception handler for + // demand paging or exception table fixup. if (USER_ADDR_MIN..USER_ADDR_MAX).contains(&fault_addr) { with_per_cpu_variables_asm(|pcv| { pcv.set_exception_info( @@ -206,7 +208,7 @@ extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> PageFaultResult { regs.rip, ); }); - return PageFaultResult::DemandPage; + return PageFaultResult::RouteToShim; } // Safety net for fallible kernel memory operations (e.g., copying to/from diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 586b8c40c..2055dd5b0 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1772,45 +1772,27 @@ unsafe extern "C" fn syscall_handler(thread_ctx: &mut ThreadContext) { } /// Handles exceptions by reading exception info from per-CPU variables -/// and routing to the shim's exception handler. +/// and routing to the shim's exception handler via `call_shim`. /// /// Returns 0 for normal flow (user-mode or successful demand paging), or /// a fixup address when kernel-mode demand paging fails and an exception /// table entry exists. 
unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext, kernel_mode: bool) -> usize { - use crate::host::per_cpu_variables::{PerCpuVariablesAsm, with_per_cpu_variables_asm}; + use crate::host::per_cpu_variables::with_per_cpu_variables_asm; let info = with_per_cpu_variables_asm(|pcv| litebox::shim::ExceptionInfo { exception: pcv.get_exception(), error_code: pcv.get_exception_error_code(), cr2: pcv.get_exception_cr2(), kernel_mode, }); - - if kernel_mode { - // We don't use `thread_ctx.call_shim()` here because: - // - `call_shim()` switches back to user mode after the call - // - `ExceptionFixup` requires post-processing (exception table lookup) - // - Must return a fixup address to the asm caller (not resume user mode) - let op = thread_ctx.shim.exception(thread_ctx.ctx, &info); - match op { - ContinueOperation::ResumePlatform => 0, - ContinueOperation::ExceptionFixup => { - let faulting_rip = - with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_rip); - litebox::mm::exception_table::search_exception_tables(faulting_rip) - .expect("kernel-mode page fault with no exception table fixup") - } - ContinueOperation::ExitThread | ContinueOperation::ResumeGuest => { - panic!("unexpected {op:?} for kernel-mode exception") - } - } - } else { - thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)); - 0 - } + thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)) } /// Calls `f` in order to call into a shim entrypoint. +/// +/// Returns 0 for most operations. For `ExceptionFixup`, returns the fixup +/// address from the exception table. For `ResumeGuest`, does not return +/// (switches directly to user mode). impl ThreadContext<'_> { fn call_shim( &mut self, @@ -1818,16 +1800,19 @@ impl ThreadContext<'_> { &dyn litebox::shim::EnterShim, &mut litebox_common_linux::PtRegs, ) -> ContinueOperation, - ) { + ) -> usize { let op = f(self.shim, self.ctx); match op { ContinueOperation::ResumeGuest => unsafe { switch_to_user(self.ctx) }, - ContinueOperation::ExitThread => {} - ContinueOperation::ResumePlatform => { - panic!("ResumePlatform not expected in user-mode call_shim path") - } + ContinueOperation::ExitThread | ContinueOperation::ResumeKernelPlatform => 0, ContinueOperation::ExceptionFixup => { - panic!("ExceptionFixup not expected in user-mode call_shim path") + use crate::host::per_cpu_variables::{ + PerCpuVariablesAsm, with_per_cpu_variables_asm, + }; + let faulting_rip = + with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_rip); + litebox::mm::exception_table::search_exception_tables(faulting_rip) + .expect("kernel-mode page fault with no exception table fixup") } } } diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 8de5579b5..836c161ea 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -1754,8 +1754,8 @@ impl ThreadContext<'_> { match op { ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, ContinueOperation::ExitThread => {} - ContinueOperation::ResumePlatform => { - panic!("ResumePlatform not expected in windows_userland") + ContinueOperation::ResumeKernelPlatform => { + panic!("ResumeKernelPlatform not expected in windows_userland") } ContinueOperation::ExceptionFixup => { panic!("ExceptionFixup not expected in windows_userland") diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index e86985892..48ff56c1e 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs 
@@ -103,7 +103,7 @@ impl litebox::shim::EnterShim for LinuxShimEntrypoints { } .is_ok() { - return ContinueOperation::ResumePlatform; + return ContinueOperation::ResumeKernelPlatform; } else { return ContinueOperation::ResumeGuest; } diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs index 08abf77cf..b3d71781a 100644 --- a/litebox_shim_optee/src/lib.rs +++ b/litebox_shim_optee/src/lib.rs @@ -80,7 +80,7 @@ impl litebox::shim::EnterShim for OpteeShimEntrypoints { }; if info.kernel_mode { return if result.is_ok() { - ContinueOperation::ResumePlatform + ContinueOperation::ResumeKernelPlatform } else { ContinueOperation::ExceptionFixup }; From a17e6d4149739d997853ec46e0a0ad93e7cab8bc Mon Sep 17 00:00:00 2001 From: Sangho Lee Date: Thu, 12 Feb 2026 01:31:14 +0000 Subject: [PATCH 11/11] simplification --- .../src/arch/x86/interrupts.S | 42 +----- .../src/arch/x86/interrupts.rs | 54 -------- .../src/host/per_cpu_variables.rs | 36 ----- litebox_platform_lvbs/src/lib.rs | 123 +++++++++++------- litebox_shim_linux/src/lib.rs | 2 +- 5 files changed, 84 insertions(+), 173 deletions(-) diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.S b/litebox_platform_lvbs/src/arch/x86/interrupts.S index fbc5d1013..e4a92e096 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.S +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.S @@ -168,49 +168,19 @@ isr_with_err_code isr_general_protection_fault general_protection_fault_handler_ /* Vector 14: Page Fault (#PF) - Error code * - * Custom ISR stub to support kernel-mode page faults at user-space addresses. - * - * For kernel-mode page faults, page_fault_handler_impl returns: - * 0 = handled (exception table fixup applied) - * 1 = user-space address fault; route to shim for demand paging or fixup - * (panics for unrecoverable faults — never returns) - * - * When routing to the shim is needed, the stub restores GPRs and jumps to - * kernel_exception_callback (in run_thread_arch), which calls the shim's - * exception handler and then iretq back to the faulting kernel instruction. + * Both kernel-mode and user-mode page faults are routed through the shim's + * exception_handler, which handles demand paging, exception table fixup, + * and panic in order. The ISR stub just pushes the vector number and jumps + * to the appropriate callback. */ .global isr_page_fault isr_page_fault: cld test qword ptr [rsp + 16], 0x3 /* Check CS RPL bits */ + push 14 /* Pass vector number (push does not affect flags) */ jnz .Luser_isr_page_fault - - /* --- Kernel-mode page fault --- */ - push_regs - mov rbp, rsp /* Save stack pointer */ - and rsp, -16 /* Align stack to 16 bytes for call */ - mov rdi, rbp /* Pass pointer to saved registers as first arg */ - call page_fault_handler_impl - test eax, eax - jnz .Lkernel_demand_page - - /* Handled (exception table fixup) — return to fixup address */ - mov rsp, rbp - pop_regs - add rsp, 8 /* Skip error code */ - iretq - -.Lkernel_demand_page: - /* User-space address fault. Exception info already stored in per-CPU vars by - * page_fault_handler_impl. GPRs are already saved on the stack by push_regs - * (same layout as SAVE_CPU_CONTEXT_ASM). Jump directly to - * kernel_exception_regs_saved, skipping the redundant pop_regs/push cycle. 
- */ - mov rsp, rbp - jmp kernel_exception_regs_saved - + jmp kernel_exception_callback .Luser_isr_page_fault: - push 14 /* Pass vector number to exception_callback */ jmp exception_callback /* Vector 16: x87 Floating-Point Exception (#MF) - No error code */ diff --git a/litebox_platform_lvbs/src/arch/x86/interrupts.rs b/litebox_platform_lvbs/src/arch/x86/interrupts.rs index 575ad2bbd..3d03e0cc8 100644 --- a/litebox_platform_lvbs/src/arch/x86/interrupts.rs +++ b/litebox_platform_lvbs/src/arch/x86/interrupts.rs @@ -171,60 +171,6 @@ extern "C" fn general_protection_fault_handler_impl(regs: &PtRegs) { ); } -/// Result from the kernel-mode page fault handler, consumed by the ISR stub. -#[repr(u8)] -enum PageFaultResult { - /// Fault was handled via exception table fixup. - Handled = 0, - /// Fault at a user-space address; route to the shim's exception handler - /// for demand paging or exception-table fixup on invalid access. - RouteToShim = 1, -} - -/// Kernel-mode page fault handler (vector 14). -/// -/// Returns [`PageFaultResult`] to the ISR stub. -/// For unrecoverable faults, this function panics and never returns. -#[unsafe(no_mangle)] -extern "C" fn page_fault_handler_impl(regs: &mut PtRegs) -> PageFaultResult { - use crate::host::per_cpu_variables::with_per_cpu_variables_asm; - use crate::{USER_ADDR_MAX, USER_ADDR_MIN}; - use litebox::mm::exception_table::search_exception_tables; - use litebox::utils::TruncateExt as _; - use x86_64::registers::control::Cr2; - - let fault_addr: usize = Cr2::read_raw().truncate(); - let error_code = regs.orig_rax; - - // Kernel-mode page fault at a user-space address (e.g., a syscall handler has accessed - // not-yet mapped or invalid user memory): route to the shim's exception handler for - // demand paging or exception table fixup. - if (USER_ADDR_MIN..USER_ADDR_MAX).contains(&fault_addr) { - with_per_cpu_variables_asm(|pcv| { - pcv.set_exception_info( - litebox::shim::Exception::PAGE_FAULT, - error_code.truncate(), - fault_addr, - regs.rip, - ); - }); - return PageFaultResult::RouteToShim; - } - - // Safety net for fallible kernel memory operations (e.g., copying to/from - // VTL0 addresses) where the target address may be unmapped due to bugs. - if let Some(fixup_addr) = search_exception_tables(regs.rip) { - regs.rip = fixup_addr; - return PageFaultResult::Handled; - } - - // Kernel-mode page fault at kernel-space addresses — unrecoverable - panic!( - "EXCEPTION: PAGE FAULT\nAccessed Address: {:#x}\nError Code: {:#x}\n{:#x?}", - fault_addr, error_code, regs - ); -} - /// Kernel-mode handler for x87 floating-point exception (vector 16). 
#[unsafe(no_mangle)] extern "C" fn x87_floating_point_handler_impl(regs: &PtRegs) { diff --git a/litebox_platform_lvbs/src/host/per_cpu_variables.rs b/litebox_platform_lvbs/src/host/per_cpu_variables.rs index d88c04a34..34f730760 100644 --- a/litebox_platform_lvbs/src/host/per_cpu_variables.rs +++ b/litebox_platform_lvbs/src/host/per_cpu_variables.rs @@ -242,12 +242,6 @@ pub struct PerCpuVariablesAsm { vtl1_user_xsaved: Cell, /// Exception info: exception vector number exception_trapno: Cell, - /// Exception info: hardware error code - exception_error_code: Cell, - /// Exception info: faulting address (CR2) - exception_cr2: Cell, - /// Exception info: faulting kernel RIP (for exception table fixup) - exception_rip: Cell, } impl PerCpuVariablesAsm { @@ -348,36 +342,9 @@ impl PerCpuVariablesAsm { pub const fn exception_trapno_offset() -> usize { offset_of!(PerCpuVariablesAsm, exception_trapno) } - pub const fn exception_error_code_offset() -> usize { - offset_of!(PerCpuVariablesAsm, exception_error_code) - } - pub const fn exception_cr2_offset() -> usize { - offset_of!(PerCpuVariablesAsm, exception_cr2) - } - pub fn set_exception_info( - &self, - exception: litebox::shim::Exception, - error_code: u32, - cr2: usize, - rip: usize, - ) { - self.exception_trapno.set(exception.0); - self.exception_error_code.set(error_code); - self.exception_cr2.set(cr2); - self.exception_rip.set(rip); - } pub fn get_exception(&self) -> litebox::shim::Exception { litebox::shim::Exception(self.exception_trapno.get()) } - pub fn get_exception_error_code(&self) -> u32 { - self.exception_error_code.get() - } - pub fn get_exception_cr2(&self) -> usize { - self.exception_cr2.get() - } - pub fn get_exception_rip(&self) -> usize { - self.exception_rip.get() - } pub fn get_user_context_top_addr(&self) -> usize { self.user_context_top_addr.get() } @@ -430,9 +397,6 @@ impl RefCellWrapper { vtl1_kernel_xsaved: Cell::new(0), vtl1_user_xsaved: Cell::new(0), exception_trapno: Cell::new(0), - exception_error_code: Cell::new(0), - exception_cr2: Cell::new(0), - exception_rip: Cell::new(0), }, inner: RefCell::new(value), } diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 2055dd5b0..1892e5f52 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1682,7 +1682,7 @@ unsafe extern "C" fn run_thread_arch( "jmp done", // Exception callback: entered from ISR stubs for user-mode exceptions. 
// At this point: - // - rsp points to ISR stack: [rsp]=vector, [rsp+8]=error_code, then iret frame + // - rsp = ISR stack: [vector, error_code, rip, cs, rflags, rsp, ss] // - All GPRs contain user-mode values // - Interrupts are disabled (IDT gate clears IF) // - GS = user (swapgs has NOT happened yet) @@ -1690,12 +1690,8 @@ unsafe extern "C" fn run_thread_arch( "exception_callback:", "swapgs", "mov gs:[{scratch_off}], rax", // Save `rax` to per-CPU scratch - "mov rax, cr2", - "mov gs:[{exception_cr2_off}], rax", // Save `CR2` (only meaningful for #PF) "mov al, [rsp]", "mov gs:[{exception_trapno_off}], al", // vector number from ISR stack - "mov eax, [rsp + 8]", - "mov gs:[{exception_error_code_off}], eax", // error code from ISR stack "mov rax, rsp", // store ISR `rsp` in `rax` "mov rsp, gs:[{user_context_top_off}]", // `rsp` points to the top address of user context area SAVE_PF_USER_CONTEXT_ASM!(), @@ -1704,28 +1700,34 @@ unsafe extern "C" fn run_thread_arch( "mov rsp, gs:[{cur_kernel_sp_off}]", "mov rdi, [rsp]", // pass `thread_ctx` "xor esi, esi", // kernel_mode = false + "mov rdx, cr2", // cr2 (still valid — nothing overwrites it) "call {exception_handler}", "jmp done", - // Kernel-mode page fault callback: demand paging for user-space addresses. - // At this point: - // - rsp points to ISR stack with GPRs already saved - // - Exception info already stored in per-CPU variables + // Kernel-mode exception callback (currently used for #PF demand paging + // and exception-table fixup). + // At entry: + // - rsp = ISR stack: [vector, error_code, rip, cs, rflags, rsp, ss] + // - All GPRs = kernel values at time of fault + // - Interrupts are disabled (IDT gate clears IF) // - GS = kernel (no swapgs needed) - // - User extended states already saved by the enclosing syscall/exception path // - // Two entry points: - // kernel_exception_callback - saves GPRs first (for callers without prior push_regs) - // kernel_exception_regs_saved - GPRs already on stack (from ISR stub's push_regs) + // Saves GPRs, then passes exception info (CR2, error code, faulting + // RIP) to exception_handler via registers. exception_handler will try + // demand paging, exception table fixup, and kernel panic in that order. ".globl kernel_exception_callback", "kernel_exception_callback:", + "add rsp, 8", // skip vector number + // Now stack: [error_code, rip, cs, rflags, rsp, ss] SAVE_CPU_CONTEXT_ASM!(), - ".globl kernel_exception_regs_saved", - "kernel_exception_regs_saved:", "mov rbp, rsp", "and rsp, -16", + // Pass exception info via registers (SysV ABI args 1-5) "mov rdi, gs:[{cur_kernel_sp_off}]", - "mov rdi, [rdi]", // thread_ctx - "mov esi, 1", // kernel_mode = true + "mov rdi, [rdi]", // arg1: thread_ctx + "mov esi, 1", // arg2: kernel_mode = true + "mov rdx, cr2", // arg3: cr2 (fault address) + "mov ecx, [rbp + 120]", // arg4: error_code (orig_rax slot) + "mov r8, [rbp + 128]", // arg5: faulting RIP (iret frame) "call {exception_handler}", // If demand paging failed, rax contains the exception table fixup // address. 
Patch the saved RIP on the ISR stack so iretq resumes @@ -1758,8 +1760,6 @@ unsafe extern "C" fn run_thread_arch( USER_CONTEXT_SIZE = const core::mem::size_of::(), scratch_off = const { PerCpuVariablesAsm::scratch_offset() }, exception_trapno_off = const { PerCpuVariablesAsm::exception_trapno_offset() }, - exception_error_code_off = const { PerCpuVariablesAsm::exception_error_code_offset() }, - exception_cr2_off = const { PerCpuVariablesAsm::exception_cr2_offset() }, init_handler = sym init_handler, reenter_handler = sym reenter_handler, syscall_handler = sym syscall_handler, @@ -1771,28 +1771,67 @@ unsafe extern "C" fn syscall_handler(thread_ctx: &mut ThreadContext) { thread_ctx.call_shim(|shim, ctx| shim.syscall(ctx)); } -/// Handles exceptions by reading exception info from per-CPU variables -/// and routing to the shim's exception handler via `call_shim`. +/// Handles exceptions and routes to the shim's exception handler via `call_shim`. +/// +/// `cr2` is passed by both kernel- and user-mode assembly callbacks. +/// For kernel-mode exceptions, `error_code` and `faulting_rip` +/// are also passed from the ISR stack. +/// For user-mode exceptions, `error_code` is read from the saved +/// `orig_rax` in the user context and the vector number is read from +/// the per-CPU trapno variable. /// /// Returns 0 for normal flow (user-mode or successful demand paging), or -/// a fixup address when kernel-mode demand paging fails and an exception -/// table entry exists. -unsafe extern "C" fn exception_handler(thread_ctx: &mut ThreadContext, kernel_mode: bool) -> usize { - use crate::host::per_cpu_variables::with_per_cpu_variables_asm; - let info = with_per_cpu_variables_asm(|pcv| litebox::shim::ExceptionInfo { - exception: pcv.get_exception(), - error_code: pcv.get_exception_error_code(), - cr2: pcv.get_exception_cr2(), - kernel_mode, - }); - thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)) +/// a fixup address when kernel-mode user-space demand paging fails and +/// an exception table entry exists. Panics if no fixup is found. +unsafe extern "C" fn exception_handler( + thread_ctx: &mut ThreadContext, + kernel_mode: bool, + cr2: usize, + error_code: usize, + faulting_rip: usize, +) -> usize { + let info = if kernel_mode { + use litebox::utils::TruncateExt as _; + litebox::shim::ExceptionInfo { + exception: litebox::shim::Exception::PAGE_FAULT, + error_code: error_code.truncate(), + cr2, + kernel_mode: true, + } + } else { + use crate::host::per_cpu_variables::{PerCpuVariablesAsm, with_per_cpu_variables_asm}; + use litebox::utils::TruncateExt as _; + litebox::shim::ExceptionInfo { + exception: with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception), + error_code: thread_ctx.ctx.orig_rax.truncate(), + cr2, + kernel_mode: false, + } + }; + match thread_ctx.call_shim(|shim, ctx| shim.exception(ctx, &info)) { + Some(val) => val, + None => { + // ExceptionFixup: look up exception table, panic if not found. + litebox::mm::exception_table::search_exception_tables(faulting_rip).unwrap_or_else( + || { + panic!( + "EXCEPTION: PAGE FAULT\n\ + Accessed Address: {:#x}\n\ + Error Code: {:#x}\n\ + Faulting RIP: {:#x}", + info.cr2, info.error_code, faulting_rip, + ) + }, + ) + } + } } /// Calls `f` in order to call into a shim entrypoint. /// -/// Returns 0 for most operations. For `ExceptionFixup`, returns the fixup -/// address from the exception table. For `ResumeGuest`, does not return -/// (switches directly to user mode). +/// Returns `Some(0)` for most operations. 
Returns `None` for +/// `ExceptionFixup` (caller is responsible for looking up the fixup). +/// For `ResumeGuest`, does not return (switches directly to user mode). impl ThreadContext<'_> { fn call_shim( &mut self, @@ -1800,20 +1839,12 @@ impl ThreadContext<'_> { &dyn litebox::shim::EnterShim, &mut litebox_common_linux::PtRegs, ) -> ContinueOperation, - ) -> usize { + ) -> Option { let op = f(self.shim, self.ctx); match op { ContinueOperation::ResumeGuest => unsafe { switch_to_user(self.ctx) }, - ContinueOperation::ExitThread | ContinueOperation::ResumeKernelPlatform => 0, - ContinueOperation::ExceptionFixup => { - use crate::host::per_cpu_variables::{ - PerCpuVariablesAsm, with_per_cpu_variables_asm, - }; - let faulting_rip = - with_per_cpu_variables_asm(PerCpuVariablesAsm::get_exception_rip); - litebox::mm::exception_table::search_exception_tables(faulting_rip) - .expect("kernel-mode page fault with no exception table fixup") - } + ContinueOperation::ExitThread | ContinueOperation::ResumeKernelPlatform => Some(0), + ContinueOperation::ExceptionFixup => None, } } } diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index 48ff56c1e..1db39dda4 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -105,7 +105,7 @@ impl litebox::shim::EnterShim for LinuxShimEntrypoints { { return ContinueOperation::ResumeKernelPlatform; } else { - return ContinueOperation::ResumeGuest; + return ContinueOperation::ExceptionFixup; } } self.enter_shim(false, ctx, |task, _ctx| task.handle_exception_request(info))
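
Note (illustrative, not part of the patch): the sketch below is a standalone, reduced model of the kernel-mode decision the shim-side `exception` entrypoints above now make — attempt demand paging first, return ResumeKernelPlatform on success so the faulting kernel access resumes, and return ExceptionFixup on failure so the platform resolves a fixup address from the exception table for the faulting RIP (panicking if none exists). The `ContinueOperation` and `ExceptionInfo` types here are cut-down stand-ins for the litebox ones, and `try_demand_page` is a hypothetical helper standing in for the shim's memory manager; it is a sketch of the control flow only, not the real API.

    // Reduced stand-ins for the litebox::shim types; illustrative only.
    #[allow(dead_code)]
    #[derive(Debug, PartialEq)]
    enum ContinueOperation {
        ResumeGuest,
        ExitThread,
        ResumeKernelPlatform,
        ExceptionFixup,
    }

    struct ExceptionInfo {
        cr2: usize,
        error_code: u32,
        kernel_mode: bool,
    }

    // Hypothetical demand-paging attempt; a real shim would ask its memory
    // manager to map the faulting page if the containing VMA allows it.
    fn try_demand_page(fault_addr: usize, _error_code: u32) -> Result<(), ()> {
        if fault_addr >= 0x1000 { Ok(()) } else { Err(()) }
    }

    // Kernel-mode page fault at a user-space address (e.g. a fallible copy in
    // a syscall handler touched not-yet-mapped or invalid user memory).
    fn handle_kernel_mode_fault(info: &ExceptionInfo) -> ContinueOperation {
        assert!(info.kernel_mode);
        if try_demand_page(info.cr2, info.error_code).is_ok() {
            // Fault resolved: the platform returns to the faulting kernel
            // instruction and the interrupted access continues.
            ContinueOperation::ResumeKernelPlatform
        } else {
            // Not resolvable: the platform looks up the exception table for
            // the faulting RIP and resumes at the fixup, or panics if no
            // entry exists.
            ContinueOperation::ExceptionFixup
        }
    }

    fn main() {
        let ok = ExceptionInfo { cr2: 0x7fff_0000, error_code: 0x2, kernel_mode: true };
        let bad = ExceptionInfo { cr2: 0x10, error_code: 0x2, kernel_mode: true };
        assert_eq!(handle_kernel_mode_fault(&ok), ContinueOperation::ResumeKernelPlatform);
        assert_eq!(handle_kernel_mode_fault(&bad), ContinueOperation::ExceptionFixup);
    }

The user-mode path is unchanged in shape: user faults still flow through the normal shim exception handling and resume the guest, so only the kernel-mode branch needs to distinguish the resume-platform and fixup outcomes.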