Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion kernel/src/driver/block/cache/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub const BLOCK_SIZE_LOG: usize = 9;
///块大小这里固定为512
pub const BLOCK_SIZE: usize = 1 << BLOCK_SIZE_LOG;
///这里规定Cache的threshold大小,单位为:MB
pub const CACHE_THRESHOLD: usize = 64;
pub const CACHE_THRESHOLD: usize = 2;

pub enum BlockCacheError {
BlockSizeError,
Expand Down
147 changes: 107 additions & 40 deletions kernel/src/filesystem/vfs/iov.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@ use alloc::vec::Vec;
use system_error::SystemError;

use crate::{
mm::verify_area,
mm::VirtAddr,
syscall::user_access::{user_accessible_len, UserBufferReader, UserBufferWriter},
syscall::user_access::{
copy_from_user_protected, user_accessible_len, UserBufferReader, UserBufferWriter,
},
};

/// Linux UIO_MAXIOV: maximum number of iovec structures per syscall
const IOV_MAX: usize = 1024;
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct IoVec {
Expand All @@ -25,7 +31,15 @@ impl IoVecs {
/// 获取IoVecs中所有缓冲区的总长度
#[inline(never)]
pub fn total_len(&self) -> usize {
self.0.iter().map(|x| x.iov_len).sum()
self.0
.iter()
.try_fold(0usize, |acc, x| acc.checked_add(x.iov_len))
.unwrap_or(usize::MAX)
}

/// Returns the `IoVec` entries stored in this `IoVecs` as a shared slice.
pub fn iovs(&self) -> &[IoVec] {
    self.0.as_slice()
}

/// Constructs `IoVecs` from an array of `IoVec` in userspace.
Expand All @@ -34,7 +48,7 @@ impl IoVecs {
///
/// * `iov` - Pointer to the array of `IoVec` in userspace
/// * `iovcnt` - Number of `IoVec` elements in the array
/// * `readv` - Whether this is for the `readv` syscall (currently unused)
/// * `readv` - Whether this is for the `readv` syscall (true = check write permission)
///
/// # Returns
///
Expand All @@ -52,23 +66,53 @@ impl IoVecs {
iovcnt: usize,
_readv: bool,
) -> Result<Self, SystemError> {
let iovs_reader = UserBufferReader::new(iov, iovcnt * core::mem::size_of::<IoVec>(), true)?;
// Linux: iovcnt must be > 0 and not unreasonably large.
if iovcnt == 0 || iovcnt > IOV_MAX {
return Err(SystemError::EINVAL);
}

let elem_size = core::mem::size_of::<IoVec>();
let total_bytes = iovcnt.checked_mul(elem_size).ok_or(SystemError::EINVAL)?;

// Only does range check (user range) here.
let iovs_reader = UserBufferReader::new(iov, total_bytes, true)?;

// Use exception-table protected copy to avoid kernel faults when userspace pointer is bad.
let iovs_buf = iovs_reader.buffer_protected(0)?;

// 将用户空间的IoVec转换为引用(注意:这里的引用是静态的,因为用户空间的IoVec不会被释放)
let iovs = iovs_reader.buffer::<IoVec>(0)?;
let mut slices: Vec<IoVec> = Vec::with_capacity(iovcnt);
for idx in 0..iovcnt {
let offset = idx * elem_size;
let one: IoVec = iovs_buf.read_one(offset)?;

let mut slices: Vec<IoVec> = Vec::with_capacity(iovs.len());
// Linux behavior: always validate iov_base is a user pointer, even when iov_len==0.
// This matches Linux access_ok(addr, 0) behavior and is required by gVisor tests.
let base = VirtAddr::new(one.iov_base as usize);

for iov in iovs.iter() {
if iov.iov_len == 0 {
// Only do lightweight address range check (like Linux's access_ok).
// This checks that the address range is within user space limits,
// but does NOT traverse page tables or check actual mappings.
// Actual page mapping/permission checks happen during copy operations.
verify_area(base, one.iov_len)?;

// Skip zero-length iovecs after validation
if one.iov_len == 0 {
continue;
}

let _ = UserBufferWriter::new(iov.iov_base, iov.iov_len, true)?;
slices.push(*iov);
// If the first byte isn't writable/readable at all, fail early with EFAULT.
// Partial accessibility is handled by the syscall implementation.
// Note: user_accessible_len returns 0 for null pointers (addr.is_null() check),
// so null pointer detection is covered here.
let accessible = user_accessible_len(base, one.iov_len, _readv /* check_write */);
if accessible == 0 {
return Err(SystemError::EFAULT);
}

slices.push(one);
}

return Ok(Self(slices));
Ok(Self(slices))
}

/// Aggregates data from all IoVecs into a single buffer.
Expand All @@ -87,35 +131,36 @@ impl IoVecs {
/// read at all, `Err(SystemError::EFAULT)` is returned.
pub fn gather(&self) -> Result<Vec<u8>, SystemError> {
let mut buf = Vec::with_capacity(self.total_len());
let mut read_any = false;

for iov in self.0.iter() {
let base = VirtAddr::new(iov.iov_base as usize);
// 检查从 iov_base 开始有多少 bytes 在 vma 内部且实际可以访问
let accessible =
user_accessible_len(VirtAddr::new(iov.iov_base as usize), iov.iov_len, false);

// log::debug!(
// "iov is {:?}. iov_len: {}; accessible len:{}",
// iov,
// iov.iov_len,
// accessible
// );
let accessible = user_accessible_len(base, iov.iov_len, false /* read */);

// 如果一个字节都不能访问
if accessible == 0 {
if buf.is_empty() {
// log::error!(
// "The first iov is empty, returning EFAULT. iov shape: {:?}",
// iov
// );
if !read_any {
return Err(SystemError::EFAULT);
}
return Ok(buf);
}

// 复制可访问的部分
unsafe {
let src = core::slice::from_raw_parts(iov.iov_base as *const u8, accessible);
buf.extend_from_slice(src);
// 使用异常保护的拷贝,与 scatter 保持一致
let mut chunk = alloc::vec![0u8; accessible];
match unsafe { copy_from_user_protected(&mut chunk, base) } {
Ok(_) => {
buf.extend_from_slice(&chunk);
read_any = true;
}
Err(SystemError::EFAULT) => {
// Linux: return partial data if any bytes were copied.
if !read_any {
return Err(SystemError::EFAULT);
}
return Ok(buf);
}
Err(e) => return Err(e),
}

// 如果没有读取完整个 iov,说明遇到了不可访问的区域
Expand Down Expand Up @@ -144,21 +189,43 @@ impl IoVecs {
/// let iovecs = IoVecs::from_user(/* ... */)?;
/// iovecs.scatter(&[1, 2, 3, 4, 5]);
/// ```
pub fn scatter(&self, data: &[u8]) {
let mut data: &[u8] = data;
for slice in self.0.iter() {
let len = core::cmp::min(slice.iov_len, data.len());
if len == 0 {
pub fn scatter(&self, data: &[u8]) -> Result<(), SystemError> {
let mut remaining = data;
let mut written_any = false;

for iov in self.0.iter() {
if remaining.is_empty() {
break;
}

let want = core::cmp::min(iov.iov_len, remaining.len());
if want == 0 {
continue;
}

let mut buf_writer =
UserBufferWriter::new(slice.iov_base, slice.iov_len, true).unwrap();
let slice = buf_writer.buffer::<u8>(0).unwrap();
let base = VirtAddr::new(iov.iov_base as usize);
let accessible = user_accessible_len(base, want, true /*write*/);
if accessible == 0 {
if !written_any {
return Err(SystemError::EFAULT);
}
break;
}

let mut writer = UserBufferWriter::new(iov.iov_base, accessible, true)?;
let mut user_buf = writer.buffer_protected(0)?;
user_buf.write_to_user(0, &remaining[..accessible])?;

slice[..len].copy_from_slice(&data[..len]);
data = &data[len..];
written_any = true;
remaining = &remaining[accessible..];

if accessible < want {
// Hit an unmapped/forbidden region; stop as Linux does.
break;
}
}

Ok(())
}

/// Creates a buffer with capacity equal to the total length of all IoVecs.
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/filesystem/vfs/syscall/sys_preadv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ pub fn do_preadv(fd: i32, iovecs: &IoVecs, offset: usize) -> Result<usize, Syste
let read_len = file.pread(offset, data.len(), &mut data)?;

// Scatter the read data back to user buffers.
iovecs.scatter(&data[..read_len]);
iovecs.scatter(&data[..read_len])?;

Ok(read_len)
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/filesystem/vfs/syscall/sys_preadv2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ pub fn do_preadv2(

let mut data = vec![0; iovecs.total_len()];
let read_len = file.read(data.len(), &mut data)?;
iovecs.scatter(&data[..read_len]);
iovecs.scatter(&data[..read_len])?;
return Ok(read_len);
}

Expand Down
68 changes: 54 additions & 14 deletions kernel/src/filesystem/vfs/syscall/sys_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::mm::VirtAddr;
use crate::process::ProcessManager;
use crate::syscall::table::FormattedSyscallParam;
use crate::syscall::table::Syscall;
use crate::syscall::user_access::{user_accessible_len, UserBufferWriter};
use crate::syscall::user_access::{copy_to_user_protected, user_accessible_len, UserBufferWriter};
use alloc::string::ToString;
use alloc::vec::Vec;

Expand Down Expand Up @@ -46,21 +46,12 @@ impl Syscall for SysReadHandle {
return Ok(0);
}

let mut user_buffer_writer = UserBufferWriter::new(buf_vaddr, len, frame.is_from_user())?;

if frame.is_from_user() {
// 用户态:先计算可写入长度,避免直接写入无效用户页
let accessible =
user_accessible_len(VirtAddr::new(buf_vaddr as usize), len, true /*write*/);
if accessible == 0 {
return Err(SystemError::EFAULT);
}

let user_buf = user_buffer_writer.buffer(0)?;
let read_len = do_read(fd, &mut user_buf[..accessible])?;
Ok(read_len)
read_into_user_buffer(fd, buf_vaddr, len)
} else {
// 内核态:直接借用用户缓冲区
// 内核态:直接借用内核缓冲区
let mut user_buffer_writer =
UserBufferWriter::new(buf_vaddr, len, frame.is_from_user())?;
let user_buf = user_buffer_writer.buffer(0)?;
do_read(fd, user_buf)
}
Expand Down Expand Up @@ -127,3 +118,52 @@ pub(super) fn do_read(fd: i32, buf: &mut [u8]) -> Result<usize, SystemError> {

return file.read(buf.len(), buf);
}

/// Reads from `fd` into a userspace buffer via a kernel-side bounce buffer,
/// copying to userspace in chunks with `copy_to_user_protected`.
///
/// # Arguments
///
/// * `fd` - File descriptor passed to `do_read`.
/// * `user_ptr` - Start of the destination buffer in userspace.
/// * `len` - Requested number of bytes.
///
/// # Returns
///
/// The number of bytes copied to userspace. This may be less than `len` when
/// only a prefix of the buffer is writable, when `do_read` returns a short
/// read or 0, or when a later chunk fails after earlier chunks were already
/// delivered.
///
/// # Errors
///
/// * `SystemError::EFAULT` if no byte of the destination is writable, or if
///   the very first copy to userspace faults.
/// * Any error from `do_read` or `copy_to_user_protected` that occurs before
///   a single byte has been delivered.
///
/// Linux semantics: once some bytes have been transferred, a subsequent
/// failure yields the byte count rather than the error.
fn read_into_user_buffer(fd: i32, user_ptr: *mut u8, len: usize) -> Result<usize, SystemError> {
    // Keep the kernel-side buffer modest to avoid huge allocations/long critical sections.
    const CHUNK: usize = 64 * 1024;

    let user_base = user_ptr as usize;

    // Determine up front how many bytes are writable so that we never read
    // more from the file than could possibly be delivered.
    let accessible = user_accessible_len(VirtAddr::new(user_base), len, true /*write*/);
    if accessible == 0 {
        return Err(SystemError::EFAULT);
    }

    // A single bounce buffer is reused for every chunk. Only the first `n`
    // bytes freshly filled by `do_read` are ever copied out, so bytes left
    // over from a previous iteration are never exposed to userspace.
    let mut kbuf = alloc::vec![0u8; core::cmp::min(CHUNK, accessible)];
    let mut total = 0usize;

    while total < accessible {
        let chunk_len = core::cmp::min(kbuf.len(), accessible - total);
        let chunk = &mut kbuf[..chunk_len];

        let n = match do_read(fd, chunk) {
            Ok(0) => break,
            Ok(n) => n,
            // Bytes from earlier chunks were already handed to userspace;
            // report them instead of discarding the count.
            Err(_) if total > 0 => break,
            Err(e) => return Err(e),
        };

        let dst = VirtAddr::new(user_base + total);
        // SAFETY: NOTE(review) — relies on `copy_to_user_protected` handling
        // faults on `dst` itself; the range [user_base, user_base + accessible)
        // was reported writable by `user_accessible_len` above.
        let write_res = unsafe { copy_to_user_protected(dst, &chunk[..n]) };
        match write_res {
            Ok(_) => total += n,
            Err(SystemError::EFAULT) => {
                // The bytes of this chunk were already read via `do_read`
                // and are not delivered.
                if total == 0 {
                    return Err(SystemError::EFAULT);
                }
                break;
            }
            Err(e) => return Err(e),
        }

        // A short read ends the transfer.
        if n < chunk_len {
            break;
        }
    }

    Ok(total)
}
Loading
Loading